mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 09:04:36 +00:00
qwen2 warning FA
This commit is contained in:
parent
116d5fe58e
commit
0dd3907940
3 changed files with 7 additions and 14 deletions
|
@ -826,17 +826,6 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
|
||||||
}
|
}
|
||||||
return rope_freq_base_with_positive_offset;
|
return rope_freq_base_with_positive_offset;
|
||||||
}
|
}
|
||||||
// else if(model_arch==GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2)
|
|
||||||
// {
|
|
||||||
// float extended_rope_negative_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / (3.14159265358979323846 * 3.14159265358979323846));
|
|
||||||
// float rope_freq_base_with_negative_offset = gradient_ai_rope_freq_base_value / extended_rope_negative_offset_value;
|
|
||||||
// if(debugmode==1)
|
|
||||||
// {
|
|
||||||
// printf("Extended RoPE Negative Offset (divisor) for Llama 1 and 2 based models. (value:%.3f).\n", extended_rope_negative_offset_value);
|
|
||||||
// printf("RoPE base calculated via Gradient AI formula for Llama 1 and 2 based models. (value:%.1f).\n", rope_freq_base_with_negative_offset);
|
|
||||||
// }
|
|
||||||
// return rope_freq_base_with_negative_offset;
|
|
||||||
// }
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return gradient_ai_rope_freq_base_value;
|
return gradient_ai_rope_freq_base_value;
|
||||||
|
@ -1087,6 +1076,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
||||||
printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice);
|
printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice);
|
||||||
}
|
}
|
||||||
ggml_cuda_set_mul_mat_q(inputs.use_mmq);
|
ggml_cuda_set_mul_mat_q(inputs.use_mmq);
|
||||||
|
if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 && !kcpp_params->flash_attn) // warn only when Flash Attention is OFF, matching the message text below
|
||||||
|
{
|
||||||
|
printf("CUBLAS: Warning, you are running Qwen2 without Flash Attention and may observe incoherent output.\n");
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
model_params.main_gpu = cu_parseinfo_maindevice;
|
model_params.main_gpu = cu_parseinfo_maindevice;
|
||||||
|
|
||||||
|
|
|
@ -310,9 +310,9 @@ void print_tok_vec(std::vector<float> &embd)
|
||||||
{
|
{
|
||||||
fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;
|
fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;
|
||||||
}
|
}
|
||||||
else if(modelarch=="llama" && freq_base_train==10000.0f)
|
else if(modelarch=="qwen2")
|
||||||
{
|
{
|
||||||
fileformatmeta->model_architecture = GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2;
|
fileformatmeta->model_architecture = GGUFArch::ARCH_QWEN2;
|
||||||
}
|
}
|
||||||
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
|
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
|
||||||
|
|
||||||
|
|
|
@ -57,7 +57,7 @@ enum GGUFArch
|
||||||
ARCH_PHI = 2,
|
ARCH_PHI = 2,
|
||||||
ARCH_MAMBA = 3,
|
ARCH_MAMBA = 3,
|
||||||
ARCH_SOLAR = 4,
|
ARCH_SOLAR = 4,
|
||||||
ARCH_MISTRAL_LLAMA_1_AND_2 = 5,
|
ARCH_QWEN2 = 5,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FileFormatExtraMeta
|
struct FileFormatExtraMeta
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue