diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index f7a1f5206..0f78c088f 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -826,17 +826,6 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
                 }
             return rope_freq_base_with_positive_offset;
         }
-        // else if(model_arch==GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2)
-        // {
-        //     float extended_rope_negative_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / (3.14159265358979323846 * 3.14159265358979323846));
-        //     float rope_freq_base_with_negative_offset = gradient_ai_rope_freq_base_value / extended_rope_negative_offset_value;
-        //     if(debugmode==1)
-        //     {
-        //         printf("Extended RoPE Negative Offset (divisor) for Llama 1 and 2 based models. (value:%.3f).\n", extended_rope_negative_offset_value);
-        //         printf("RoPE base calculated via Gradient AI formula for Llama 1 and 2 based models. (value:%.1f).\n", rope_freq_base_with_negative_offset);
-        //     }
-        //     return rope_freq_base_with_negative_offset;
-        // }
         else
         {
             return gradient_ai_rope_freq_base_value;
@@ -1087,6 +1076,11 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice);
         }
         ggml_cuda_set_mul_mat_q(inputs.use_mmq);
+        // Warn only when Flash Attention is disabled: the message below states that Qwen2 may give incoherent output without it.
+        if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 && !kcpp_params->flash_attn)
+        {
+            printf("CUBLAS: Warning, you are running Qwen2 without Flash Attention and may observe incoherent output.\n");
+        }
         #endif
         model_params.main_gpu = cu_parseinfo_maindevice;
 
diff --git a/model_adapter.cpp b/model_adapter.cpp
index 1792fd489..ecde7d191 100644
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@@ -310,9 +310,9 @@ void print_tok_vec(std::vector &embd)
             {
                 fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;
             }
-            else if(modelarch=="llama" && freq_base_train==10000.0f)
+            else if(modelarch=="qwen2")
             {
-                fileformatmeta->model_architecture = GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2;
+                fileformatmeta->model_architecture = GGUFArch::ARCH_QWEN2;
             }
             printf("Arch Category: %d\n",fileformatmeta->model_architecture);
 
diff --git a/model_adapter.h b/model_adapter.h
index a8a14e4ad..591971562 100644
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -57,7 +57,7 @@ enum GGUFArch
     ARCH_PHI = 2,
     ARCH_MAMBA = 3,
     ARCH_SOLAR = 4,
-    ARCH_MISTRAL_LLAMA_1_AND_2 = 5,
+    ARCH_QWEN2 = 5,
 };
 
 struct FileFormatExtraMeta