From b162c25a5e260fdad0b38388cf3b2bb23d4423f7 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 10 Feb 2025 17:46:08 +0800
Subject: [PATCH] fixed moe experts to use detected arch for key

---
 gpttype_adapter.cpp | 9 ++++++++-
 model_adapter.cpp   | 1 +
 model_adapter.h     | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 8f9daddcb..baa0f263a 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -2152,7 +2152,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         {
             printf("\nOverriding number of experts to %d\n",inputs.moe_experts);
             llama_model_kv_override kvo;
-            const char * moekey = "llama.expert_used_count";
+            std::string moekeystr = "llama";
+            if(file_format_meta.model_architecture_str!="")
+            {
+                moekeystr = file_format_meta.model_architecture_str;
+            }
+            moekeystr += ".expert_used_count";
+
+            const char * moekey = moekeystr.c_str();
             std::strncpy(kvo.key, moekey, sizeof(kvo.key) - 1);
             kvo.key[sizeof(kvo.key) - 1] = '\0'; // Ensure null termination
             kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
diff --git a/model_adapter.cpp b/model_adapter.cpp
index 0b286b879..9cb8ac0f0 100644
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@@ -296,6 +296,7 @@ void print_tok_vec(std::vector &embd)
 
         fileformatmeta->fileversion = filever;
         fileformatmeta->model_architecture = GGUFArch::ARCH_DEFAULT;
+        fileformatmeta->model_architecture_str = modelarch;
         if(modelarch=="phi2")
         {
             fileformatmeta->model_architecture = GGUFArch::ARCH_PHI;
diff --git a/model_adapter.h b/model_adapter.h
index 2b7f566a7..c28bb0cf2 100644
--- a/model_adapter.h
+++ b/model_adapter.h
@@ -68,6 +68,7 @@ struct FileFormatExtraMeta
     int fileversion = 0;
     GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
     int n_expert_count = 0;
+    std::string model_architecture_str = "";
 };
 
 struct TopPicksData