Fixed MoE expert-count override to build the KV-override key from the detected model architecture instead of hardcoding the "llama" prefix

This commit is contained in:
Concedo 2025-02-10 17:46:08 +08:00
parent c1d38897ee
commit b162c25a5e
3 changed files with 10 additions and 1 deletions

View file

@ -2152,7 +2152,14 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
{
printf("\nOverriding number of experts to %d\n",inputs.moe_experts);
llama_model_kv_override kvo;
const char * moekey = "llama.expert_used_count";
std::string moekeystr = "llama";
if(file_format_meta.model_architecture_str!="")
{
moekeystr = file_format_meta.model_architecture_str;
}
moekeystr += ".expert_used_count";
const char * moekey = moekeystr.c_str();
std::strncpy(kvo.key, moekey, sizeof(kvo.key) - 1);
kvo.key[sizeof(kvo.key) - 1] = '\0'; // Ensure null termination
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;

View file

@ -296,6 +296,7 @@ void print_tok_vec(std::vector<float> &embd)
fileformatmeta->fileversion = filever;
fileformatmeta->model_architecture = GGUFArch::ARCH_DEFAULT;
fileformatmeta->model_architecture_str = modelarch;
if(modelarch=="phi2")
{
fileformatmeta->model_architecture = GGUFArch::ARCH_PHI;

View file

@ -68,6 +68,7 @@ struct FileFormatExtraMeta
int fileversion = 0;
GGUFArch model_architecture = GGUFArch::ARCH_DEFAULT;
int n_expert_count = 0;
std::string model_architecture_str = "";
};
struct TopPicksData