Fix for older Phi-3 models without SWA (sliding window attention)

2025-09-11 09:34:37 +00:00 · 2024-07-28 12:13:38 +08:00 · 2024-07-28 12:13:38 +08:00 · 0029e36f50
commit 0029e36f50
parent 01afb28a63
1 changed files with 17 additions and 2 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@ -153,6 +153,7 @@ static void zeros(std::ofstream & file, size_t n) {
    }
 }
 static bool phi3swa_warning_showed = false; //to warn when old phi3 model has no SWA
 static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault
 static int clblast_offload_fallback_layers = 0;
 static int layer_name_to_number(std::string inputString)
@ -4911,7 +4912,7 @@ static void llm_load_hparams(
            } break;
        case LLM_ARCH_PHI3:
            {
-                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa,false);
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                switch (hparams.n_layer) {
@ -10807,7 +10808,21 @@ struct llm_build_context {
        struct ggml_tensor * inp_pos = build_inp_pos();
        // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
-        struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
+        struct ggml_tensor * KQ_mask_swa;
        if(hparams.n_swa==0)
        {
            if(!phi3swa_warning_showed)
            {
                phi3swa_warning_showed = true;
                printf("\nWarning: PHI3 model did not contain sliding window!!!\nSWA is disabled. Model may need a new quant.\n");
            }
            KQ_mask_swa = build_inp_KQ_mask();
        }
        else
        {
            KQ_mask_swa = build_inp_KQ_mask_swa();
        }
        for (int il = 0; il < n_layer; ++il) {
            auto residual = inpL;