diff --git a/src/llama.cpp b/src/llama.cpp
index d199eb380..8ab9b14a3 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -153,6 +153,7 @@ static void zeros(std::ofstream & file, size_t n) {
     }
 }
 
+static bool phi3swa_warning_showed = false; //to warn when old phi3 model has no SWA
 static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault
 static int clblast_offload_fallback_layers = 0;
 static int layer_name_to_number(std::string inputString)
@@ -4911,7 +4912,7 @@ static void llm_load_hparams(
             } break;
         case LLM_ARCH_PHI3:
             {
-                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
+                ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa, false);
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
 
                 switch (hparams.n_layer) {
@@ -10807,7 +10808,21 @@ struct llm_build_context {
         struct ggml_tensor * inp_pos = build_inp_pos();
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
-        struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
+        struct ggml_tensor * KQ_mask_swa;
+        if (hparams.n_swa == 0)
+        {
+            if (!phi3swa_warning_showed)
+            {
+                phi3swa_warning_showed = true;
+                printf("\nWarning: PHI3 model did not contain sliding window!!!\nSWA is disabled. Model may need a new quant.\n");
+            }
+            KQ_mask_swa = build_inp_KQ_mask();
+        }
+        else
+        {
+            KQ_mask_swa = build_inp_KQ_mask_swa();
+        }
+
 
         for (int il = 0; il < n_layer; ++il) {
             auto residual = inpL;
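
For context, a minimal standalone sketch of the fallback pattern the patch uses: read the sliding-window hyperparameter as an optional key, warn once if it is absent, and build the regular attention mask instead of the SWA mask. The names get_optional_u32, build_full_mask, build_swa_mask, and the key string "phi3.attention.sliding_window" are illustrative stand-ins, not the actual llama.cpp API (which uses ml.get_key, build_inp_KQ_mask, and build_inp_KQ_mask_swa as in the diff above); this only demonstrates the control flow under those assumptions.

// Sketch (hypothetical names): optional metadata lookup plus one-time warning
// and graceful fallback, mirroring the logic added in the diff above.
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>

// Stand-in for ml.get_key(key, value, /*required=*/false): fills 'out' only
// when the key is present and never aborts on a missing key.
static bool get_optional_u32(const std::map<std::string, uint32_t> & meta,
                             const std::string & key, uint32_t & out) {
    const auto it = meta.find(key);
    if (it == meta.end()) {
        return false;           // key absent: leave 'out' at its default
    }
    out = it->second;
    return true;
}

// Stand-ins for build_inp_KQ_mask() / build_inp_KQ_mask_swa().
static const char * build_full_mask() { return "full-context KQ mask"; }
static const char * build_swa_mask()  { return "sliding-window KQ mask"; }

int main() {
    // Simulated metadata from an older Phi-3 quant: no sliding-window key.
    const std::map<std::string, uint32_t> meta;

    uint32_t n_swa = 0;                                  // default: SWA off
    get_optional_u32(meta, "phi3.attention.sliding_window", n_swa);

    static bool warned = false;
    const char * mask = nullptr;
    if (n_swa == 0) {
        if (!warned) {
            warned = true;
            std::printf("Warning: model has no sliding window, SWA disabled\n");
        }
        mask = build_full_mask();                        // graceful fallback
    } else {
        mask = build_swa_mask();
    }
    std::printf("building graph with: %s\n", mask);
    return 0;
}

The effect of passing false as the "required" flag is that loading no longer aborts on older Phi-3 GGUF files that lack the sliding-window key: n_swa keeps its zero default, the graph build prints the one-time warning, and attention falls back to the full-context mask until the model is requantized with the missing metadata.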