Fix for older phi3 models without SWA (sliding-window attention) metadata

This commit is contained in:
Concedo 2024-07-28 12:13:38 +08:00
parent 01afb28a63
commit 0029e36f50

View file

@ -153,6 +153,7 @@ static void zeros(std::ofstream & file, size_t n) {
} }
} }
// Print-once flag for the PHI3 "model did not contain sliding window" warning:
// it is set to true right before the warning is printed, so later graph builds
// that see hparams.n_swa == 0 do not repeat the message.
static bool phi3swa_warning_showed = false; //to warn when old phi3 model has no SWA
static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault
static int clblast_offload_fallback_layers = 0; static int clblast_offload_fallback_layers = 0;
static int layer_name_to_number(std::string inputString) static int layer_name_to_number(std::string inputString)
@ -4911,7 +4912,7 @@ static void llm_load_hparams(
} break; } break;
case LLM_ARCH_PHI3: case LLM_ARCH_PHI3:
{ {
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa); ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa,false);
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
switch (hparams.n_layer) { switch (hparams.n_layer) {
@ -10807,7 +10808,21 @@ struct llm_build_context {
struct ggml_tensor * inp_pos = build_inp_pos(); struct ggml_tensor * inp_pos = build_inp_pos();
// KQ_mask (mask for 1 head, it will be broadcasted to all heads) // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa(); struct ggml_tensor * KQ_mask_swa;
if(hparams.n_swa==0)
{
if(!phi3swa_warning_showed)
{
phi3swa_warning_showed = true;
printf("\nWarning: PHI3 model did not contain sliding window!!!\nSWA is disabled. Model may need a new quant.\n");
}
KQ_mask_swa = build_inp_KQ_mask();
}
else
{
KQ_mask_swa = build_inp_KQ_mask_swa();
}
for (int il = 0; il < n_layer; ++il) { for (int il = 0; il < n_layer; ++il) {
auto residual = inpL; auto residual = inpL;