mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
fix for older phi3 models without swa
This commit is contained in:
parent
01afb28a63
commit
0029e36f50
1 changed file with 17 additions and 2 deletions
|
@ -153,6 +153,7 @@ static void zeros(std::ofstream & file, size_t n) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool phi3swa_warning_showed = false; //to warn when old phi3 model has no SWA
|
||||||
static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault
|
static bool clblast_offload_fallback_mode = false; //used when regular offload will segfault
|
||||||
static int clblast_offload_fallback_layers = 0;
|
static int clblast_offload_fallback_layers = 0;
|
||||||
static int layer_name_to_number(std::string inputString)
|
static int layer_name_to_number(std::string inputString)
|
||||||
|
@ -4911,7 +4912,7 @@ static void llm_load_hparams(
|
||||||
} break;
|
} break;
|
||||||
case LLM_ARCH_PHI3:
|
case LLM_ARCH_PHI3:
|
||||||
{
|
{
|
||||||
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa);
|
ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa,false);
|
||||||
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
|
||||||
|
|
||||||
switch (hparams.n_layer) {
|
switch (hparams.n_layer) {
|
||||||
|
@ -10807,7 +10808,21 @@ struct llm_build_context {
|
||||||
struct ggml_tensor * inp_pos = build_inp_pos();
|
struct ggml_tensor * inp_pos = build_inp_pos();
|
||||||
|
|
||||||
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
|
// KQ_mask (mask for 1 head, it will be broadcasted to all heads)
|
||||||
struct ggml_tensor * KQ_mask_swa = build_inp_KQ_mask_swa();
|
struct ggml_tensor * KQ_mask_swa;
|
||||||
|
if(hparams.n_swa==0)
|
||||||
|
{
|
||||||
|
if(!phi3swa_warning_showed)
|
||||||
|
{
|
||||||
|
phi3swa_warning_showed = true;
|
||||||
|
printf("\nWarning: PHI3 model did not contain sliding window!!!\nSWA is disabled. Model may need a new quant.\n");
|
||||||
|
}
|
||||||
|
KQ_mask_swa = build_inp_KQ_mask();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
KQ_mask_swa = build_inp_KQ_mask_swa();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
for (int il = 0; il < n_layer; ++il) {
|
for (int il = 0; il < n_layer; ++il) {
|
||||||
auto residual = inpL;
|
auto residual = inpL;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue