mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-05 23:41:45 +00:00
models : fix the attn_factor for mistral3 graphs + improve consistency (#17945)
* models : fix the attn_factor for mistral3 graphs
* cont : rework attn_factor correction logic
* cont : make deepseek2 consistent
* cont : add TODO
* cont : special-case DSv2
* cont : revert Mistral 3 Large changes
* cont : fix DS2 to use the original attn_factor
* cont : minor comments
This commit is contained in:
parent
dcb7d17758
commit
7bed317f53
7 changed files with 78 additions and 33 deletions
|
|
@@ -574,7 +574,7 @@ llm_graph_context::llm_graph_context(const llm_graph_params & params) :
|
|||
freq_base (cparams.rope_freq_base),
|
||||
freq_scale (cparams.rope_freq_scale),
|
||||
ext_factor (cparams.yarn_ext_factor),
|
||||
attn_factor (cparams.yarn_attn_factor),
|
||||
attn_factor (llama_hparams::yarn_attn_factor_adjust(cparams.yarn_attn_factor, cparams.rope_freq_scale, cparams.yarn_ext_factor)),
|
||||
beta_fast (cparams.yarn_beta_fast),
|
||||
beta_slow (cparams.yarn_beta_slow),
|
||||
norm_eps (hparams.f_norm_eps),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue