mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-18 23:49:46 +00:00
convert : Mistral format yarn apply_scale support (#22612)
* [BUGFIX] Mistral format apply_scale support.
* Update convert_hf_to_gguf.py
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* fix misunderstood boolean parameters
---------
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
parent
db44417b02
commit
048a490f76
2 changed files with 4 additions and 3 deletions
|
|
@ -12716,11 +12716,12 @@ class MistralModel(LlamaModel):
|
|||
def set_mistral_config(gguf_writer: gguf.GGUFWriter, hparams: dict):
|
||||
if "yarn" in hparams:
|
||||
yarn_params = hparams["yarn"]
|
||||
mscale_all_dim = 1.0 if not yarn_params["apply_scale"] else 0.0
|
||||
gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
|
||||
gguf_writer.add_rope_scaling_factor(yarn_params["factor"])
|
||||
gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"])
|
||||
gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"])
|
||||
gguf_writer.add_rope_scaling_yarn_log_mul(1.0) # mscale_all_dim
|
||||
gguf_writer.add_rope_scaling_yarn_log_mul(mscale_all_dim)
|
||||
gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"])
|
||||
|
||||
if "llama_4_scaling" in hparams:
|
||||
|
|
|
|||
|
|
@ -1994,7 +1994,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
}
|
||||
}
|
||||
|
||||
if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f)) {
|
||||
if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false)) {
|
||||
// [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX]
|
||||
// cancel the factor from the convert script
|
||||
hparams.rope_yarn_log_mul /= 0.1f;
|
||||
|
|
@ -2868,7 +2868,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_FAST, hparams.yarn_beta_fast, false);
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false);
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f);
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
|
||||
|
||||
hparams.f_attn_temp_offset = 0.0f;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue