mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
model: add llama 4 scaling for mistral-large (deepseek arch) (#17744)
This commit is contained in:
parent
08f9d3cc1d
commit
4d3726278b
2 changed files with 22 additions and 0 deletions
|
|
@@ -1628,6 +1628,10 @@ void llama_model::load_hparams(llama_model_loader & ml) {
|
|||
}
|
||||
ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
|
||||
|
||||
// (optional) temperature tuning - used by mistral-large
|
||||
ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);
|
||||
ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_LENGTH, hparams.n_attn_temp_floor_scale, false);
|
||||
|
||||
switch (hparams.n_layer) {
|
||||
case 27: type = LLM_TYPE_16B; break;
|
||||
case 60: type = LLM_TYPE_236B; break;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue