llama : refactor rope_freq_base/scale_swa conversion and init (#18553)

* refactor rope_freq_base/scale_swa conversion and init

* safe defaults for unknowns

* update relevant models

* grammar

* add get_rope_freq_scale to modern-bert

* const

* const

* log swa info
This commit is contained in:
Sigbjørn Skjæret 2026-01-05 09:14:04 +01:00 committed by GitHub
parent 67e3f6f601
commit eadc4184ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 94 additions and 37 deletions

View file

@ -26,10 +26,16 @@ llm_build_smallthinker<iswa>::llm_build_smallthinker(const llama_model & model,
ggml_tensor * inp_out_ids = build_inp_out_ids();
for (int il = 0; il < n_layer; ++il) {
ggml_tensor * inpSA = inpL;
ggml_tensor * probs = nullptr;
const float freq_base_l = model.get_rope_freq_base (cparams, il);
const float freq_scale_l = model.get_rope_freq_scale(cparams, il);
probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
ggml_tensor * inpSA = inpL;
// This overlaps with SWA layers in current models, so get_rope_freq_base/scale may be superfluous
const bool use_rope = hparams.n_no_rope_layer_step == n_layer ||
il % hparams.n_no_rope_layer_step != 0;
ggml_tensor * probs = build_lora_mm(model.layers[il].ffn_gate_inp, inpL); // [n_expert, n_tokens]
cb(probs, "ffn_moe_logits", il);
// norm
@ -52,11 +58,11 @@ llm_build_smallthinker<iswa>::llm_build_smallthinker(const llama_model & model,
Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
if (hparams.n_no_rope_layer_step == n_layer || il % hparams.n_no_rope_layer_step != 0) {
Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
if (use_rope) {
Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
ext_factor, attn_factor, beta_fast, beta_slow);
Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale,
Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l,
ext_factor, attn_factor, beta_fast, beta_slow);
}
cb(Qcur, "Qcur", il);