model : support NVFP4 tensors for Gemma4 (#21971)

* support nvfp4 tensors for Gemma4

* add wo_s to build_attn

* add wo_s to build_attn

* fix glm4
This commit is contained in:
Sigbjørn Skjæret 2026-04-16 16:51:47 +02:00 committed by GitHub
parent b572d1ecd6
commit f772f6e434
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
105 changed files with 149 additions and 148 deletions

View file

@@ -66,7 +66,7 @@ llm_build_lfm2<iswa>::llm_build_lfm2(const llama_model & model, const llm_graph_
attn_factor, beta_fast, beta_slow);
cur = build_attn(inp_attn,
-model.layers[il].wo, NULL,
+model.layers[il].wo, NULL, model.layers[il].wo_s,
q, k, v, nullptr, nullptr, nullptr, 1.0f / sqrtf(float(n_embd_head)), il);
cb(cur, "model.layers.{}.self_attn.out_proj", il);