model : support NVFP4 tensors for Gemma4 (#21971)

* support nvfp4 tensors for Gemma4
* add wo_s to build_attn
* add wo_s to build_attn
* fix glm4
2026-05-18 06:19:19 +00:00 · 2026-04-16 16:51:47 +02:00 · 2026-04-16 16:51:47 +02:00 · f772f6e434
commit f772f6e434
parent b572d1ecd6
105 changed files with 149 additions and 148 deletions
--- a/src/models/maincoder.cpp
+++ b/src/models/maincoder.cpp
@@ -66,7 +66,7 @@ llm_build_maincoder::llm_build_maincoder(const llama_model & model, const llm_gr
            cb(Vcur, "Vcur", il);

            cur = build_attn(inp_attn,
-                    model.layers[il].wo, model.layers[il].bo,
+                    model.layers[il].wo, model.layers[il].bo, model.layers[il].wo_s,
                    Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
        }
        if (il == n_layer - 1 && inp_out_ids) {