llama : merge build_moe_ffn_from_probs function into build_moe_ffn (#14968)

2025-09-13 10:29:43 +00:00 · 2025-07-31 20:12:20 +08:00 · 2025-07-31 20:12:20 +08:00 · c1dacaa99b
commit c1dacaa99b
parent a9f77a8be3
3 changed files with 32 additions and 114 deletions
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -17320,10 +17320,18 @@ struct llm_build_smallthinker : public llm_graph_context{
            cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il);
            cb(cur, "ffn_norm", il);

-            ggml_tensor * ffn_out = build_moe_ffn_from_probs(cur, probs, model.layers[il].ffn_up_exps,
-                                                model.layers[il].ffn_gate_exps, model.layers[il].ffn_down_exps,
-                                                nullptr, n_expert, n_expert_used,
-                                                static_cast<llama_expert_gating_func_type>(hparams.expert_gating_func), il);
+            ggml_tensor * ffn_out =
+                build_moe_ffn(cur,
+                        nullptr,
+                        model.layers[il].ffn_up_exps,
+                        model.layers[il].ffn_gate_exps,
+                        model.layers[il].ffn_down_exps,
+                        nullptr,
+                        n_expert, n_expert_used,
+                        LLM_FFN_RELU, true,
+                        false, 0.0,
+                        static_cast<llama_expert_gating_func_type>(hparams.expert_gating_func),
+                        il, probs);

            cb(ffn_out, "ffn_out", il);
            cur = ffn_out;