Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	docs/development/HOWTO-add-model.md
#	docs/development/token_generation_performance_tips.md
#	flake.lock
This commit is contained in:
Concedo 2024-07-16 00:49:34 +08:00
commit e707ab9025
11 changed files with 549 additions and 171 deletions

View file

@@ -12954,12 +12954,12 @@ struct llm_build_context {
struct ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA);
cb(ffn_inp, "ffn_inp", il);
if ((uint32_t) il < hparams.n_layer_dense_lead) {
cur = llm_build_norm(ctx0, ffn_inp, hparams,
model.layers[il].ffn_norm, NULL,
LLM_NORM_RMS, cb, il);
cb(cur, "ffn_norm", il);
cur = llm_build_norm(ctx0, ffn_inp, hparams,
model.layers[il].ffn_norm, NULL,
LLM_NORM_RMS, cb, il);
cb(cur, "ffn_norm", il);
if ((uint32_t) il < hparams.n_layer_dense_lead) {
cur = llm_build_ffn(ctx0, cur,
model.layers[il].ffn_up, NULL, NULL,
model.layers[il].ffn_gate, NULL, NULL,
@@ -12969,11 +12969,6 @@ struct llm_build_context {
cb(cur, "ffn_out", il);
} else {
// MoE branch
cur = llm_build_norm(ctx0, ffn_inp, hparams,
model.layers[il].ffn_norm, NULL,
LLM_NORM_RMS, cb, il);
cb(cur, "ffn_norm", il);
ggml_tensor * moe_out =
llm_build_moe_ffn(ctx0, cur,
model.layers[il].ffn_gate_inp,