mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-13 07:09:03 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#   .github/workflows/build.yml
#   CODEOWNERS
#   ggml/CMakeLists.txt
#   ggml/src/ggml-cpu/CMakeLists.txt
#   ggml/src/ggml-cpu/kleidiai/kleidiai.cpp
#   scripts/sync-ggml.last
#   tests/test-backend-ops.cpp
This commit is contained in:
commit
20c802a198
11 changed files with 455 additions and 241 deletions
|
|
@@ -11813,6 +11813,7 @@ struct llm_graph_context_mamba : public llm_graph_context {
|
|||
// TODO: skip computing output earlier for unused tokens
|
||||
|
||||
y = ggml_add(ctx0, y, ggml_mul(ctx0, x, model.layers[il].ssm_d));
|
||||
cb(y, "mamba2_y_add_d", il);
|
||||
y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y);
|
||||
|
||||
// grouped RMS norm
|
||||
|
|
@@ -14767,6 +14768,7 @@ struct llm_build_nemotron_h : public llm_graph_context_mamba {
|
|||
ggml_tensor * inpL;
|
||||
|
||||
inpL = build_inp_embd(model.tok_embd);
|
||||
ggml_build_forward_expand(gf, inpL);
|
||||
|
||||
auto * inp = build_inp_mem_hybrid();
|
||||
|
||||
|
|
@@ -14798,7 +14800,7 @@ struct llm_build_nemotron_h : public llm_graph_context_mamba {
|
|||
|
||||
// add residual
|
||||
cur = ggml_add(ctx0, cur, inpSA);
|
||||
cb(cur, "block_out", il);
|
||||
cb(cur, "nemotron_h_block_out", il);
|
||||
|
||||
// input for next layer
|
||||
inpL = cur;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue