llama: dynamic head_dim and n_rot for SWA (#20301)

* llama: dynamic head_dim and n_rot for SWA

* also add gguf_writer wrappers

* fix build

* build_rope_shift arg reorder
This commit is contained in:
Xuan-Son Nguyen 2026-03-09 22:22:39 +01:00 committed by GitHub
parent 23fbfcb1ad
commit 59db9a357d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
112 changed files with 419 additions and 346 deletions

View file

@@ -1,11 +1,11 @@
#include "models.h"
llm_build_codeshell::llm_build_codeshell(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
const int64_t n_embd_head = hparams.n_embd_head_v;
const int64_t n_embd_head = hparams.n_embd_head_v();
const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
GGML_ASSERT(n_embd_head == hparams.n_rot);
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
GGML_ASSERT(n_embd_head == n_rot);
ggml_tensor * cur;
ggml_tensor * inpL;