llama: dynamic head_dim and n_rot for SWA (#20301)

* llama: dynamic head_dim and n_rot for SWA

* also add gguf_writer wrappers

* fix build

* build_rope_shift arg reorder
This commit is contained in:
Xuan-Son Nguyen 2026-03-09 22:22:39 +01:00 committed by GitHub
parent 23fbfcb1ad
commit 59db9a357d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
112 changed files with 419 additions and 346 deletions

View file

@@ -2,10 +2,10 @@
 llm_build_phi2::llm_build_phi2(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
-const int64_t n_embd_head = hparams.n_embd_head_v;
+const int64_t n_embd_head = hparams.n_embd_head_v();
 const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
-GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
+GGML_ASSERT(n_embd_head == hparams.n_embd_head_k());
 ggml_tensor * cur;
 ggml_tensor * attn_norm_output;