default kv_unified to true, handle LLAMA_SET_ROWS.

2025-09-13 10:29:43 +00:00 · 2025-07-21 16:13:20 +08:00 · 2025-07-21 16:13:20 +08:00 · 6d50def409
commit 6d50def409
parent 30675b0798
5 changed files with 18 additions and 1 deletions
--- a/otherarch/embeddings_adapter.cpp
+++ b/otherarch/embeddings_adapter.cpp
@@ -135,6 +135,7 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
    ctx_params.n_threads = nthreads;
    ctx_params.n_threads_batch = nthreads;
    ctx_params.flash_attn = inputs.flash_attention;
+    ctx_params.kv_unified = true;

    embeddings_ctx = llama_init_from_model(embeddingsmodel, ctx_params);