Default kv_unified to true and handle LLAMA_SET_ROWS.

This commit is contained in:
Concedo 2025-07-21 16:13:20 +08:00
parent 30675b0798
commit 6d50def409
5 changed files with 18 additions and 1 deletions

View file

@@ -135,6 +135,7 @@ bool embeddingstype_load_model(const embeddings_load_model_inputs inputs)
ctx_params.n_threads = nthreads;
ctx_params.n_threads_batch = nthreads;
ctx_params.flash_attn = inputs.flash_attention;
ctx_params.kv_unified = true;
embeddings_ctx = llama_init_from_model(embeddingsmodel, ctx_params);