kv-cache : use ggml_set_rows (#14285)

* kv-cache : use ggml_set_rows ggml-ci
* graph : separate k and v indices ggml-ci
* cont : remove redundant ifs ggml-ci
* kv-cache : improve find_slot impl
* kv-cache : bounds-check when accessing slot_info indices
* kv-cache : add comments ggml-ci
* ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends ggml-ci
2025-09-13 18:39:48 +00:00 · 2025-07-03 10:53:35 +03:00 · 2025-07-03 10:53:35 +03:00 · a70c8a0c4b
commit a70c8a0c4b
parent 9067487c44
13 changed files with 451 additions and 143 deletions
--- a/src/llama-memory-hybrid.h
+++ b/src/llama-memory-hybrid.h
@@ -92,6 +92,8 @@ private:

 class llama_memory_hybrid_context : public llama_memory_context_i {
 public:
+    using slot_info_vec_t = llama_kv_cache_unified::slot_info_vec_t;
+
    // init failure
    explicit llama_memory_hybrid_context(llama_memory_status status);

@@ -107,7 +109,7 @@ public:
    // init success
    llama_memory_hybrid_context(
              llama_memory_hybrid * mem,
-            std::vector<uint32_t>   heads_attn,
+                  slot_info_vec_t   sinfos_attn,
        std::vector<llama_ubatch>   ubatches);

    ~llama_memory_hybrid_context() = default;