mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-13 18:39:48 +00:00
kv-cache : use ggml_set_rows (#14285)
* kv-cache : use ggml_set_rows ggml-ci * graph : separate k and v indices ggml-ci * cont : remove redundant ifs ggml-ci * kv-cache : improve find_slot impl * kv-cache : bounds-check when accessing slot_info indices * kv-cache : add comments ggml-ci * ggml : add TODOs for adding GGML_OP_SET_ROWS support in the backends ggml-ci
This commit is contained in:
parent
9067487c44
commit
a70c8a0c4b
13 changed files with 451 additions and 143 deletions
|
@ -92,6 +92,8 @@ private:
|
|||
|
||||
class llama_memory_hybrid_context : public llama_memory_context_i {
|
||||
public:
|
||||
using slot_info_vec_t = llama_kv_cache_unified::slot_info_vec_t;
|
||||
|
||||
// init failure
|
||||
explicit llama_memory_hybrid_context(llama_memory_status status);
|
||||
|
||||
|
@ -107,7 +109,7 @@ public:
|
|||
// init success
|
||||
llama_memory_hybrid_context(
|
||||
llama_memory_hybrid * mem,
|
||||
std::vector<uint32_t> heads_attn,
|
||||
slot_info_vec_t sinfos_attn,
|
||||
std::vector<llama_ubatch> ubatches);
|
||||
|
||||
~llama_memory_hybrid_context() = default;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue