Merge branch 'master' into concedo_experimental

# Conflicts:
#	Makefile
#	README.md
#	flake.lock
#	ggml-cuda.cu
#	llama.cpp
#	tests/test-backend-ops.cpp
#	tests/test-quantize-fns.cpp
2025-09-10 00:54:41 +00:00 · 2024-02-28 13:41:35 +08:00 · 2024-02-28 13:41:35 +08:00 · ad638285de
commit ad638285de
parent 12b4c14847 cb49e0f8c9
26 changed files with 3393 additions and 589 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -666,7 +666,7 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t
            //extract the unwanted tokens out from context and KV
            int diff = found - trimstart;
            llama_kv_cache_seq_rm(llama_ctx_v4, 0, trimstart, trimstart + diff);
-            llama_kv_cache_seq_shift(llama_ctx_v4, 0, trimstart + diff, -1, -diff);
+            llama_kv_cache_seq_add(llama_ctx_v4, 0, trimstart + diff, -1, -diff);

            for (size_t i = trimstart + diff; i < current_context_tokens.size() - 1; i++)
            {