Merge commit 'ab86335760' into concedo_experimental

# Conflicts:
# .github/workflows/release.yml
# examples/retrieval/retrieval.cpp
# examples/simple-chat/simple-chat.cpp
# ggml/src/ggml-opencl/ggml-opencl.cpp
# ggml/src/ggml-sycl/ggml-sycl.cpp
# requirements/requirements-convert_hf_to_gguf.txt
# requirements/requirements-convert_hf_to_gguf_update.txt
# requirements/requirements-convert_lora_to_gguf.txt
# tools/run/run.cpp
2025-09-14 02:49:41 +00:00 · 2025-05-23 11:41:36 +08:00 · 2025-05-23 11:41:36 +08:00 · 22ef97d7d3
commit 22ef97d7d3
parent ae8f01c2d4 ab86335760
23 changed files with 495 additions and 231 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -612,10 +612,12 @@ extern "C" {

    // Returns the number of tokens in the KV cache (slow, use only for debug)
    // If a KV cell has multiple sequences assigned to it, it will be counted multiple times
-    LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx);
+    DEPRECATED(LLAMA_API int32_t llama_kv_self_n_tokens(const struct llama_context * ctx),
+               "Use llama_kv_self_seq_pos_max() instead");

    // Returns the number of used KV cells (i.e. have at least one sequence assigned to them)
-    LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx);
+    DEPRECATED(LLAMA_API int32_t llama_kv_self_used_cells(const struct llama_context * ctx),
+               "Use llama_kv_self_seq_pos_max() instead");

    // Clear the KV cache - both cell info is erased and KV data is zeroed
    LLAMA_API void llama_kv_self_clear(