Merge branch 'upstream' into concedo_experimental

# Conflicts: # README.md # examples/server/README.md # examples/speculative/speculative.cpp # flake.lock # ggml/src/CMakeLists.txt # scripts/sync-ggml.last # tests/test-backend-ops.cpp
2025-09-14 02:49:41 +00:00 · 2024-11-14 21:40:52 +08:00 · 2024-11-14 21:40:52 +08:00 · df080b074d
commit df080b074d
parent bfa118ee45 2a82891a85
41 changed files with 132106 additions and 130341 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -799,7 +799,7 @@ extern "C" {
    // Processes a batch of tokens with the encoder part of the encoder-decoder model.
    // Stores the encoder output internally for later use by the decoder cross-attention layers.
    //   0 - success
-    // < 0 - error
+    // < 0 - error. the KV cache state is restored to the state before this call
    LLAMA_API int32_t llama_encode(
            struct llama_context * ctx,
              struct llama_batch   batch);
@@ -807,7 +807,7 @@ extern "C" {
    // A positive return value does not mean a fatal error, but rather a warning.
    //   0 - success
    //   1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context)
-    // < 0 - error
+    // < 0 - error. the KV cache state is restored to the state before this call
    LLAMA_API int32_t llama_decode(
            struct llama_context * ctx,
              struct llama_batch   batch);