fix seq_id mismatch between head and worker devices

2025-09-10 00:14:33 +00:00 · 2025-06-11 17:10:21 +04:00 · 2025-06-11 17:10:21 +04:00 · 3e6d831930
commit 3e6d831930
parent fb9b1f2b00
3 changed files with 33 additions and 33 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -957,7 +957,8 @@ extern "C" {
    // < 0 - error
    LLAMA_API int32_t llama_decode(
            struct llama_context * ctx,
-              struct llama_batch   batch);
+              struct llama_batch   batch,
+                            bool   server_mode = false);

    // Set the number of threads used for decoding
    // n_threads is the number of threads used for generation (single token)