fix seq_id mismatch between head and worker devices

2025-09-09 15:04:33 +00:00 · 2025-06-11 17:10:21 +04:00 · 2025-06-11 17:10:21 +04:00 · 3e6d831930
commit 3e6d831930
parent fb9b1f2b00
3 changed files with 33 additions and 33 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1059,13 +1059,9 @@ struct server_context {
    }

    void kv_cache_clear() {
-        SRV_DBG("%s", "clearing KV cache\n");
-
-        // clear the entire KV cache
+        SRV_DBG("%s", "clearing all KV cache\n");
        llama_kv_cache_clear(ctx);
-
        llama_send_kv_cache_clear(ctx);
-
        clean_kv_cache = false;
    }

@@ -1090,7 +1086,7 @@ struct server_context {
                    llama_batch_add(batch, system_tokens[i + j], i + j, { 0 }, false);
                }

-                if (llama_decode(ctx, batch) != 0) {
+                if (llama_decode(ctx, batch, true) != 0) {
                    SRV_ERR("%s", "llama_decode() failed\n");
                    return;
                }
@@ -2311,7 +2307,7 @@ struct server_context {
                0, 0, 0, // unused
            };

-            const int ret = llama_decode(ctx, batch_view);
+            const int ret = llama_decode(ctx, batch_view, true);
            metrics.on_decoded(slots);

            if (ret != 0) {