From ca5996e7a6fbf900fa881f9d08ced2749abb8702 Mon Sep 17 00:00:00 2001
From: DeEMO
Date: Mon, 30 Jun 2025 07:31:05 +0000
Subject: [PATCH] fix: slot id

---
 examples/server/server.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 5b970e4f..1acf1421 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2534,10 +2534,10 @@ struct server_context {
 
             // construct the speculation batch
             llama_batch_clear(slot.batch_spec);
-            llama_batch_add  (slot.batch_spec, id, slot.n_past, { slot.id }, true);
+            llama_batch_add  (slot.batch_spec, id, slot.n_past, { slot.id + 1 }, true);
 
             for (size_t i = 0; i < draft.size(); ++i) {
-                llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
+                llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id + 1 }, true);
             }
 
             llama_decode(ctx, slot.batch_spec);
@@ -2551,7 +2551,8 @@ struct server_context {
             slot.cache_tokens.push_back(id);
             slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
 
-            llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
+            llama_kv_cache_seq_rm     (ctx, slot.id + 1, slot.n_past, -1);
+            llama_send_kv_cache_seq_rm(ctx, slot.id    , slot.n_past, -1);
 
             for (size_t i = 0; i < ids.size(); ++i) {
                 completion_token_output result;