fix: use sequence id `slot.id + 1` for the speculation batch and its KV-cache removal

This commit is contained in:
DeEMO 2025-06-30 07:31:05 +00:00
parent b4929d510a
commit ca5996e7a6

View file

@@ -2534,10 +2534,10 @@ struct server_context {
// construct the speculation batch
llama_batch_clear(slot.batch_spec);
llama_batch_add (slot.batch_spec, id, slot.n_past, { slot.id }, true);
llama_batch_add (slot.batch_spec, id, slot.n_past, { slot.id + 1 }, true);
for (size_t i = 0; i < draft.size(); ++i) {
llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id + 1 }, true);
}
llama_decode(ctx, slot.batch_spec);
@@ -2551,7 +2551,8 @@ struct server_context {
slot.cache_tokens.push_back(id);
slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
llama_kv_cache_seq_rm (ctx, slot.id + 1, slot.n_past, -1);
llama_send_kv_cache_seq_rm(ctx, slot.id , slot.n_past, -1);
for (size_t i = 0; i < ids.size(); ++i) {
completion_token_output result;