Mirror of https://github.com/Lizonghang/prima.cpp.git (synced 2025-09-05 17:49:02 +00:00)
fix: slot id

commit ca5996e7a6 (parent b4929d510a)
1 changed file with 4 additions and 3 deletions
@@ -2534,10 +2534,10 @@ struct server_context {
             // construct the speculation batch
             llama_batch_clear(slot.batch_spec);
-            llama_batch_add  (slot.batch_spec, id, slot.n_past, { slot.id }, true);
+            llama_batch_add  (slot.batch_spec, id, slot.n_past, { slot.id + 1 }, true);
 
             for (size_t i = 0; i < draft.size(); ++i) {
-                llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id }, true);
+                llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id + 1 }, true);
             }
 
             llama_decode(ctx, slot.batch_spec);
 
@@ -2551,7 +2551,8 @@ struct server_context {
             slot.cache_tokens.push_back(id);
             slot.cache_tokens.insert(slot.cache_tokens.end(), ids.begin(), ids.end() - 1);
 
-            llama_kv_cache_seq_rm(ctx, slot.id, slot.n_past, -1);
+            llama_kv_cache_seq_rm     (ctx, slot.id + 1, slot.n_past, -1);
+            llama_send_kv_cache_seq_rm(ctx, slot.id    , slot.n_past, -1);
 
             for (size_t i = 0; i < ids.size(); ++i) {
                 completion_token_output result;
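Reading the two hunks together, the fix appears to keep a slot's speculative work on its own KV-cache sequence: the speculation batch is now built against sequence slot.id + 1 rather than slot.id, and the post-verification cleanup trims sequence slot.id + 1 locally while llama_send_kv_cache_seq_rm issues the corresponding removal for sequence slot.id (presumably so the other nodes in the distributed pipeline trim their caches as well). Below is a minimal sketch of that "one slot, two sequences" bookkeeping; the slot_kv struct and kv_seq_rm helper are hypothetical stand-ins used only to illustrate the slot.id / slot.id + 1 pairing visible in the diff, not prima.cpp's actual server types or API.

#include <cstdio>

// Minimal sketch (hypothetical types; not prima.cpp's server structs). It only
// illustrates the slot.id / slot.id + 1 sequence pairing visible in the diff:
// draft tokens live on their own sequence so they can be dropped after
// verification without disturbing the slot's committed tokens.
struct slot_kv {
    int id;                                  // server slot id
    int main_seq() const { return id; }      // sequence holding committed tokens
    int spec_seq() const { return id + 1; }  // sequence holding draft tokens
};

// Hypothetical stand-in for llama_kv_cache_seq_rm / llama_send_kv_cache_seq_rm:
// the real calls remove the cells of one sequence from position p0 to the end
// (p1 = -1); here we just log which sequence would be trimmed and from where.
static void kv_seq_rm(const char * where, int seq_id, int p0) {
    std::printf("%s: remove seq %d from pos %d to end\n", where, seq_id, p0);
}

int main() {
    slot_kv slot{0};
    int n_past = 42;  // placeholder for the position after the accepted prefix

    // After verifying the draft, drop the speculative tail locally ...
    kv_seq_rm("local ", slot.spec_seq(), n_past);
    // ... and issue the same removal for the main sequence, as the diff does
    // via llama_send_kv_cache_seq_rm (presumably for the other nodes).
    kv_seq_rm("remote", slot.main_seq(), n_past);
    return 0;
}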