mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-17 04:59:39 +00:00
server: fix bugs when running speculative decoding
This commit is contained in:
parent
b019a707b8
commit
86ca21e49c
2 changed files with 2 additions and 1 deletion
|
@@ -2542,7 +2542,7 @@ struct server_context {
|
|||
llama_batch_add(slot.batch_spec, draft[i], slot.n_past + 1 + i, { slot.id + 1 }, true);
|
||||
}
|
||||
|
||||
llama_decode(ctx, slot.batch_spec);
|
||||
llama_decode(ctx, slot.batch_spec, true);
|
||||
|
||||
// the accepted tokens from the speculation
|
||||
const auto ids = gpt_sampler_sample_and_accept_n(slot.smpl, ctx, draft);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue