mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-11 21:32:11 +00:00
speculative-simple : add checkpoint support (#22227)
* speculative-simple : add checkpoint support * cont : fix build
This commit is contained in:
parent
225088ea76
commit
bcb5eeb645
2 changed files with 112 additions and 10 deletions
|
|
@ -2961,7 +2961,13 @@ private:
|
|||
|
||||
// verify and try to accept the draft
|
||||
{
|
||||
common_sampler_ptr smpl_save(common_sampler_clone(slot.smpl.get()));
|
||||
const bool use_ckpt = slot.ctx_seq_rm_type == COMMON_CONTEXT_SEQ_RM_TYPE_FULL;
|
||||
|
||||
// only save the sampler sampler state if we use checkpoints
|
||||
common_sampler_ptr smpl_save;
|
||||
if (use_ckpt) {
|
||||
smpl_save.reset(common_sampler_clone(slot.smpl.get()));
|
||||
}
|
||||
|
||||
GGML_ASSERT(slot.spec_i_batch.size() == n_draft + 1);
|
||||
auto accepted = common_sampler_sample_and_accept_n(slot.smpl.get(), slot.ctx, slot.spec_i_batch, slot.spec_draft);
|
||||
|
|
@ -2973,7 +2979,7 @@ private:
|
|||
|
||||
// check for partial draft acceptance
|
||||
if (accepted.size() < slot.spec_draft.size() + 1) {
|
||||
if (slot.ctx_seq_rm_type == COMMON_CONTEXT_SEQ_RM_TYPE_FULL) {
|
||||
if (use_ckpt) {
|
||||
// partial acceptance is not supported by the context -> truncate the draft and restore the state
|
||||
slot.spec_draft = std::move(accepted);
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue