fixed a bug with drafting tokens

This commit is contained in:
Concedo 2024-12-23 11:36:08 +08:00
parent fd5100c382
commit 10d4fc637d

View file

@@ -3611,7 +3611,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
{
logits_to_sample = draft_results.drafted_amount;
}
-while(logits_sampled<logits_to_sample && remaining_tokens>0 && !abort_draft)
+while(logits_sampled<logits_to_sample && remaining_tokens>0 && !abort_draft && !early_abort)
{
if(logits_sampled>0)
{