support Moonlight

This commit is contained in:
Atream 2025-02-23 14:21:18 +00:00
parent 95d937c51d
commit e8e02e5ccc
3 changed files with 4 additions and 10 deletions

View file

@@ -207,7 +207,7 @@ def prefill_and_generate(model, tokenizer, inputs, max_new_tokens=10000, use_cud
tokens.append(int(next_token))
seq_length += 1
-if next_token[0].item() == tokenizer.eos_token_id or tokenizer.decode(next_token) == '<|im_end|>':
+if next_token[0].item() == tokenizer.eos_token_id or tokenizer.decode(next_token.tolist()) == '<|im_end|>':
print(stream.end(), end="", flush=True)
break
else: