mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-09 13:55:27 +00:00
update fp8 kernel tutorial
This commit is contained in:
parent
ca7366d2db
commit
4dc5518e4d
7 changed files with 46 additions and 5 deletions
|
@ -340,7 +340,7 @@ class TransformersInterface(BackendInterfaceBase):
|
|||
sm_scale=(self.model.config.qk_rope_head_dim + self.model.config.qk_nope_head_dim) ** (-0.5), q_data_type=torch.bfloat16, kv_data_type=torch.bfloat16)
|
||||
next_token = self.decode_one_tokens()
|
||||
self.profiler.inc("decode")
|
||||
if next_token == self.tokenizer.eos_token_id:
|
||||
if next_token == self.tokenizer.eos_token_id or "<|im_end|>" == self.tokenizer.decode(next_token):
|
||||
assert self.args.batch_size == 1
|
||||
break
|
||||
yield self.append_new_tokens(next_token)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue