mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 23:34:35 +00:00
Merge branch 'fix_precision_MLA' of https://github.com/kvcache-ai/ktransformers into server-prefix-cache
This commit is contained in:
commit
73d072f609
3 changed files with 14 additions and 4 deletions
|
@@ -104,7 +104,10 @@ class KTransformersInterface(TransformersInterface):
|
|||
torch.cuda.synchronize()
|
||||
logits = logits[0, -1, :]
|
||||
return self.logits_to_token(logits)
|
||||
|
||||
|
||||
if self.args.use_cuda_graph:
|
||||
warm_uped = True
|
||||
|
||||
if self.use_static_cache:
|
||||
mask = torch.ones((1, self.seq_length)).to(torch_device)
|
||||
logits = self.model(
|
||||
|
@@ -118,7 +121,6 @@ class KTransformersInterface(TransformersInterface):
|
|||
else:
|
||||
logits = self.model(self.current_ids, return_dict=False)[0]
|
||||
logits = logits[0, -1, :]
|
||||
warm_uped = True
|
||||
|
||||
return self.logits_to_token(logits)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue