mirror of https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-13 00:29:59 +00:00

fix flashinfer precision
parent 96d75d53df
commit d453c320f1
5 changed files with 151 additions and 61 deletions
@@ -25,7 +25,7 @@ from ktransformers.operators.triton_attention import decode_attention_fwd_grouped
 import os
 from ktransformers.operators.flashinfer_wrapper import flashinfer_enabled
 if flashinfer_enabled:
-    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton, attention_ref
+    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton

 logger = logging.getLogger("attention")

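
For context, below is a minimal sketch of the FlashInfer gating pattern this hunk touches, reconstructed only from the diff lines above; the surrounding attention code and the exact module layout are assumptions, not part of this commit. The MLA wrapper is imported only when flashinfer_wrapper reports FlashInfer as usable, and after this change only MLAWrapperSingleton is imported at this point.

# Minimal sketch, assuming a module shaped like the file in the diff context;
# not the full ktransformers attention module.
import logging

from ktransformers.operators.flashinfer_wrapper import flashinfer_enabled

if flashinfer_enabled:
    # Only imported when FlashInfer is actually usable on this build/device;
    # after this commit the attention_ref import is no longer pulled in here.
    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton

logger = logging.getLogger("attention")
logger.debug("FlashInfer MLA path enabled: %s", flashinfer_enabled)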