fix flashinfer precision

Atream 2025-03-07 14:07:00 +00:00
parent 96d75d53df
commit d453c320f1
5 changed files with 151 additions and 61 deletions


@@ -25,7 +25,7 @@ from ktransformers.operators.triton_attention import decode_attention_fwd_grouped
 import os
 from ktransformers.operators.flashinfer_wrapper import flashinfer_enabled
 if flashinfer_enabled:
-    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton, attention_ref
+    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton
 logger = logging.getLogger("attention")