fix flashinfer precision

Atream 2025-03-07 14:07:00 +00:00
parent 96d75d53df
commit d453c320f1
5 changed files with 151 additions and 61 deletions


@@ -25,7 +25,7 @@ from ktransformers.operators.triton_attention import decode_attention_fwd_grouped
 import os
 from ktransformers.operators.flashinfer_wrapper import flashinfer_enabled
 if flashinfer_enabled:
-    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton, attention_ref
+    from ktransformers.operators.flashinfer_wrapper import MLAWrapperSingleton
 logger = logging.getLogger("attention")