merge main; Add torch q8 linear

2025-09-09 13:55:27 +00:00 · 2025-03-14 05:52:07 -04:00 · 2025-03-14 05:52:07 -04:00 · ed8437413b
commit ed8437413b
parent 6c4ed59175
27 changed files with 1561 additions and 114 deletions
--- a/ktransformers/operators/dynamic_attention.py
+++ b/ktransformers/operators/dynamic_attention.py
@@ -17,7 +17,10 @@ import logging
 logger = logging.getLogger("dynamic_attention")
 sys.path.append(os.path.dirname(__file__) + "/../ktransformers_ext/cpu_backend")
 from ktransformers.operators.cpuinfer import CPUInfer, CPUInferKVCache
-from flash_attn import flash_attn_func, flash_attn_with_kvcache
+try:  # flash_attn is treated as optional so this module can still be imported without it
+    from flash_attn import flash_attn_func, flash_attn_with_kvcache
+except ImportError:  # only a failed import is expected; a bare except would also trap KeyboardInterrupt/SystemExit
+    print("flash attn not found")  # NOTE: flash_attn_func / flash_attn_with_kvcache stay unbound on this path


 import math