fix-singleton

2025-09-10 15:29:39 +00:00 · 2025-03-14 04:16:53 +00:00 · 2025-03-14 04:16:53 +00:00 · 6f43bbe55f
commit 6f43bbe55f
parent 7f57769c23
4 changed files with 11 additions and 4 deletions
--- a/ktransformers/operators/dynamic_attention.py
+++ b/ktransformers/operators/dynamic_attention.py
@@ -26,6 +26,7 @@ import json

 class DynamicScaledDotProductAttention:
    remaining_length: int
+    cpu_infer = None

    def __init__(
        self,
@@ -180,7 +181,9 @@ class DynamicScaledDotProductAttention:
            self.preselect_block_num = 0  # block_num before preselect
            self.evict_tokens = 0

-        self.cpu_infer = CPUInfer(threads_num)
+        if DynamicScaledDotProductAttention.cpu_infer is None:
+            DynamicScaledDotProductAttention.cpu_infer = CPUInfer(threads_num)
+            self.cpu_infer = DynamicScaledDotProductAttention.cpu_infer
        self.local_thread = CPUInferKVCache(
            self.layer_num,
            self.kv_head_num,