mirror of https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-11 07:44:35 +00:00
fix local_chat.py chunk_size not taking effect for experts
This commit is contained in:
parent adc0906967
commit 71a5fc5770
2 changed files with 3 additions and 2 deletions
@@ -70,6 +70,7 @@ def local_chat(
     torch.set_grad_enabled(False)

     Config().cpu_infer = cpu_infer
+    Config().chunk_size = chunk_size
     if torch.xpu.is_available():
         use_cuda_graph = False

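The one-line addition above works because Config is evidently shared process-wide: a value written in local_chat is later read by KExpertsCPU when it sizes its expert buffers. A minimal sketch of that pattern, assuming a singleton-style Config (the mechanics below are illustrative, not ktransformers' actual Config implementation):

    class Config:
        # Minimal sketch of a process-wide singleton config (assumed pattern,
        # not ktransformers' actual Config implementation).
        _instance = None

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance.cpu_infer = 10     # illustrative defaults
                cls._instance.chunk_size = 512
            return cls._instance

    # writer side: what local_chat now does with the CLI argument
    Config().chunk_size = 8192

    # reader side: what KExpertsCPU sees when it sizes its expert buffers
    assert Config().chunk_size == 8192

Before this commit, the chunk_size passed to local_chat was never stored on Config, so the experts code could not see it.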
@@ -213,7 +213,7 @@ class KExpertsCPU(KExpertsBase):
             self.config.num_experts_per_tok,
             self.config.hidden_size,
             self.config.moe_intermediate_size,
-            max(cuda_graphs),
+            max(cuda_graphs) if isinstance(cuda_graphs, list) else Config().chunk_size,
             gate_ptr,
             up_ptr,
             down_ptr,
@@ -231,7 +231,7 @@ class KExpertsCPU(KExpertsBase):
             self.config.num_experts_per_tok,
             self.config.hidden_size,
             self.config.moe_intermediate_size,
-            max(cuda_graphs),
+            max(cuda_graphs) if isinstance(cuda_graphs, list) else Config().chunk_size,
             gate_ptr,
             up_ptr,
             down_ptr,
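Both hunks guard the same buffer-capacity argument passed to the experts backend. The old code called max(cuda_graphs) unconditionally, which assumes cuda_graphs is always a list; when it is not (for example a single int, or when CUDA graphs are disabled), the capacity now falls back to the user-configured chunk size instead. A small standalone sketch of the guard's behavior, with illustrative values (the chunk_size parameter stands in for Config().chunk_size):

    def buffer_capacity(cuda_graphs, chunk_size):
        # Mirrors the guarded expression from the diff: take the largest
        # captured CUDA-graph batch size when a list is supplied, otherwise
        # fall back to the configured chunk size.
        return max(cuda_graphs) if isinstance(cuda_graphs, list) else chunk_size

    print(buffer_capacity([1, 64, 256], 8192))  # -> 256
    print(buffer_capacity(64, 8192))            # -> 8192; max(64) alone would raise TypeError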