mirror of https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-11 07:44:35 +00:00
fix local_chat.py chunk_size not taking effect for experts
This commit is contained in:
parent adc0906967
commit 71a5fc5770
2 changed files with 3 additions and 2 deletions
@@ -70,6 +70,7 @@ def local_chat(
     torch.set_grad_enabled(False)

     Config().cpu_infer = cpu_infer
+    Config().chunk_size = chunk_size
     if torch.xpu.is_available():
         use_cuda_graph = False

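The one-line addition above works because Config is evidently shared process-wide: a value written in local_chat is later read by KExpertsCPU when it sizes its expert buffers. A minimal sketch of that pattern, assuming a singleton-style Config (the mechanics below are illustrative, not ktransformers' actual Config implementation):

    class Config:
        # Minimal sketch of a process-wide singleton config (assumed pattern,
        # not ktransformers' actual Config implementation).
        _instance = None

        def __new__(cls):
            if cls._instance is None:
                cls._instance = super().__new__(cls)
                cls._instance.cpu_infer = 10     # illustrative defaults
                cls._instance.chunk_size = 512
            return cls._instance

    # writer side: what local_chat now does with the CLI argument
    Config().chunk_size = 8192

    # reader side: what KExpertsCPU sees when it sizes its expert buffers
    assert Config().chunk_size == 8192

Before this commit, the chunk_size passed to local_chat was never stored on Config, so the experts code could not see it.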
@@ -213,7 +213,7 @@ class KExpertsCPU(KExpertsBase):
             self.config.num_experts_per_tok,
             self.config.hidden_size,
             self.config.moe_intermediate_size,
-            max(cuda_graphs),
+            max(cuda_graphs) if isinstance(cuda_graphs, list) else Config().chunk_size,
             gate_ptr,
             up_ptr,
             down_ptr,
@@ -231,7 +231,7 @@ class KExpertsCPU(KExpertsBase):
             self.config.num_experts_per_tok,
             self.config.hidden_size,
             self.config.moe_intermediate_size,
-            max(cuda_graphs),
+            max(cuda_graphs) if isinstance(cuda_graphs, list) else Config().chunk_size,
             gate_ptr,
             up_ptr,
             down_ptr,
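Both hunks guard the same buffer-capacity argument passed to the experts backend. The old code called max(cuda_graphs) unconditionally, which assumes cuda_graphs is always a list; when it is not (for example a single int, or when CUDA graphs are disabled), the capacity now falls back to the user-configured chunk size instead. A small standalone sketch of the guard's behavior, with illustrative values (the chunk_size parameter stands in for Config().chunk_size):

    def buffer_capacity(cuda_graphs, chunk_size):
        # Mirrors the guarded expression from the diff: take the largest
        # captured CUDA-graph batch size when a list is supplied, otherwise
        # fall back to the configured chunk size.
        return max(cuda_graphs) if isinstance(cuda_graphs, list) else chunk_size

    print(buffer_capacity([1, 64, 256], 8192))  # -> 256
    print(buffer_capacity(64, 8192))            # -> 8192; max(64) alone would raise TypeError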