diff --git a/ktransformers/operators/experts.py b/ktransformers/operators/experts.py
index 44f0037..81135ea 100644
--- a/ktransformers/operators/experts.py
+++ b/ktransformers/operators/experts.py
@@ -727,7 +727,7 @@ class KDeepseekV2MoE(BaseInjectedModule, DeepseekV2MoE):
         )
         return final_out
 
-class KMisrtalSparseMoEBlock(BaseInjectedModule, MixtralSparseMoeBlock):
+class KMistralSparseMoEBlock(BaseInjectedModule, MixtralSparseMoeBlock):
     def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
         """ """
 
diff --git a/ktransformers/optimize/optimize_rules/Mixtral.yaml b/ktransformers/optimize/optimize_rules/Mixtral.yaml
index ad7d293..7d48812 100644
--- a/ktransformers/optimize/optimize_rules/Mixtral.yaml
+++ b/ktransformers/optimize/optimize_rules/Mixtral.yaml
@@ -19,7 +19,7 @@
     name: "^model\\.layers\\..*\\.block_sparse_moe$"
     class: ktransformers.models.modeling_mixtral.MixtralSparseMoeBlock
   replace:
-    class: ktransformers.operators.experts.KMisrtalSparseMoEBlock
+    class: ktransformers.operators.experts.KMistralSparseMoEBlock
 - match:
     name: "^model\\.layers\\..*\\.block_sparse_moe\\.experts$"
   replace: