From c9a0c442135b1d9a2628d45a796c41ed7e11fabe Mon Sep 17 00:00:00 2001
From: Atream <80757050+Atream@users.noreply.github.com>
Date: Mon, 17 Mar 2025 17:03:52 +0800
Subject: [PATCH] Update
 DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml

---
 .../DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml
index fa8c03d..849439c 100644
--- a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml
+++ b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-fp8-linear-ggml-experts.yaml
@@ -66,7 +66,7 @@
     name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.gate$"
     class: ktransformers.models.modeling_deepseek_v3.MoEGate
   replace:
-    class: ktransformers.operators.gate.KMoEGate
+    class: ktransformers.operators.gate.KMoEGateDeepSeekV3
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
@@ -74,7 +74,7 @@
     name: "^model\\.layers\\.([3456][0-9])\\.mlp\\.gate$"
     class: ktransformers.models.modeling_deepseek_v3.MoEGate
   replace:
-    class: ktransformers.operators.gate.KMoEGate # mlp module with custom forward function
+    class: ktransformers.operators.gate.KMoEGateDeepSeekV3 # mlp module with custom forward function
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
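
Notes (commentary, not part of the patch): both hunks replace the generic
ktransformers.operators.gate.KMoEGate operator with the DeepSeek-V3-specific
ktransformers.operators.gate.KMoEGateDeepSeekV3 in the two multi-GPU gate rules.
The layer-name regexes split the MoE gates across devices: (0|[1-9]|[12][0-9])
matches layers 0-29 (mapped to cuda:0), and ([3456][0-9]) matches layers 30-69
(mapped to cuda:1). A minimal sketch of how the first rule reads once the patch
is applied (the enclosing "- match:" key is assumed from the usual structure of
ktransformers optimize-rule YAML and is shown only for context):

- match:
    name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.gate$"  # gate modules of layers 0-29
    class: ktransformers.models.modeling_deepseek_v3.MoEGate     # original module class to match
  replace:
    class: ktransformers.operators.gate.KMoEGateDeepSeekV3       # injected gate operator
    kwargs:
      generate_device: "cuda:0"  # device used during generation
      prefill_device: "cuda:0"   # device used during prefill

The second rule is identical apart from the ([3456][0-9]) layer pattern and the
"cuda:1" device settings.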