From 0564ac8465d98ef92bdbbb05c7e098f8ffda25d2 Mon Sep 17 00:00:00 2001
From: Azure
Date: Wed, 12 Feb 2025 04:11:00 +0000
Subject: [PATCH] update marlin expert example

---
 .../DeepSeek-V3-Chat-multi-gpu-marlin.yaml    | 22 +++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-marlin.yaml b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-marlin.yaml
index 06ab4db..92571b5 100644
--- a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-marlin.yaml
+++ b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-multi-gpu-marlin.yaml
@@ -79,6 +79,24 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 
+- match:
+    name: "^model\\.layers\\.(0|[1-4])\\.mlp\\.experts$"  # inject experts in layer 0~4 as marlin expert
+  replace:
+    class: ktransformers.operators.experts.KTransformersExperts
+    kwargs:
+      generate_device: "cuda:0"  # run in cuda:0
+      generate_op: "KExpertsMarlin"
+  recursive: False
+
+- match:
+    name: "^model\\.layers\\.([3][0])\\.mlp\\.experts$"  # inject experts in layer 30 only as marlin expert; use (3[01]) to also cover layer 31
+  replace:
+    class: ktransformers.operators.experts.KTransformersExperts
+    kwargs:
+      generate_device: "cuda:1"  # run in cuda:1
+      generate_op: "KExpertsMarlin"
+  recursive: False
+
 - match:
     name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.experts$"
   replace:
@@ -139,5 +157,5 @@
   replace:
     class: "default"
     kwargs:
-      generate_device: "cuda:1"
-      prefill_device: "cuda:1"
+      generate_device: "cuda:0"
+      prefill_device: "cuda:0"