diff --git a/ktransformers/local_chat.py b/ktransformers/local_chat.py index f16ee7f..513f480 100644 --- a/ktransformers/local_chat.py +++ b/ktransformers/local_chat.py @@ -32,8 +32,7 @@ custom_models = { ktransformer_rules_dir = os.path.dirname(os.path.abspath(__file__)) + "/optimize/optimize_rules/" default_optimize_rules = { "DeepseekV2ForCausalLM": ktransformer_rules_dir + "DeepSeek-V2-Chat.yaml", - # "DeepseekV3ForCausalLM": ktransformer_rules_dir + "DeepSeek-V2-Chat.yaml", - "DeepseekV3ForCausalLM": ktransformer_rules_dir + "DeepSeek-V3-Chat-multi-gpu.yaml", + "DeepseekV3ForCausalLM": ktransformer_rules_dir + "DeepSeek-V3-Chat.yaml", "Qwen2MoeForCausalLM": ktransformer_rules_dir + "Qwen2-57B-A14B-Instruct.yaml", "LlamaForCausalLM": ktransformer_rules_dir + "Internlm2_5-7b-Chat-1m.yaml", "MixtralForCausalLM": ktransformer_rules_dir + "Mixtral.yaml", diff --git a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml index 6fb87b7..4a306be 100644 --- a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml +++ b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml @@ -1,7 +1,7 @@ - match: - class: ktransformers.models.modeling_deepseek.DeepseekV3YarnRotaryEmbedding + class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding replace: - class: ktransformers.operators.RoPE.YarnRotaryEmbedding + class: ktransformers.operators.RoPE.RotaryEmbeddingV3 kwargs: generate_device: "cuda" prefill_device: "cuda" @@ -17,12 +17,19 @@ prefill_op: "KLinearTorch" - match: name: "^model\\.layers\\..*\\.mlp$" - class: ktransformers.models.modeling_deepseek.DeepseekV2MoE + class: ktransformers.models.modeling_deepseek_v3.DeepseekV3MoE replace: - class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function + class: ktransformers.operators.experts.KDeepseekV3MoE # mlp module with custom forward function kwargs: generate_device: "cuda" prefill_device: "cuda" +- match: + class: ktransformers.models.modeling_deepseek_v3.MoEGate + replace: + class: ktransformers.operators.gate.KMoEGate + kwargs: + generate_device: "cuda:0" + prefill_device: "cuda:0" - match: name: "^model\\.layers\\..*\\.mlp\\.experts$" replace: