use compile for gate, slight performance improvement

This commit is contained in:
Atream 2025-03-14 12:43:28 +00:00
parent 6c4ed59175
commit a889288fc1
9 changed files with 155 additions and 37 deletions

View file

@ -26,7 +26,7 @@
- match:
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"

View file

@ -147,7 +147,7 @@
name: "^model\\.layers\\.([0-9]|1[0-4])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
@ -157,7 +157,7 @@
name: "^model\\.layers\\.(1[5-9]|2[0-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
@ -167,7 +167,7 @@
name: "^model\\.layers\\.(3[0-9]|4[0-4])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:2"
prefill_device: "cuda:2"
@ -177,7 +177,7 @@
name: "^model\\.layers\\.(4[5-9]|5[0-9]|60)\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:3"
prefill_device: "cuda:3"

View file

@ -278,7 +278,7 @@
name: "^model\\.layers\\.([0-7])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
@ -288,7 +288,7 @@
name: "^model\\.layers\\.(8|9|1[0-5])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
@ -298,7 +298,7 @@
name: "^model\\.layers\\.(1[6-9]|2[0-3])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:2"
prefill_device: "cuda:2"
@ -308,7 +308,7 @@
name: "^model\\.layers\\.(2[4-9]|3[0-1])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:3"
prefill_device: "cuda:3"
@ -318,7 +318,7 @@
name: "^model\\.layers\\.(3[2-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:4"
prefill_device: "cuda:4"
@ -328,7 +328,7 @@
name: "^model\\.layers\\.(4[0-7])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:5"
prefill_device: "cuda:5"
@ -338,7 +338,7 @@
name: "^model\\.layers\\.(4[8-9]|5[0-5])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:6"
prefill_device: "cuda:6"
@ -348,7 +348,7 @@
name: "^model\\.layers\\.(5[6-9]|60)\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:7"
prefill_device: "cuda:7"

View file

@ -10,7 +10,7 @@
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\."
class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding
replace:
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
class: ktransformers.operators.RoPE.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
@ -18,7 +18,7 @@
name: "^model\\.layers\\.([3456][0-9])\\."
class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding
replace:
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
class: ktransformers.operators.RoPE.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"

View file

@ -10,7 +10,7 @@
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\."
class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding
replace:
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
class: ktransformers.operators.RoPE.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"

View file

@ -66,7 +66,7 @@
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
@ -74,7 +74,7 @@
name: "^model\\.layers\\.([3456][0-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate # mlp module with custom forward function
class: ktransformers.operators.gate.KMoEGateDeepSeekV3 # mlp module with custom forward function
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"

View file

@ -38,7 +38,7 @@
- match:
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
class: ktransformers.operators.gate.KMoEGate
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"