Use compile for gate; slight performance improvement

This commit is contained in:
Atream 2025-03-14 12:43:28 +00:00
parent 6c4ed59175
commit a889288fc1
9 changed files with 155 additions and 37 deletions

View file

@@ -278,7 +278,7 @@
name: "^model\\.layers\\.([0-7])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
@@ -288,7 +288,7 @@
name: "^model\\.layers\\.(8|9|1[0-5])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
@@ -298,7 +298,7 @@
name: "^model\\.layers\\.(1[6-9]|2[0-3])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:2"
prefill_device: "cuda:2"
@@ -308,7 +308,7 @@
name: "^model\\.layers\\.(2[4-9]|3[0-1])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:3"
prefill_device: "cuda:3"
@@ -318,7 +318,7 @@
name: "^model\\.layers\\.(3[2-9])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:4"
prefill_device: "cuda:4"
@@ -328,7 +328,7 @@
name: "^model\\.layers\\.(4[0-7])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:5"
prefill_device: "cuda:5"
@@ -338,7 +338,7 @@
name: "^model\\.layers\\.(4[8-9]|5[0-5])\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:6"
prefill_device: "cuda:6"
@@ -348,7 +348,7 @@
name: "^model\\.layers\\.(5[6-9]|60)\\.mlp\\.gate$"
class: ktransformers.models.modeling_deepseek_v3.MoEGate
replace:
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
kwargs:
generate_device: "cuda:7"
prefill_device: "cuda:7"