mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 11:49:51 +00:00
use compile for gate, slight performance improvement
This commit is contained in:
parent
6c4ed59175
commit
a889288fc1
9 changed files with 155 additions and 37 deletions
|
|
@@ -278,7 +278,7 @@
|
|||
name: "^model\\.layers\\.([0-7])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
|
@@ -288,7 +288,7 @@
|
|||
name: "^model\\.layers\\.(8|9|1[0-5])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
|
@@ -298,7 +298,7 @@
|
|||
name: "^model\\.layers\\.(1[6-9]|2[0-3])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:2"
|
||||
prefill_device: "cuda:2"
|
||||
|
|
@@ -308,7 +308,7 @@
|
|||
name: "^model\\.layers\\.(2[4-9]|3[0-1])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:3"
|
||||
prefill_device: "cuda:3"
|
||||
|
|
@@ -318,7 +318,7 @@
|
|||
name: "^model\\.layers\\.(3[2-9])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:4"
|
||||
prefill_device: "cuda:4"
|
||||
|
|
@@ -328,7 +328,7 @@
|
|||
name: "^model\\.layers\\.(4[0-7])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:5"
|
||||
prefill_device: "cuda:5"
|
||||
|
|
@@ -338,7 +338,7 @@
|
|||
name: "^model\\.layers\\.(4[8-9]|5[0-5])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:6"
|
||||
prefill_device: "cuda:6"
|
||||
|
|
@@ -348,7 +348,7 @@
|
|||
name: "^model\\.layers\\.(5[6-9]|60)\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
-class: ktransformers.operators.gate.KMoEGate
+class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:7"
|
||||
prefill_device: "cuda:7"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue