mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 23:34:35 +00:00
use compile for gate, slight performance improvement
This commit is contained in:
parent
6c4ed59175
commit
a889288fc1
9 changed files with 155 additions and 37 deletions
|
@ -26,7 +26,7 @@
|
|||
- match:
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
|
|
@ -147,7 +147,7 @@
|
|||
name: "^model\\.layers\\.([0-9]|1[0-4])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
@ -157,7 +157,7 @@
|
|||
name: "^model\\.layers\\.(1[5-9]|2[0-9])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
@ -167,7 +167,7 @@
|
|||
name: "^model\\.layers\\.(3[0-9]|4[0-4])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:2"
|
||||
prefill_device: "cuda:2"
|
||||
|
@ -177,7 +177,7 @@
|
|||
name: "^model\\.layers\\.(4[5-9]|5[0-9]|60)\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:3"
|
||||
prefill_device: "cuda:3"
|
||||
|
|
|
@ -278,7 +278,7 @@
|
|||
name: "^model\\.layers\\.([0-7])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
@ -288,7 +288,7 @@
|
|||
name: "^model\\.layers\\.(8|9|1[0-5])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
@ -298,7 +298,7 @@
|
|||
name: "^model\\.layers\\.(1[6-9]|2[0-3])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:2"
|
||||
prefill_device: "cuda:2"
|
||||
|
@ -308,7 +308,7 @@
|
|||
name: "^model\\.layers\\.(2[4-9]|3[0-1])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:3"
|
||||
prefill_device: "cuda:3"
|
||||
|
@ -318,7 +318,7 @@
|
|||
name: "^model\\.layers\\.(3[2-9])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:4"
|
||||
prefill_device: "cuda:4"
|
||||
|
@ -328,7 +328,7 @@
|
|||
name: "^model\\.layers\\.(4[0-7])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:5"
|
||||
prefill_device: "cuda:5"
|
||||
|
@ -338,7 +338,7 @@
|
|||
name: "^model\\.layers\\.(4[8-9]|5[0-5])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:6"
|
||||
prefill_device: "cuda:6"
|
||||
|
@ -348,7 +348,7 @@
|
|||
name: "^model\\.layers\\.(5[6-9]|60)\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:7"
|
||||
prefill_device: "cuda:7"
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\."
|
||||
class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding
|
||||
replace:
|
||||
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
|
||||
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
@ -18,7 +18,7 @@
|
|||
name: "^model\\.layers\\.([3456][0-9])\\."
|
||||
class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding
|
||||
replace:
|
||||
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
|
||||
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\."
|
||||
class: ktransformers.models.modeling_deepseek_v3.DeepseekV3RotaryEmbedding
|
||||
replace:
|
||||
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
|
||||
class: ktransformers.operators.RoPE.YarnRotaryEmbeddingV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
|
|
@ -66,7 +66,7 @@
|
|||
name: "^model\\.layers\\.(0|[1-9]|[12][0-9])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
@ -74,7 +74,7 @@
|
|||
name: "^model\\.layers\\.([3456][0-9])\\.mlp\\.gate$"
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate # mlp module with custom forward function
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3 # MoE gate module with custom forward function
|
||||
kwargs:
|
||||
generate_device: "cuda:1"
|
||||
prefill_device: "cuda:1"
|
||||
|
|
|
@ -38,7 +38,7 @@
|
|||
- match:
|
||||
class: ktransformers.models.modeling_deepseek_v3.MoEGate
|
||||
replace:
|
||||
class: ktransformers.operators.gate.KMoEGate
|
||||
class: ktransformers.operators.gate.KMoEGateDeepSeekV3
|
||||
kwargs:
|
||||
generate_device: "cuda:0"
|
||||
prefill_device: "cuda:0"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue