update yaml example; update version idx; update Dockerfile

TangJingqi 2024-08-29 22:39:20 +08:00
parent 6735beb5b6
commit 8747c099f2
4 changed files with 34 additions and 34 deletions
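
The YAML example below now splits DeepSeek-V2's 60 decoder layers evenly across four GPUs (layers 0-14, 15-29, 30-44, 45-59) instead of the previous 10/10/10/30 split. As a quick sanity check, here is a minimal Python sketch (not part of this commit; the module name is illustrative) confirming that the four new layer-range regexes cover every layer exactly once:

```python
import re

# The four updated layer-range patterns from the diff below, one per GPU.
patterns = [
    r"^model\.layers\.([0-9]|[1][0-4])\.",     # layers 0-14  -> cuda:0
    r"^model\.layers\.([2][0-9]|[1][5-9])\.",  # layers 15-29 -> cuda:1
    r"^model\.layers\.([3][0-9]|[4][0-4])\.",  # layers 30-44 -> cuda:2
    r"^model\.layers\.([5][0-9]|[4][5-9])\.",  # layers 45-59 -> cuda:3
]

for layer in range(60):
    name = f"model.layers.{layer}.self_attn"  # illustrative module name
    hits = [i for i, p in enumerate(patterns) if re.match(p, name)]
    assert len(hits) == 1, f"layer {layer} matched GPUs {hits}"
```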


@@ -7,7 +7,7 @@
       generate_device: "cpu"
       prefill_device: "cpu"
 - match:
-    name: "^model\\.layers\\.([0-9])\\."
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -15,7 +15,7 @@
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\."
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -23,7 +23,7 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\."
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -31,7 +31,7 @@
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\."
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\."
     class: ktransformers.models.modeling_deepseek.DeepseekV2YarnRotaryEmbedding
   replace:
     class: ktransformers.operators.RoPE.YarnRotaryEmbedding
@@ -40,7 +40,7 @@
       generate_device: "cuda:3"
       prefill_device: "cuda:3"
 - match:
-    name: "^model\\.layers\\.([0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
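
Besides the layer ranges, these linear rules also narrow the negative lookahead: the old pattern skipped everything under self_attn, while the new one skips only self_attn.kv_b_proj, so the remaining attention projections become eligible for KTransformersLinear (kv_b_proj presumably stays a stock torch.nn.Linear because the optimized MLA attention operator reads its weights directly). A minimal regex demo of the difference, with illustrative module names:

```python
import re

old = r"^model\.layers\.([0-9]|[1][0-4])\.(?!self_attn).*$"
new = r"^model\.layers\.([0-9]|[1][0-4])\.(?!self_attn\.kv_b_proj).*$"

for module in [
    "model.layers.3.mlp.gate_proj",        # old: match, new: match
    "model.layers.3.self_attn.o_proj",     # old: skip,  new: match
    "model.layers.3.self_attn.kv_b_proj",  # old: skip,  new: skip
]:
    print(module, bool(re.match(old, module)), bool(re.match(new, module)))
```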
@@ -50,7 +50,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -60,7 +60,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -70,7 +70,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.(?!self_attn).*$" # regular expression
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.(?!self_attn\\.kv_b_proj).*$" # regular expression
     class: torch.nn.Linear # only match modules matching name and class simultaneously
   replace:
     class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
@@ -81,7 +81,7 @@
       generate_op: "KLinearMarlin"
       prefill_op: "KLinearTorch"
 - match:
-    name: "^model\\.layers\\.([0-9])\\.mlp$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -89,7 +89,7 @@
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -97,7 +97,7 @@
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -105,7 +105,7 @@
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.mlp$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.mlp$"
     class: ktransformers.models.modeling_deepseek.DeepseekV2MoE
   replace:
     class: ktransformers.operators.experts.KDeepseekV2MoE # mlp module with custom forward function
@@ -114,7 +114,7 @@
       generate_device: "cuda:3"
       prefill_device: "cuda:3"
 - match:
-    name: "^model\\.layers\\.([0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert parallelism
     kwargs:
@@ -125,7 +125,7 @@
       out_device: "cuda:0"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert parallelism
     kwargs:
@@ -136,7 +136,7 @@
       out_device: "cuda:1"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert parallelism
     kwargs:
@@ -147,7 +147,7 @@
       out_device: "cuda:2"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.mlp\\.experts$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.mlp\\.experts$"
   replace:
     class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert parallelism
     kwargs:
@@ -159,28 +159,28 @@
       out_device: "cuda:3"
   recursive: False # don't recursively inject submodules of this module
 - match:
-    name: "^model\\.layers\\.([0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "^model\\.layers\\.([1][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([2][0-9]|[1][5-9])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "^model\\.layers\\.([2][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([3][0-9]|[4][0-4])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "^model\\.layers\\.([345][0-9])\\.self_attn$"
+    name: "^model\\.layers\\.([5][0-9]|[4][5-9])\\.self_attn$"
   replace:
     class: ktransformers.operators.attention.KDeepseekV2Attention # optimized MLA implementation
     kwargs:
@@ -194,35 +194,35 @@
     kwargs:
       per_layer_prefill_intput_threshold: 0 # 0 disables layer-wise prefill
       transfer_map:
-        10: "cuda:1"
-        20: "cuda:2"
-        30: "cuda:3"
+        15: "cuda:1"
+        30: "cuda:2"
+        45: "cuda:3"
 - match:
-    name: "^model\\.layers\\.([0-9])\\."
+    name: "^model\\.layers\\.([0-9]|[1][0-4])\\."
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:0"
       prefill_device: "cuda:0"
 - match:
-    name: "(^model\\.layers\\.([1][0-9])\\.)"
+    name: "(^model\\.layers\\.([2][0-9]|[1][5-9])\\.)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:1"
       prefill_device: "cuda:1"
 - match:
-    name: "(^model\\.layers\\.([2][0-9])\\.)"
+    name: "(^model\\.layers\\.([3][0-9]|[4][0-4])\\.)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:2"
       prefill_device: "cuda:2"
 - match:
-    name: "(^model\\.layers\\.([345][0-9])\\.)|(^model.norm)|(^lm_head)"
+    name: "(^model\\.layers\\.([5][0-9]|[4][5-9])\\.)|(^model.norm)|(^lm_head)"
   replace:
     class: "default"
     kwargs:
       generate_device: "cuda:3"
-      prefill_device: "cuda:3"
\ No newline at end of file
+      prefill_device: "cuda:3"
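
The transfer_map keys mark the layer index at which hidden states are handed off to the next GPU, so they move from 10/20/30 to 15/30/45 to stay consistent with the new 15-layer groups. A hypothetical helper (not part of ktransformers) showing how the keys follow from the per-GPU split:

```python
# Assumed split: 60 DeepSeek-V2 layers, 15 per GPU, matching the regexes above.
layers_per_gpu = [15, 15, 15, 15]

transfer_map = {}
boundary = 0
for gpu, n in enumerate(layers_per_gpu[:-1], start=1):
    boundary += n
    transfer_map[boundary] = f"cuda:{gpu}"

print(transfer_map)  # {15: 'cuda:1', 30: 'cuda:2', 45: 'cuda:3'}
```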