[fix] format classes and files name

This commit is contained in:
TangJingqi 2024-08-15 10:44:59 +08:00
parent 1db4a67dca
commit 67043b4b5c
15 changed files with 212 additions and 212 deletions

View file

@ -10,27 +10,27 @@
name: "^model\\.layers\\.([012])$" # regular expression
class: torch.nn.Linear # only match modules matching name and class simultaneously
replace:
class: ktransformers.operators.linear.KTransformerLinear # optimized Kernel on quantized data types
class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
kwargs:
generate_device: "cuda:0"
prefill_device: "cuda:0"
generate_op: "QuantizedLinearMarlin"
prefill_op: "QuantizedLinearTorch"
generate_op: "KLinearMarlin"
prefill_op: "KLinearTorch"
- match:
name: "^model\\.layers\\.([012])\\.mlp$"
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeSparseMoeBlock
replace:
class: ktransformers.operators.experts.Qwen2MoeSparseMoeBlockInjected # mlp module with custom forward function
class: ktransformers.operators.experts.KQwen2MoeSparseMoeBlock # mlp module with custom forward function
- match:
name: "^model\\.layers\\.([012])\\.mlp\\.experts$"
replace:
class: ktransformers.operators.experts.KTransformersMLPExpert # custom MoE Kernel with expert paralleism
class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert paralleism
# device: "cpu" # which devices to load this module when initializing
kwargs:
prefill_device: "cuda:0"
prefill_mlp_type: "MLPExpertsTorch"
prefill_op: "KExpertsTorch"
generate_device: "cpu"
generate_mlp_type: "MLPCPUExperts"
generate_op: "KExpertsCPU"
out_device: "cuda:0"
recursive: False # don't recursively inject submodules of this module
@ -46,27 +46,27 @@
name: "^model\\.layers\\.([12][0-9]|[3-9])$" # regular expression
class: torch.nn.Linear # only match modules matching name and class simultaneously
replace:
class: ktransformers.operators.linear.KTransformerLinear # optimized Kernel on quantized data types
class: ktransformers.operators.linear.KTransformersLinear # optimized Kernel on quantized data types
kwargs:
generate_device: "cuda:1"
prefill_device: "cuda:1"
generate_op: "QuantizedLinearMarlin"
prefill_op: "QuantizedLinearTorch"
generate_op: "KLinearMarlin"
prefill_op: "KLinearTorch"
- match:
name: "^model\\.layers\\.([12][0-9]|[3-9])\\.mlp$"
class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeSparseMoeBlock
replace:
class: ktransformers.operators.experts.Qwen2MoeSparseMoeBlockInjected # mlp module with custom forward function
class: ktransformers.operators.experts.KQwen2MoeSparseMoeBlock # mlp module with custom forward function
- match:
name: "^model\\.layers\\.([12][0-9]|[3-9])\\.mlp\\.experts$"
replace:
class: ktransformers.operators.experts.KTransformersMLPExpert # custom MoE Kernel with expert paralleism
class: ktransformers.operators.experts.KTransformersExperts # custom MoE Kernel with expert paralleism
# device: "cpu" # which devices to load this module when initializing
kwargs:
prefill_device: "cuda:1"
prefill_mlp_type: "MLPExpertsTorch"
prefill_op: "KExpertsTorch"
generate_device: "cpu"
generate_mlp_type: "MLPCPUExperts"
generate_op: "KExpertsCPU"
out_device: "cuda:1"
recursive: False # don't recursively inject submodules of this module
@ -89,7 +89,7 @@
- match:
name: "^model$"
replace:
class: "ktransformers.operators.layer_wise_prefill.Qwen2MoeModelKTransformers"
class: "ktransformers.operators.layer_wise_prefill.KQwen2MoeModel"
kwargs:
per_layer_prefill_intput_threshold: 0 # 0 is close layer wise prefill
transfer_map: