[fix] format classes and files name

2026-04-28 11:49:51 +00:00 · 2024-08-15 10:44:59 +08:00 · 2024-08-15 10:44:59 +08:00 · 67043b4b5c
commit 67043b4b5c
parent 1db4a67dca
15 changed files with 212 additions and 212 deletions
--- a/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct-multi-gpu.yaml
+++ b/ktransformers/optimize/optimize_rules/Qwen2-57B-A14B-Instruct-multi-gpu.yaml
@ -10,27 +10,27 @@
    name: "^model\\.layers\\.([012])$"  # regular expression 
    class: torch.nn.Linear  # only match modules matching name and class simultaneously
  replace:
-    class: ktransformers.operators.linear.KTransformerLinear  # optimized Kernel on quantized data types
+    class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
    kwargs:
      generate_device: "cuda:0"
      prefill_device: "cuda:0"
-      generate_op: "QuantizedLinearMarlin"
-      prefill_op: "QuantizedLinearTorch"
+      generate_op: "KLinearMarlin"
+      prefill_op: "KLinearTorch"
 - match:
    name: "^model\\.layers\\.([012])\\.mlp$"
    class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeSparseMoeBlock
  replace:
-    class: ktransformers.operators.experts.Qwen2MoeSparseMoeBlockInjected     # mlp module with custom forward function
+    class: ktransformers.operators.experts.KQwen2MoeSparseMoeBlock     # mlp module with custom forward function
 - match:
    name: "^model\\.layers\\.([012])\\.mlp\\.experts$"
  replace:
-    class: ktransformers.operators.experts.KTransformersMLPExpert     # custom MoE Kernel with expert paralleism
+    class: ktransformers.operators.experts.KTransformersExperts     # custom MoE Kernel with expert paralleism
    # device: "cpu"   # which devices to load this module when initializing
    kwargs:
      prefill_device: "cuda:0"
-      prefill_mlp_type: "MLPExpertsTorch"
+      prefill_op: "KExpertsTorch"
      generate_device: "cpu"
-      generate_mlp_type:  "MLPCPUExperts"
+      generate_op:  "KExpertsCPU"
      out_device: "cuda:0"
  recursive: False # don't recursively inject submodules of this module

@ -46,27 +46,27 @@
    name: "^model\\.layers\\.([12][0-9]|[3-9])$"  # regular expression 
    class: torch.nn.Linear  # only match modules matching name and class simultaneously
  replace:
-    class: ktransformers.operators.linear.KTransformerLinear  # optimized Kernel on quantized data types
+    class: ktransformers.operators.linear.KTransformersLinear  # optimized Kernel on quantized data types
    kwargs:
      generate_device: "cuda:1"
      prefill_device: "cuda:1"
-      generate_op: "QuantizedLinearMarlin"
-      prefill_op: "QuantizedLinearTorch"
+      generate_op: "KLinearMarlin"
+      prefill_op: "KLinearTorch"
 - match:
    name: "^model\\.layers\\.([12][0-9]|[3-9])\\.mlp$"
    class: ktransformers.models.modeling_qwen2_moe.Qwen2MoeSparseMoeBlock
  replace:
-    class: ktransformers.operators.experts.Qwen2MoeSparseMoeBlockInjected     # mlp module with custom forward function
+    class: ktransformers.operators.experts.KQwen2MoeSparseMoeBlock     # mlp module with custom forward function
 - match:
    name: "^model\\.layers\\.([12][0-9]|[3-9])\\.mlp\\.experts$"
  replace:
-    class: ktransformers.operators.experts.KTransformersMLPExpert     # custom MoE Kernel with expert paralleism
+    class: ktransformers.operators.experts.KTransformersExperts     # custom MoE Kernel with expert paralleism
    # device: "cpu"   # which devices to load this module when initializing
    kwargs:
      prefill_device: "cuda:1"
-      prefill_mlp_type: "MLPExpertsTorch"
+      prefill_op: "KExpertsTorch"
      generate_device: "cpu"
-      generate_mlp_type:  "MLPCPUExperts"
+      generate_op:  "KExpertsCPU"
      out_device: "cuda:1"
  recursive: False # don't recursively inject submodules of this module

@ -89,7 +89,7 @@
 - match:
    name: "^model$"
  replace:
-    class: "ktransformers.operators.layer_wise_prefill.Qwen2MoeModelKTransformers"
+    class: "ktransformers.operators.layer_wise_prefill.KQwen2MoeModel"
    kwargs:
      per_layer_prefill_intput_threshold: 0 # 0 is close layer wise prefill
      transfer_map: