optimize GPU

2025-09-14 00:59:42 +00:00 · 2025-02-21 05:06:57 +00:00 · 2025-02-21 05:06:57 +00:00 · 7e1fe256c8
commit 7e1fe256c8
parent cf4da5fd47
8 changed files with 677 additions and 156 deletions
--- a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml
+++ b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat.yaml
@@ -5,6 +5,18 @@
    kwargs:
      generate_device: "cuda"
      prefill_device: "cuda"
+
+- match:
+    name: "^lm_head$"  # regular expression; anchored with ^ and $, so only the name "lm_head" itself matches
+    class: torch.nn.Linear  # a module is matched only when both its name and its class match
+  replace:
+    class: ktransformers.operators.linear.KTransformersLinear  # optimized kernel on quantized data types
+    kwargs:
+      generate_device: "cuda"
+      prefill_device: "cuda"
+      generate_op: "KLinearMarlin"  # NOTE(review): presumably the operator used in the generate phase - confirm
+      prefill_op: "KLinearTorch"  # NOTE(review): presumably the operator used in the prefill phase - confirm
+
 - match:
    name: "^model\\.layers\\.(?!.*self_attn\\.kv_b_proj).*$"  # regular expression 
    class: torch.nn.Linear  # only match modules matching name and class simultaneously