From a12e8ab46ef48c0e400c38232e804022a2ee9306 Mon Sep 17 00:00:00 2001 From: Aubrey Li Date: Fri, 21 Mar 2025 23:58:20 +0800 Subject: [PATCH] yaml: fix Marlin AssertionError Marlin quantized linear only supports GPU devices; when generate_op is changed to "KLinearMarlin", generate_device needs to be changed to "cuda" accordingly. Fixes: e5b001d76fba ("Update readme; Format code; Add example yaml.") --- .../optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml b/ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml index c20973d..7f3e44e 100644 --- a/ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml +++ b/ktransformers/optimize/optimize_rules/DeepSeek-V2-Lite-Chat.yaml @@ -22,7 +22,7 @@ replace: class: ktransformers.operators.linear.KTransformersLinear kwargs: - generate_device: "cpu" + generate_device: "cuda" prefill_device: "cuda" generate_op: "KLinearMarlin" prefill_op: "KLinearTorch"