From 48dfbc8f9f3e64c5528d9fa0f75167881de0b4bd Mon Sep 17 00:00:00 2001 From: qiyuxinlin <1668068727@qq.com> Date: Tue, 29 Apr 2025 08:09:39 +0000 Subject: [PATCH] change inject yaml --- .../DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml index 4d5ecb0..56320bf 100644 --- a/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml +++ b/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-fp8-linear-ggml-experts-serve.yaml @@ -44,7 +44,7 @@ - match: name: "^model\\.layers\\..*\\.self_attn$" replace: - class: ktransformers.operators.attention.flashinfer_attn # optimized MLA implementation + class: ktransformers.operators.balance_serve_attention.flashinfer_attn # optimized MLA implementation kwargs: generate_device: "cuda" prefill_device: "cuda"