Change inject YAML: use balance_serve_attention.flashinfer_attn for self_attn

This commit is contained in:
qiyuxinlin 2025-04-29 08:09:39 +00:00
parent 2a224b256e
commit 48dfbc8f9f

View file

@@ -44,7 +44,7 @@
 - match:
     name: "^model\\.layers\\..*\\.self_attn$"
   replace:
-    class: ktransformers.operators.attention.flashinfer_attn # optimized MLA implementation
+    class: ktransformers.operators.balance_serve_attention.flashinfer_attn # optimized MLA implementation
     kwargs:
       generate_device: "cuda"
       prefill_device: "cuda"