support smt and glm4

This commit is contained in:
djw 2025-07-24 12:31:01 +00:00
parent 613f0b7c37
commit 590fcb41cd
5 changed files with 95 additions and 7 deletions

View file

@@ -49,7 +49,7 @@
- match:
name: "^model\\.layers\\..*\\.mlp\\.experts$"
replace:
-class: ktransformers.operators.experts.KTransformersExpertsV2 # custom MoE Kernel with expert paralleism
+class: ktransformers.operators.experts.KGlm4Experts # custom MoE Kernel with expert paralleism
kwargs:
prefill_device: "cuda"
prefill_op: None