support smt and glm4

This commit is contained in:
djw 2025-07-24 09:39:19 +00:00
parent b66d96db97
commit 613f0b7c37
8 changed files with 115 additions and 28 deletions

View file

@ -49,7 +49,7 @@
- match:
name: "^model\\.layers\\..*\\.block_sparse_moe\\.experts$"
replace:
class: ktransformers.operators.experts.KTransformersExpertsV2 # custom MoE Kernel with expert parallelism
class: ktransformers.operators.experts.KSmallthinkerExperts # custom MoE Kernel with expert parallelism
kwargs:
prefill_device: "cuda"
prefill_op: None