support smt and glm4

This commit is contained in:
djw 2025-07-25 12:48:51 +00:00
parent 712ad1fa3c
commit 48bc6185b5
9 changed files with 65 additions and 74 deletions

View file

@@ -64,7 +64,7 @@ class KGlm4MoeMLP(Glm4MoeMLP, BaseInjectedModule):
generate_device: str = "cuda",
**kwargs):
BaseInjectedModule.__init__(self, key, gguf_loader, config, orig_module, prefill_device, **kwargs)
self.orig_module.__init__(orig_module.config)
self.orig_module.__init__(orig_module.config, orig_module.hidden_size, orig_module.intermediate_size)
def forward(self, x, bsz_tensor):
    """Run the gated MLP and return the down-projected result.

    Computes ``down_proj(act_fn(gate_proj(x)) * up_proj(x))``; every
    projection call also receives ``bsz_tensor`` as its second argument.

    Args:
        x: Input passed to both the gate and up projections.
        bsz_tensor: Forwarded unchanged to ``gate_proj``, ``up_proj`` and
            ``down_proj`` (presumably a batch-size tensor; confirm against
            the injected linear implementation).

    Returns:
        Whatever ``self.down_proj`` returns for the gated product.
    """
    # Gate branch: projection followed by the activation function.
    gated = self.act_fn(self.gate_proj(x, bsz_tensor))
    # Up branch: plain projection of the same input.
    lifted = self.up_proj(x, bsz_tensor)
    # Combine the two branches, then project back down.
    return self.down_proj(gated * lifted, bsz_tensor)