support smt and glm4

2025-09-16 01:59:42 +00:00 · 2025-07-25 12:48:51 +00:00 · 2025-07-25 12:48:51 +00:00 · 48bc6185b5
commit 48bc6185b5
parent 712ad1fa3c
9 changed files with 65 additions and 74 deletions
--- a/ktransformers/models/modeling_glm4_moe.py
+++ b/ktransformers/models/modeling_glm4_moe.py
@@ -625,7 +625,7 @@ class Glm4MoeForCausalLM(Glm4MoePreTrainedModel, GenerationMixin):
            inputs_embeds=inputs_embeds,
            use_cache=use_cache,
            cache_position=cache_position,
-            **kwargs,
+            # **kwargs,
        )

        hidden_states = outputs.last_hidden_state
@@ -635,7 +635,7 @@ class Glm4MoeForCausalLM(Glm4MoePreTrainedModel, GenerationMixin):

        loss = None
        if labels is not None:
-            loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size, **kwargs)
+            loss = self.loss_function(logits=logits, labels=labels, vocab_size=self.config.vocab_size)

        return CausalLMOutputWithPast(
            loss=loss,