support smt and glm4

This commit is contained in:
djw 2025-07-24 12:31:01 +00:00
parent 613f0b7c37
commit 590fcb41cd
5 changed files with 95 additions and 7 deletions

View file

@@ -28,7 +28,7 @@ from torch import nn
from transformers.activations import ACT2FN
from transformers.cache_utils import Cache, DynamicCache
from transformers.generation import GenerationMixin
from transformers.integrations import use_kernel_forward_from_hub
# from transformers.integrations import use_kernel_forward_from_hub
from transformers.masking_utils import create_causal_mask
from transformers.modeling_flash_attention_utils import FlashAttentionKwargs
from transformers.modeling_layers import GradientCheckpointingLayer
@@ -36,9 +36,9 @@ from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutpu
from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel
from transformers.processing_utils import Unpack
from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple
# from transformers.utils import auto_docstring, can_return_tuple
from transformers.utils.generic import check_model_inputs
# from transformers.utils import TransformersKwargs, auto_docstring, can_return_tuple
from transformers.utils import auto_docstring, can_return_tuple
# from transformers.utils.generic import check_model_inputs
from .configuration_glm4_moe import Glm4MoeConfig