smallthinker right

This commit is contained in:
qiyuxinlin 2025-07-25 12:46:14 +00:00
parent f8719ee7b9
commit 712ad1fa3c
7 changed files with 48 additions and 108 deletions

View file

@@ -97,6 +97,7 @@ class SmallthinkerConfig(PretrainedConfig):
initializer_range=0.02,
**kwargs,
):
moe_layer_layout = [1]*num_hidden_layers
# Configuration sanitizers
assert num_attention_heads % num_key_value_heads == 0, "[Smallthinker config sanitizer] num_attention_heads must be divisible by num_key_value_heads"
assert len(rope_layout) == num_hidden_layers, "[Smallthinker config sanitizer] rope_layout must have the same length as num_hidden_layers"