mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 20:00:06 +00:00
feat(sft): add Qwen3.5 MoE support + fused checkpoint loading
- arch.py: add Qwen3_5Moe arch match, read config from text_config, _get_layers_prefix returns model.language_model.layers for Qwen3.5, _get_model_container_and_layers searches language_model attr - weights.py: load_experts_from_checkpoint_files detects fused format (gate_up_proj in weight_map) and splits into gate/up/down - wrapper.py: hidden_size fallback to text_config Verified: Qwen3.5-35B-A3B (256 experts, fused format) E2E pass. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
58d7eabb9b
commit
dd1da65d90
3 changed files with 54 additions and 20 deletions
|
|
@@ -157,7 +157,8 @@ def wrap_moe_layers_with_kt_wrapper(model: nn.Module, kt_plugin: Any) -> list[KT
|
|||
is_rank_0 = dist.get_rank() == 0
|
||||
|
||||
moe_config = get_moe_arch_config(model.config)
|
||||
hidden_size = model.config.hidden_size
|
||||
_text_cfg = getattr(model.config, "text_config", model.config)
|
||||
hidden_size = _text_cfg.hidden_size
|
||||
|
||||
cfg = _get_kt_config(kt_plugin)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue