Mirror of https://github.com/kvcache-ai/ktransformers.git
(synced 2025-09-10 14:51:06 +00:00)
Support glm4moe (GLM-4 MoE architecture)
This commit is contained in:
parent
1677e90092
commit
d03d92ba53
31 changed files with 2265 additions and 74 deletions
|
@ -2,6 +2,9 @@ import argparse
|
|||
from ktransformers.server.backend.args import ConfigArgs, default_args
|
||||
from ktransformers.util.utils import get_free_ports
|
||||
from transformers import AutoConfig
|
||||
from ktransformers.models.configuration_qwen3_moe import Qwen3MoeConfig
|
||||
from ktransformers.models.configuration_smallthinker import SmallthinkerConfig
|
||||
from ktransformers.models.configuration_glm4_moe import Glm4MoeConfig
|
||||
|
||||
class ArgumentParser:
|
||||
def __init__(self, cfg):
|
||||
|
@ -135,9 +138,16 @@ class ArgumentParser:
|
|||
self.cfg.server_ip = args.host
|
||||
self.cfg.server_port = args.port
|
||||
self.cfg.user_force_think = args.force_think
|
||||
|
||||
model_config = AutoConfig.from_pretrained(args.model_dir, trust_remote_code=True)
|
||||
if model_config.architectures[0] == "Qwen3MoeForCausalLM" or model_config.architectures[0] == "Qwen2MoeForCausalLM" :
|
||||
try:
|
||||
model_config = AutoConfig.from_pretrained(args.model_dir, trust_remote_code=True)
|
||||
except:
|
||||
try:
|
||||
model_config = Glm4MoeConfig.from_pretrained(args.model_dir, trust_remote_code=True)
|
||||
except:
|
||||
raise ValueError(f"Model {args.model_name} not supported. Please check your model directory or model name.")
|
||||
|
||||
|
||||
if model_config.architectures[0] == "Qwen3MoeForCausalLM" or model_config.architectures[0] == "Qwen2MoeForCausalLM" or model_config.architectures[0] == "SmallThinkerForCausalLM" or model_config.architectures[0] == "Glm4MoeForCausalLM":
|
||||
args.gpu_memory_size = args.cache_lens*2*2*model_config.num_hidden_layers*model_config.num_key_value_heads*model_config.head_dim
|
||||
args.architectures = model_config.architectures[0]
|
||||
else:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue