support safetensor load, delete architectures argument

2025-09-09 22:05:30 +00:00 · 2025-05-09 10:38:29 +00:00 · 2025-05-09 10:38:29 +00:00 · c6aa379de2
commit c6aa379de2
parent 900a7f7c3e
30 changed files with 1075 additions and 328 deletions
--- a/ktransformers/models/custom_modeling_qwen2_moe.py
+++ b/ktransformers/models/custom_modeling_qwen2_moe.py
@ -39,7 +39,7 @@ class KQwen2MoeForCausalLM(Qwen2MoePreTrainedModel):
        self.cache = cache
        self.vocab_size = config.vocab_size
        self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
-        self.attn = [None] * 10
+        self.attn = [None] * 100
        
    def init_wrapper(self, use_cuda_graph, device, max_batch_token, max_batch_size, max_pages, cuda_graph_idx = 0):
        self.attn[cuda_graph_idx] = flashInferAttn(use_cuda_graph=use_cuda_graph, max_batch_token=max_batch_token, max_batch_size=max_batch_size, max_pages=max_pages, device=device)