support safetensor load, delete architectures argument

This commit is contained in:
qiyuxinlin 2025-05-09 10:38:29 +00:00
parent 900a7f7c3e
commit c6aa379de2
30 changed files with 1075 additions and 328 deletions

View file

@@ -11,7 +11,7 @@ from ktransformers.models.modeling_qwen2_moe import Qwen2MoeAttention
from ktransformers.models.modeling_qwen3_moe import Qwen3MoeAttention
from typing import Optional, Tuple
from ktransformers.operators.base_operator import BaseInjectedModule
-from ktransformers.util.custom_gguf import GGUFLoader
+from ktransformers.util.custom_loader import GGUFLoader
import logging
from transformers.configuration_utils import PretrainedConfig
from flashinfer import BatchMLAPagedAttentionWrapper