support npu

This commit is contained in:
Dongjw 2025-07-23 09:54:55 +00:00
parent a641aa8063
commit b982815325
22 changed files with 162 additions and 1562 deletions

View file

@ -36,6 +36,7 @@ from abc import ABC, abstractmethod
from ktransformers.operators.linear import KLinearMarlin, KLinearTorch, KTransformersLinear
import time
from ktransformers.operators.cpuinfer import CPUInfer
from ktransformers.util.custom_loader import translate_name_to_gguf
def deduplicate_and_sort(lst):
@ -396,6 +397,16 @@ class KExpertsCPU(KExpertsBase):
gate_type = self.gguf_loader.get_ggml_type(key + ".ffn_gate.0.weight")
up_type = self.gguf_loader.get_ggml_type(key + ".ffn_up.0.weight")
down_type = self.gguf_loader.get_ggml_type(key + ".ffn_down.0.weight")
elif self.gguf_loader.safetensor_loader is not None:
# for NPU
# HACK: temporary, ugly way to load the tensor — clean up later
translate_key = translate_name_to_gguf(key)
gate = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_gate_exps.weight").numpy()
up = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_up_exps.weight").numpy()
down = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_down_exps.weight").numpy()
gate_type = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_gate_exps.ggml_type").item()
up_type = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_up_exps.ggml_type").item()
down_type = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_down_exps.ggml_type").item()
else:
raise ValueError(f"Experts {key} not found in gguf_loader")
res = {key:{"gate": gate, "up": up, "down": down, "gate_type": gate_type, "up_type": up_type, "down_type": down_type}}