mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-14 09:09:42 +00:00
support npu
This commit is contained in:
parent
a641aa8063
commit
b982815325
22 changed files with 162 additions and 1562 deletions
|
@ -36,6 +36,7 @@ from abc import ABC, abstractmethod
|
|||
from ktransformers.operators.linear import KLinearMarlin, KLinearTorch, KTransformersLinear
|
||||
import time
|
||||
from ktransformers.operators.cpuinfer import CPUInfer
|
||||
from ktransformers.util.custom_loader import translate_name_to_gguf
|
||||
|
||||
|
||||
def deduplicate_and_sort(lst):
|
||||
|
@ -396,6 +397,16 @@ class KExpertsCPU(KExpertsBase):
|
|||
gate_type = self.gguf_loader.get_ggml_type(key + ".ffn_gate.0.weight")
|
||||
up_type = self.gguf_loader.get_ggml_type(key + ".ffn_up.0.weight")
|
||||
down_type = self.gguf_loader.get_ggml_type(key + ".ffn_down.0.weight")
|
||||
elif self.gguf_loader.safetensor_loader is not None:
|
||||
# for npu
|
||||
# NOTE: temporary, ugly workaround to load the tensor directly
|
||||
translate_key = translate_name_to_gguf(key)
|
||||
gate = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_gate_exps.weight").numpy()
|
||||
up = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_up_exps.weight").numpy()
|
||||
down = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_down_exps.weight").numpy()
|
||||
gate_type = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_gate_exps.ggml_type").item()
|
||||
up_type = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_up_exps.ggml_type").item()
|
||||
down_type = self.gguf_loader.safetensor_loader.load_tensor(translate_key + ".ffn_down_exps.ggml_type").item()
|
||||
else:
|
||||
raise ValueError(f"Experts {key} not found in gguf_loader")
|
||||
res = {key:{"gate": gate, "up": up, "down": down, "gate_type": gate_type, "up_type": up_type, "down_type": down_type}}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue