mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-28 11:49:51 +00:00
update: Qwen3 MoE model adaptation for NPU (framework) (#1706)
This commit is contained in:
parent
53f6a6d6e1
commit
adcfa9080f
10 changed files with 867 additions and 174 deletions
|
|
@@ -104,7 +104,11 @@ class SafeTensorLoader(ModelLoader):
|
|||
f = self.file_handle_map.get(file)
|
||||
if f is None:
|
||||
raise FileNotFoundError(f"File {file} not found in Safetensor files")
|
||||
tensor = f.get_tensor(key)
|
||||
if use_torch_npu:
|
||||
tensor = f.get_tensor(key).to(torch.float16)
|
||||
else:
|
||||
tensor = f.get_tensor(key)
|
||||
|
||||
return tensor.to(device)
|
||||
|
||||
def load_experts(self, key: str, device: str="cpu"):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue