update: Qwen3 MoE model adaptation for NPU (framework) (#1706)

This commit is contained in:
Shaoxu Cheng 2025-12-11 17:07:57 +08:00 committed by GitHub
parent 53f6a6d6e1
commit adcfa9080f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 867 additions and 174 deletions

View file

@@ -104,7 +104,11 @@ class SafeTensorLoader(ModelLoader):
f = self.file_handle_map.get(file)
if f is None:
raise FileNotFoundError(f"File {file} not found in Safetensor files")
-tensor = f.get_tensor(key)
+if use_torch_npu:
+tensor = f.get_tensor(key).to(torch.float16)
+else:
+tensor = f.get_tensor(key)
return tensor.to(device)
def load_experts(self, key: str, device: str="cpu"):