update: Qwen3 MoE model adaptation for NPU (framework) (#1706)

2026-04-28 11:49:51 +00:00 · 2025-12-11 17:07:57 +08:00 · 2025-12-11 17:07:57 +08:00 · adcfa9080f
commit adcfa9080f
parent 53f6a6d6e1
10 changed files with 867 additions and 174 deletions
--- a/archive/ktransformers/util/custom_loader.py
+++ b/archive/ktransformers/util/custom_loader.py
@@ -104,7 +104,11 @@ class SafeTensorLoader(ModelLoader):
        f = self.file_handle_map.get(file)
        if f is None:
            raise FileNotFoundError(f"File {file} not found in Safetensor files")
-        tensor = f.get_tensor(key)
+        if use_torch_npu:
+            tensor = f.get_tensor(key).to(torch.float16)
+        else:
+            tensor = f.get_tensor(key)
+
        return tensor.to(device)

    def load_experts(self, key: str, device: str="cpu"):