Merge pull request #51 from molamooo/fix-f16-dequantize-device

[fix] f16 dequantize device ignored
2025-09-06 04:30:03 +00:00 · 2024-08-22 16:31:45 +08:00 · 2024-08-22 16:31:45 +08:00 · 1f85db3d73
commit 1f85db3d73
parent cbc47d0b68 29f4151ebc
1 changed files with 1 additions and 1 deletions
--- a/ktransformers/util/custom_gguf.py
+++ b/ktransformers/util/custom_gguf.py
@@ -681,7 +681,7 @@ def dequantize_f16_gpu(data, device):
    res = torch.from_numpy(data)
    res_gpu = torch.empty_like(res, device=device)
    res_gpu.copy_(res)
-    return res
+    return res_gpu

 GGML_DEQUANTIZE = {
    "F32": dequantize_f32,