From 29f4151ebc76834797d8b6a050e53902388d25dc Mon Sep 17 00:00:00 2001
From: molamooo
Date: Thu, 22 Aug 2024 15:10:06 +0800
Subject: [PATCH] [fix] f16 dequantize device ignored

---
 ktransformers/util/custom_gguf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ktransformers/util/custom_gguf.py b/ktransformers/util/custom_gguf.py
index bd5c5b0..b3929be 100644
--- a/ktransformers/util/custom_gguf.py
+++ b/ktransformers/util/custom_gguf.py
@@ -681,7 +681,7 @@ def dequantize_f16_gpu(data, device):
     res = torch.from_numpy(data)
     res_gpu = torch.empty_like(res, device=device)
     res_gpu.copy_(res)
-    return res
+    return res_gpu
 
 GGML_DEQUANTIZE = {
     "F32": dequantize_f32,