Fix KExpertsMarlin on GPU without CUDA Graph

2025-09-13 00:29:59 +00:00 · 2025-02-24 09:30:54 +00:00 · 2025-02-24 09:30:54 +00:00 · f327695079
commit f327695079
parent f5f6c6b95d
2 changed files with 13 additions and 0 deletions
--- a/ktransformers/util/custom_gguf.py
+++ b/ktransformers/util/custom_gguf.py
@@ -310,6 +310,8 @@ class GGUFLoader:
            values = GGML_DEQUANTIZE[ggml_name](data)
            values = torch.from_numpy(values.copy())

+        if ggml_name == "BF16":
+            values = values.view(torch.bfloat16)
        values = values.view(shape[-2::-1])

        return values