fix KExpertsMarlin on GPU without CUDA Graph

This commit is contained in:
Atream 2025-02-24 09:30:54 +00:00
parent f5f6c6b95d
commit f327695079
2 changed files with 13 additions and 0 deletions

View file

@ -310,6 +310,8 @@ class GGUFLoader:
values = GGML_DEQUANTIZE[ggml_name](data)
values = torch.from_numpy(values.copy())
if ggml_name == "BF16":
values = values.view(torch.bfloat16)
values = values.view(shape[-2::-1])
return values