From 5a5f1038337b1b38db76872ecc0c19545bce45ed Mon Sep 17 00:00:00 2001
From: Zonghang Li
Date: Wed, 16 Apr 2025 08:55:07 +0400
Subject: [PATCH] fix q6k and q80

---
 common/profiler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common/profiler.cpp b/common/profiler.cpp
index 48af0950..69a20af0 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -370,7 +370,7 @@ float device_inp_embd_delay(struct llama_model * model, enum ggml_type src0t, in
         case GGML_TYPE_Q6_K:
         case GGML_TYPE_Q8_K:
         case GGML_TYPE_Q8_0:
-            QK_K = 256;
+            QK_K = ggml_blck_size(src0t);
             matrix_B = malloc((embd_size / QK_K) * ggml_type_size(src0t));
             break;
         default: