From ae670dbe0e18c868349f89b408cb9321b82de7e8 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sat, 22 Mar 2025 00:33:27 +0800
Subject: [PATCH] no repacking for avx2 for kcpp because it breaks 4_0_4_4
 quants

---
 ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp | 10 +++++-----
 src/llama-model.cpp                    |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
index 7255c0143..92d2d972d 100644
--- a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
+++ b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@@ -5642,11 +5642,11 @@ static const ggml::cpu::tensor_traits * ggml_aarch64_get_optimal_repack_type(con
             }
         }
     } else if (cur->type == GGML_TYPE_Q4_K) {
-        if (ggml_cpu_has_avx2()) {
-            if (cur->ne[1] % 8 == 0) {
-                return &ggml::cpu::aarch64::q4_K_8x8_q8_K;
-            }
-        }
+        // if (ggml_cpu_has_avx2()) { //we shall just use the regular avx2 handling, no repacking otherwise massive slowdown with gpu
+        //     if (cur->ne[1] % 8 == 0) {
+        //         return &ggml::cpu::aarch64::q4_K_8x8_q8_K;
+        //     }
+        // }
     } else if (cur->type == GGML_TYPE_IQ4_NL) {
         if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) {
             if (cur->ne[1] % 4 == 0) {
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index d240f5371..9d0ca8591 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -286,7 +286,7 @@ static buft_list_t make_cpu_buft_list(const std::vector & de
 
     // add extra buffer types, only if no GPU device is present
     // ref: https://github.com/ggml-org/llama.cpp/issues/12481#issuecomment-2743136094
-    if (!has_gpu_device) {
+    if (true) { //kcpp needs this to be true, otherwise 4_0_4_4 quants will break. avx2 repacking dont affect us cause we disabled it
         auto * cpu_dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU);
         auto * cpu_reg = ggml_backend_dev_backend_reg(cpu_dev);
         auto ggml_backend_dev_get_extra_bufts_fn = (ggml_backend_dev_get_extra_bufts_t)