From 54c4c1c26ef79827a0074007cb97ad036695d59c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?leeetao=C2=A0?= <3122669219@qq.com>
Date: Fri, 7 Mar 2025 02:47:00 +0000
Subject: [PATCH] Fix the FLOPS test for the IQ1_S and Q2_K quantization types

---
 common/profiler.cpp | 6 ++++++
 ggml/include/ggml.h | 2 +-
 ggml/src/ggml.c     | 4 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/common/profiler.cpp b/common/profiler.cpp
index adc9a9e7..e262b6a2 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -188,6 +188,9 @@ static float device_flops(struct llama_model * model, enum ggml_type src0t, enum
     };
     struct ggml_context * ctx = ggml_init(params);
 
+    if (n_embd < ggml_blck_size(src0t)) {
+        n_embd = 2 * ggml_blck_size(src0t);
+    }
     struct ggml_tensor * tensor_a = ggml_new_tensor_2d(ctx, src0t, n_embd, n_embd);
     struct ggml_tensor * tensor_b = ggml_new_tensor_2d(ctx, src1t, n_embd, n_embd);
 
@@ -208,10 +211,12 @@ static float device_flops(struct llama_model * model, enum ggml_type src0t, enum
         ctx_cgraph = ggml_init(params0);
 
         gf = ggml_new_graph(ctx_cgraph);
+
         cur = ggml_mul_mat(ctx_cgraph, tensor_a, tensor_b);
         for (int i = 0; i < n_repeat - 1; i++) {
             cur = ggml_mul_mat(ctx_cgraph, tensor_a, cur);
         }
+
         ggml_build_forward_expand(gf, cur);
     }
 
@@ -1713,6 +1718,7 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     for (int i = 0; i < n; ++i) {
         LOG_INF("| %-10.1f ", dev_info_set[i].cpu_props.flops_q2k_f32);
     }
+    LOG_INF("\n");
 
     LOG_INF("| CPU flops (Q4K x F32, GFLOPS)");
     for (int i = 0; i < n; ++i) {
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 3052bb65..4af68abc 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -390,7 +390,7 @@ extern "C" {
 //      GGML_TYPE_Q4_0_8_8 = 33,
         GGML_TYPE_TQ1_0 = 34,
         GGML_TYPE_TQ2_0 = 35,
-        GGML_TYPE_COUNT,
+        GGML_TYPE_COUNT = 39,
     };
 
 // precision
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 43a953c6..ffae7f2e 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3424,7 +3424,7 @@ size_t ggml_row_size(enum ggml_type type, int64_t ne) {
 double ggml_type_sizef(enum ggml_type type) {
     return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
 }
- 
+
 const char * ggml_type_name(enum ggml_type type) {
     return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
 }
@@ -4056,7 +4056,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
-        ///*.padding     =*/ { 0 },
+        // /*.padding    =*/ { 0 },
     };
 
 #ifdef __clang__
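
Why the n_embd guard in device_flops: IQ1_S and Q2_K store weights in
256-element super-blocks, and ggml only allows quantized tensors whose first
dimension holds a whole number of blocks. When the test matrix width is
smaller than the block size, ggml_new_tensor_2d cannot lay out a single row,
so the guard bumps n_embd to twice ggml_blck_size(src0t) before the matrices
are allocated. A standalone sketch of the same check, in the spirit of the
patch; the harness around it (main, the 16 MiB pool, the starting width of
64) is assumed for illustration and is not part of the patch:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16u * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        int64_t n_embd = 64;  // deliberately smaller than the Q2_K block size (256)
        if (n_embd < ggml_blck_size(GGML_TYPE_Q2_K)) {
            n_embd = 2 * ggml_blck_size(GGML_TYPE_Q2_K);  // 512: a whole number of blocks
        }

        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_Q2_K, n_embd, n_embd);
        printf("n_embd = %lld, tensor bytes = %zu\n", (long long) n_embd, ggml_nbytes(a));

        ggml_free(ctx);
        return 0;
    }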
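
Why GGML_TYPE_COUNT is pinned to 39 instead of being left auto-valued: with
entries commented out above it, a trailing `GGML_TYPE_COUNT,` evaluates to 36,
while upstream ggml keeps the numeric slots 36..38 reserved and defines the
count as 39. Pinning the value keeps the type ids and every
`type < GGML_TYPE_COUNT` bounds check (e.g. in ggml_type_name, touched in the
ggml.c hunk above) aligned with upstream numbering. A minimal illustration of
the enumerator auto-numbering pitfall (the DEMO_* names are made up for the
example):

    enum demo_type {
        DEMO_TQ1_0 = 34,
        DEMO_TQ2_0 = 35,
        // reserved slots 36..38 commented out, as in ggml.h
        DEMO_COUNT,  // auto-values to 36, silently dropping the reserved range
    };
    _Static_assert(DEMO_COUNT == 36, "a trailing enumerator ignores commented-out slots");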
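
On the FLOP accounting the test relies on: the graph performs one
n_embd x n_embd product (tensor_a x tensor_b) followed by n_repeat - 1 chained
products (tensor_a x cur), and each such matmul costs about 2 * n_embd^3 FLOPs
(n^3 multiplies plus n^3 adds), so raising n_embd for the small-block types
also raises the work the elapsed time is divided by. A hedged sketch of that
arithmetic; the actual timer and divisor in device_flops are outside the hunks
shown, and the helper name is mine:

    // one n x n by n x n matmul ~ n^3 multiplies + n^3 adds = 2*n^3 FLOPs
    static double matmul_chain_gflops(int64_t n_embd, int n_repeat, double seconds) {
        const double per_matmul = 2.0 * (double) n_embd * (double) n_embd * (double) n_embd;
        return (double) n_repeat * per_matmul / seconds / 1e9;
    }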