From 54c4c1c26ef79827a0074007cb97ad036695d59c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?leeetao=C2=A0?= <3122669219@qq.com>
Date: Fri, 7 Mar 2025 02:47:00 +0000
Subject: [PATCH] Fix the FLOPS test for the IQ1_S and Q2_K quantization types

---
 common/profiler.cpp | 6 ++++++
 ggml/include/ggml.h | 2 +-
 ggml/src/ggml.c     | 4 ++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/common/profiler.cpp b/common/profiler.cpp
index adc9a9e7..e262b6a2 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -188,6 +188,9 @@ static float device_flops(struct llama_model * model, enum ggml_type src0t, enum
     };
     struct ggml_context * ctx = ggml_init(params);
 
+    if (n_embd < ggml_blck_size(src0t)) {
+        n_embd = 2 * ggml_blck_size(src0t);
+    }
     struct ggml_tensor * tensor_a = ggml_new_tensor_2d(ctx, src0t, n_embd, n_embd);
     struct ggml_tensor * tensor_b = ggml_new_tensor_2d(ctx, src1t, n_embd, n_embd);
 
@@ -208,10 +211,12 @@ static float device_flops(struct llama_model * model, enum ggml_type src0t, enum
         ctx_cgraph = ggml_init(params0);
 
         gf = ggml_new_graph(ctx_cgraph);
+
         cur = ggml_mul_mat(ctx_cgraph, tensor_a, tensor_b);
         for (int i = 0; i < n_repeat - 1; i++) {
             cur = ggml_mul_mat(ctx_cgraph, tensor_a, cur);
         }
+
         ggml_build_forward_expand(gf, cur);
     }
 
@@ -1713,6 +1718,7 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     for (int i = 0; i < n; ++i) {
         LOG_INF("| %-10.1f ", dev_info_set[i].cpu_props.flops_q2k_f32);
     }
+    LOG_INF("\n");
 
     LOG_INF("| CPU flops (Q4K x F32, GFLOPS)");
     for (int i = 0; i < n; ++i) {
diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h
index 3052bb65..4af68abc 100644
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -390,7 +390,7 @@ extern "C" {
 //      GGML_TYPE_Q4_0_8_8 = 33,
         GGML_TYPE_TQ1_0 = 34,
         GGML_TYPE_TQ2_0 = 35,
-        GGML_TYPE_COUNT,
+        GGML_TYPE_COUNT = 39,
     };
 
 // precision
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 43a953c6..ffae7f2e 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3424,7 +3424,7 @@ size_t ggml_row_size(enum ggml_type type, int64_t ne) {
 double ggml_type_sizef(enum ggml_type type) {
     return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
 }
- 
+
 const char * ggml_type_name(enum ggml_type type) {
     return type < GGML_TYPE_COUNT ? type_traits[type].type_name : "NONE";
 }
@@ -4056,7 +4056,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
-        ///*.padding     =*/ { 0 },
+        // /*.padding    =*/ { 0 },
     };
 
 #ifdef __clang__
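
Why the n_embd guard in device_flops: IQ1_S and Q2_K store weights in
256-element super-blocks, and ggml only allows quantized tensors whose first
dimension holds a whole number of blocks. When the test matrix width is
smaller than the block size, ggml_new_tensor_2d cannot lay out a single row,
so the guard bumps n_embd to twice ggml_blck_size(src0t) before the matrices
are allocated. A standalone sketch of the same check, in the spirit of the
patch; the harness around it (main, the 16 MiB pool, the starting width of
64) is assumed for illustration and is not part of the patch:

    #include <stdio.h>
    #include "ggml.h"

    int main(void) {
        struct ggml_init_params params = {
            /*.mem_size   =*/ 16u * 1024 * 1024,
            /*.mem_buffer =*/ NULL,
            /*.no_alloc   =*/ false,
        };
        struct ggml_context * ctx = ggml_init(params);

        int64_t n_embd = 64;  // deliberately smaller than the Q2_K block size (256)
        if (n_embd < ggml_blck_size(GGML_TYPE_Q2_K)) {
            n_embd = 2 * ggml_blck_size(GGML_TYPE_Q2_K);  // 512: a whole number of blocks
        }

        struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_Q2_K, n_embd, n_embd);
        printf("n_embd = %lld, tensor bytes = %zu\n", (long long) n_embd, ggml_nbytes(a));

        ggml_free(ctx);
        return 0;
    }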
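
Why GGML_TYPE_COUNT is pinned to 39 instead of being left auto-valued: with
entries commented out above it, a trailing `GGML_TYPE_COUNT,` evaluates to 36,
while upstream ggml keeps the numeric slots 36..38 reserved and defines the
count as 39. Pinning the value keeps the type ids and every
`type < GGML_TYPE_COUNT` bounds check (e.g. in ggml_type_name, touched in the
ggml.c hunk above) aligned with upstream numbering. A minimal illustration of
the enumerator auto-numbering pitfall (the DEMO_* names are made up for the
example):

    enum demo_type {
        DEMO_TQ1_0 = 34,
        DEMO_TQ2_0 = 35,
        // reserved slots 36..38 commented out, as in ggml.h
        DEMO_COUNT,  // auto-values to 36, silently dropping the reserved range
    };
    _Static_assert(DEMO_COUNT == 36, "a trailing enumerator ignores commented-out slots");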
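
On the FLOP accounting the test relies on: the graph performs one
n_embd x n_embd product (tensor_a x tensor_b) followed by n_repeat - 1 chained
products (tensor_a x cur), and each such matmul costs about 2 * n_embd^3 FLOPs
(n^3 multiplies plus n^3 adds), so raising n_embd for the small-block types
also raises the work the elapsed time is divided by. A hedged sketch of that
arithmetic; the actual timer and divisor in device_flops are outside the hunks
shown, and the helper name is mine:

    // one n x n by n x n matmul ~ n^3 multiplies + n^3 adds = 2*n^3 FLOPs
    static double matmul_chain_gflops(int64_t n_embd, int n_repeat, double seconds) {
        const double per_matmul = 2.0 * (double) n_embd * (double) n_embd * (double) n_embd;
        return (double) n_repeat * per_matmul / seconds / 1e9;
    }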