Mirror of https://github.com/Lizonghang/prima.cpp.git, synced 2025-09-06 06:49:02 +00:00
Fixed the flops test for the IQ1_S and Q2_K quantization types
parent 2f049b8428
commit 54c4c1c26e
3 changed files with 9 additions and 3 deletions
@@ -188,6 +188,9 @@ static float device_flops(struct llama_model * model, enum ggml_type src0t, enum
     };
     struct ggml_context * ctx = ggml_init(params);

+    if(n_embd < ggml_blck_size(src0t)){
+        n_embd = 2 * ggml_blck_size(src0t);
+    }
     struct ggml_tensor * tensor_a = ggml_new_tensor_2d(ctx, src0t, n_embd, n_embd);
     struct ggml_tensor * tensor_b = ggml_new_tensor_2d(ctx, src1t, n_embd, n_embd);

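Why the guard is needed: ggml stores K-quant and IQ-quant weights in fixed-size blocks, and both Q2_K and IQ1_S use 256-element superblocks in mainline ggml, so a row of fewer than ggml_blck_size(src0t) elements cannot form a whole quantization block and the flops test cannot build a valid tensor for these types. Below is a minimal standalone sketch of the guard, assuming the upstream ggml API (ggml_init, ggml_blck_size, ggml_new_tensor_2d, ggml_type_name); the sizes are illustrative, not from the commit.

#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    enum ggml_type src0t = GGML_TYPE_Q2_K;  // K-quants pack 256 weights per superblock
    int64_t n_embd = 64;                    // deliberately smaller than the block size

    // The commit's guard: bump n_embd so each tensor row spans whole blocks.
    if (n_embd < ggml_blck_size(src0t)) {
        n_embd = 2 * ggml_blck_size(src0t); // 512 for Q2_K
    }

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, src0t, n_embd, n_embd);
    printf("type=%s blck=%lld ne0=%lld\n", ggml_type_name(src0t),
           (long long) ggml_blck_size(src0t), (long long) a->ne[0]);

    ggml_free(ctx);
    return 0;
}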
@@ -208,10 +211,12 @@ static float device_flops(struct llama_model * model, enum ggml_type src0t, enum
     ctx_cgraph = ggml_init(params0);

     gf = ggml_new_graph(ctx_cgraph);

     cur = ggml_mul_mat(ctx_cgraph, tensor_a, tensor_b);
     for (int i = 0; i < n_repeat - 1; i++) {
         cur = ggml_mul_mat(ctx_cgraph, tensor_a, cur);
     }

     ggml_build_forward_expand(gf, cur);
 }

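For context, the chained mul_mat calls above keep the measured graph compute-bound: each result feeds the next multiplication, so graph-build and dispatch overhead is amortized over n_repeat matmuls. An n_embd x n_embd matmul costs roughly 2 * n_embd^3 floating-point operations. A sketch of the corresponding FLOPS arithmetic follows; the helper name and the numbers are hypothetical, not from the commit.

#include <stdio.h>

// Estimate throughput for n_repeat chained (n_embd x n_embd) matmuls
// measured over elapsed_sec seconds, using the ~2*n^3 FLOPs-per-matmul rule.
static float estimate_gflops(long long n_embd, int n_repeat, double elapsed_sec) {
    double flops = 2.0 * (double) n_embd * (double) n_embd * (double) n_embd * n_repeat;
    return (float) (flops / elapsed_sec / 1e9);
}

int main(void) {
    // e.g. 512x512 matrices, 20 chained matmuls, measured at 5 ms
    printf("%.1f GFLOPS\n", estimate_gflops(512, 20, 0.005));
    return 0;
}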
@@ -1713,6 +1718,7 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     for (int i = 0; i < n; ++i) {
         LOG_INF("| %-10.1f ", dev_info_set[i].cpu_props.flops_q2k_f32);
     }
+    LOG_INF("\n");

     LOG_INF("| CPU flops (Q4K x F32, GFLOPS)");
     for (int i = 0; i < n; ++i) {
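The added LOG_INF("\n") terminates the Q2K row of the device table; without it, the following "| CPU flops (Q4K x F32, GFLOPS)" header would be appended to the same output line. A reduced, hypothetical model of the row printer using plain printf (the Q2K header string is inferred from the Q4K one, and the array contents are made up):

#include <stdio.h>

int main(void) {
    float flops_q2k_f32[] = { 12.3f, 45.6f, 7.8f }; // one value per device
    int n = 3;

    printf("| CPU flops (Q2K x F32, GFLOPS)");
    for (int i = 0; i < n; ++i) {
        printf("| %-10.1f ", flops_q2k_f32[i]); // same format as the diff
    }
    printf("\n"); // the line this commit adds: terminates the Q2K row

    printf("| CPU flops (Q4K x F32, GFLOPS)");  // next row starts cleanly
    printf("\n");
    return 0;
}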
@@ -390,7 +390,7 @@ extern "C" {
         // GGML_TYPE_Q4_0_8_8 = 33,
         GGML_TYPE_TQ1_0 = 34,
         GGML_TYPE_TQ2_0 = 35,
-        GGML_TYPE_COUNT,
+        GGML_TYPE_COUNT = 39,
     };

     // precision
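Pinning GGML_TYPE_COUNT at 39 instead of letting it follow GGML_TYPE_TQ2_0 = 35 keeps the count stable when ids 36-38 are reserved or defined elsewhere in this fork (an assumption; those enum entries are not visible in the mirror). Any per-type lookup table sized with GGML_TYPE_COUNT then still covers every valid id. A small probe, assuming ggml's introspection helpers ggml_type_name and ggml_blck_size:

#include "ggml.h"
#include <stdio.h>

int main(void) {
    // Walk every type id up to the pinned count; unused slots in ggml's
    // type table report a NULL name and a zero block size.
    for (int t = 0; t < GGML_TYPE_COUNT; ++t) {
        const char * name = ggml_type_name((enum ggml_type) t);
        printf("%2d %-12s blck=%lld\n",
               t, name ? name : "(unused)",
               (long long) ggml_blck_size((enum ggml_type) t));
    }
    return 0;
}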
@@ -4056,7 +4056,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.data =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name =*/ { 0 },
         /*.extra =*/ NULL,
-        ///*.padding =*/ { 0 },
+        // /*.padding =*/ { 0 },
     };

 #ifdef __clang__