mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-14 08:09:50 +00:00
add q80xf32 and count_n_params
This commit is contained in:
parent
3fe00a16a0
commit
a7a95b53fe
3 changed files with 297 additions and 69 deletions
|
@ -12,6 +12,7 @@ struct cpu_props {
|
|||
float flops_f16_f32; // in GFLOPS
|
||||
float flops_q4k_f32; // in GFLOPS
|
||||
float flops_q6k_f32; // in GFLOPS
|
||||
float flops_q80_f32; // in GFLOPS
|
||||
|
||||
cpu_props() :
|
||||
name(""),
|
||||
|
@ -20,7 +21,8 @@ struct cpu_props {
|
|||
flops_f32_f32(0.0f),
|
||||
flops_f16_f32(0.0f),
|
||||
flops_q4k_f32(0.0f),
|
||||
flops_q6k_f32(0.0f) {}
|
||||
flops_q6k_f32(0.0f),
|
||||
flops_q80_f32(0.0f) {}
|
||||
};
|
||||
|
||||
struct memory_info {
|
||||
|
@ -66,10 +68,12 @@ struct gpu_props {
|
|||
float metal_flops_f16_f32; // in GFLOPS
|
||||
float metal_flops_q4k_f32; // in GFLOPS
|
||||
float metal_flops_q6k_f32; // in GFLOPS
|
||||
float metal_flops_q80_f32; // in GFLOPS
|
||||
float cuda_flops_f32_f32; // in GFLOPS
|
||||
float cuda_flops_f16_f32; // in GFLOPS
|
||||
float cuda_flops_q4k_f32; // in GFLOPS
|
||||
float cuda_flops_q6k_f32; // in GFLOPS
|
||||
float cuda_flops_q80_f32; // in GFLOPS
|
||||
|
||||
gpu_props() :
|
||||
name(""),
|
||||
|
@ -80,38 +84,72 @@ struct gpu_props {
|
|||
metal_flops_f16_f32(0.0f),
|
||||
metal_flops_q4k_f32(0.0f),
|
||||
metal_flops_q6k_f32(0.0f),
|
||||
metal_flops_q80_f32(0.0f),
|
||||
cuda_flops_f32_f32 (0.0f),
|
||||
cuda_flops_f16_f32 (0.0f),
|
||||
cuda_flops_q4k_f32 (0.0f),
|
||||
cuda_flops_q6k_f32 (0.0f) {}
|
||||
cuda_flops_q6k_f32 (0.0f),
|
||||
cuda_flops_q80_f32 (0.0f) {}
|
||||
};
|
||||
|
||||
struct model_flops {
|
||||
int64_t output_f32_f32;
|
||||
int64_t output_f16_f32;
|
||||
int64_t output_q4k_f32;
|
||||
int64_t output_q6k_f32;
|
||||
int64_t output_q80_f32;
|
||||
int64_t layer_f32_f32;
|
||||
int64_t layer_f16_f32;
|
||||
int64_t layer_q4k_f32;
|
||||
int64_t layer_q6k_f32;
|
||||
int64_t layer_q80_f32;
|
||||
|
||||
model_flops() :
|
||||
output_f32_f32(0),
|
||||
output_f16_f32(0),
|
||||
output_q4k_f32(0),
|
||||
output_q6k_f32(0),
|
||||
output_q80_f32(0),
|
||||
layer_f32_f32 (0),
|
||||
layer_f16_f32 (0),
|
||||
layer_q4k_f32 (0),
|
||||
layer_q6k_f32 (0) {}
|
||||
layer_q6k_f32 (0),
|
||||
layer_q80_f32 (0) {}
|
||||
};
|
||||
|
||||
struct model_params {
|
||||
int64_t input_params;
|
||||
int64_t output_params;
|
||||
int64_t layer_params;
|
||||
int64_t input_f32;
|
||||
int64_t input_f16;
|
||||
int64_t input_q4k;
|
||||
int64_t input_q6k;
|
||||
int64_t input_q80;
|
||||
int64_t output_f32;
|
||||
int64_t output_f16;
|
||||
int64_t output_q4k;
|
||||
int64_t output_q6k;
|
||||
int64_t output_q80;
|
||||
int64_t layer_f32;
|
||||
int64_t layer_f16;
|
||||
int64_t layer_q4k;
|
||||
int64_t layer_q6k;
|
||||
int64_t layer_q80;
|
||||
|
||||
model_params() :
|
||||
input_params (0),
|
||||
output_params(0),
|
||||
layer_params (0) {}
|
||||
input_f32 (0),
|
||||
input_f16 (0),
|
||||
input_q4k (0),
|
||||
input_q6k (0),
|
||||
input_q80 (0),
|
||||
output_f32(0),
|
||||
output_f16(0),
|
||||
output_q4k(0),
|
||||
output_q6k(0),
|
||||
output_q80(0),
|
||||
layer_f32 (0),
|
||||
layer_f16 (0),
|
||||
layer_q4k (0),
|
||||
layer_q6k (0),
|
||||
layer_q80 (0) {}
|
||||
};
|
||||
|
||||
struct device_info {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue