add q80xf32 and count_n_params

This commit is contained in:
Lizonghang 2024-11-24 23:11:12 +04:00
parent 3fe00a16a0
commit a7a95b53fe
3 changed files with 297 additions and 69 deletions

View file

@ -12,6 +12,7 @@ struct cpu_props {
float flops_f16_f32; // in GFLOPS
float flops_q4k_f32; // in GFLOPS
float flops_q6k_f32; // in GFLOPS
float flops_q80_f32; // in GFLOPS
cpu_props() :
name(""),
@ -20,7 +21,8 @@ struct cpu_props {
flops_f32_f32(0.0f),
flops_f16_f32(0.0f),
flops_q4k_f32(0.0f),
flops_q6k_f32(0.0f) {}
flops_q6k_f32(0.0f),
flops_q80_f32(0.0f) {}
};
struct memory_info {
@ -66,10 +68,12 @@ struct gpu_props {
float metal_flops_f16_f32; // in GFLOPS
float metal_flops_q4k_f32; // in GFLOPS
float metal_flops_q6k_f32; // in GFLOPS
float metal_flops_q80_f32; // in GFLOPS
float cuda_flops_f32_f32; // in GFLOPS
float cuda_flops_f16_f32; // in GFLOPS
float cuda_flops_q4k_f32; // in GFLOPS
float cuda_flops_q6k_f32; // in GFLOPS
float cuda_flops_q80_f32; // in GFLOPS
gpu_props() :
name(""),
@ -80,38 +84,72 @@ struct gpu_props {
metal_flops_f16_f32(0.0f),
metal_flops_q4k_f32(0.0f),
metal_flops_q6k_f32(0.0f),
metal_flops_q80_f32(0.0f),
cuda_flops_f32_f32 (0.0f),
cuda_flops_f16_f32 (0.0f),
cuda_flops_q4k_f32 (0.0f),
cuda_flops_q6k_f32 (0.0f) {}
cuda_flops_q6k_f32 (0.0f),
cuda_flops_q80_f32 (0.0f) {}
};
// FLOP counters for a model, kept separately for the output part (output_*)
// and the layer part (layer_*), with one counter per type pairing named in
// the field suffix: f32_f32, f16_f32, q4k_f32, q6k_f32, q80_f32.
// NOTE(review): presumably the suffix reads <weight type>_<activation type>,
// and layer_* may be per-layer rather than a whole-model total — confirm
// against the code that fills these in.
struct model_flops {
int64_t output_f32_f32;
int64_t output_f16_f32;
int64_t output_q4k_f32;
int64_t output_q6k_f32;
int64_t output_q80_f32;
int64_t layer_f32_f32;
int64_t layer_f16_f32;
int64_t layer_q4k_f32;
int64_t layer_q6k_f32;
int64_t layer_q80_f32;
// Default constructor: every counter starts at zero.
model_flops() :
output_f32_f32(0),
output_f16_f32(0),
output_q4k_f32(0),
output_q6k_f32(0),
output_q80_f32(0),
layer_f32_f32 (0),
layer_f16_f32 (0),
layer_q4k_f32 (0),
// NOTE(review): the next line looks like the pre-change end of the
// initializer list preserved by the diff view; the two lines after it
// appear to be its replacement (adding layer_q80_f32) — confirm.
layer_q6k_f32 (0) {}
layer_q6k_f32 (0),
layer_q80_f32 (0) {}
};
// Counters for a model's input, output and layer parts.
// NOTE(review): this span shows two generations of the struct side by side.
// The three *_params members look like the pre-change fields kept by the
// diff view, and the per-type members (f32, f16, q4k, q6k, q80) appear to
// be their replacement — confirm against the committed header.
// NOTE(review): presumably these are parameter counts per tensor type, and
// layer_* may be per-layer rather than a whole-model total — confirm.
struct model_params {
int64_t input_params;
int64_t output_params;
int64_t layer_params;
int64_t input_f32;
int64_t input_f16;
int64_t input_q4k;
int64_t input_q6k;
int64_t input_q80;
int64_t output_f32;
int64_t output_f16;
int64_t output_q4k;
int64_t output_q6k;
int64_t output_q80;
int64_t layer_f32;
int64_t layer_f16;
int64_t layer_q4k;
int64_t layer_q6k;
int64_t layer_q80;
// Default constructor: every counter starts at zero.
model_params() :
// NOTE(review): the next three initializers (ending in `{}`) look like the
// pre-change initializer list; the per-type list after them appears to be
// its replacement — confirm.
input_params (0),
output_params(0),
layer_params (0) {}
input_f32 (0),
input_f16 (0),
input_q4k (0),
input_q6k (0),
input_q80 (0),
output_f32(0),
output_f16(0),
output_q4k(0),
output_q6k(0),
output_q80(0),
layer_f32 (0),
layer_f16 (0),
layer_q4k (0),
layer_q6k (0),
layer_q80 (0) {}
};
struct device_info {