Mirror of https://github.com/Lizonghang/prima.cpp.git, synced 2025-09-04 22:09:04 +00:00
fix support for Q5_0
commit 9cbdf01645 (parent c8e615d69c)
3 changed files with 23 additions and 1 deletion
@@ -1116,7 +1116,7 @@ static bool assign_layers_to_device(
         dev.model_flops.layer_f32_f32 / (dev.cpu_props.flops_f32_f32 * 1e9 + EPS) +
         dev.model_flops.layer_f16_f32 / (dev.cpu_props.flops_f16_f32 * 1e9 + EPS) +
         dev.model_flops.layer_q4k_f32 / (dev.cpu_props.flops_q4k_f32 * 1e9 + EPS) +
-        dev.model_flops.layer_q5k_f32 / (dev.cpu_props.flops_q50_f32 * 1e9 + EPS) +
+        dev.model_flops.layer_q50_f32 / (dev.cpu_props.flops_q50_f32 * 1e9 + EPS) +
         dev.model_flops.layer_q5k_f32 / (dev.cpu_props.flops_q5k_f32 * 1e9 + EPS) +
         dev.model_flops.layer_q6k_f32 / (dev.cpu_props.flops_q6k_f32 * 1e9 + EPS) +
         dev.model_flops.layer_q80_f32 / (dev.cpu_props.flops_q80_f32 * 1e9 + EPS)) * 1000; // in ms
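For context on the expression being fixed: assign_layers_to_device estimates a device's per-layer compute latency by summing, over each weight precision, the layer's FLOPs divided by the device's throughput for that precision (given in GFLOPS, hence the 1e9), and converting the result to milliseconds. Per the removed line, the Q5_0 term previously paired the Q5_K FLOP counter with the Q5_0 throughput, so Q5_0 work never entered the estimate under its own counter. Below is a minimal standalone sketch of the same formula; the flops_by_type struct, the EPS value, and the example numbers are illustrative assumptions, not prima.cpp's actual definitions.

    #include <cstdio>

    // Hypothetical struct for this sketch; the real code uses separate
    // model_flops and cpu_props structures with matching field names.
    struct flops_by_type {
        double f32_f32, f16_f32, q4k_f32, q50_f32, q5k_f32, q6k_f32, q80_f32;
    };

    static const double EPS = 1e-9;  // assumed value; the diff only shows the name

    // model: per-layer FLOPs by precision; cpu_gflops: device throughput in GFLOPS.
    static double layer_latency_ms(const flops_by_type & model, const flops_by_type & cpu_gflops) {
        return (model.f32_f32 / (cpu_gflops.f32_f32 * 1e9 + EPS) +
                model.f16_f32 / (cpu_gflops.f16_f32 * 1e9 + EPS) +
                model.q4k_f32 / (cpu_gflops.q4k_f32 * 1e9 + EPS) +
                model.q50_f32 / (cpu_gflops.q50_f32 * 1e9 + EPS) +  // Q5_0 work now has its own term
                model.q5k_f32 / (cpu_gflops.q5k_f32 * 1e9 + EPS) +  // Q5_K divided by Q5_K throughput
                model.q6k_f32 / (cpu_gflops.q6k_f32 * 1e9 + EPS) +
                model.q80_f32 / (cpu_gflops.q80_f32 * 1e9 + EPS)) * 1000.0;  // seconds -> ms
    }

    int main() {
        flops_by_type model = {0, 0, 0, 1.2e9, 0, 0, 0};    // 1.2 GFLOP of Q5_0 x F32 work per layer
        flops_by_type cpu   = {50, 80, 90, 60, 60, 70, 85}; // device throughput in GFLOPS per precision
        std::printf("estimated per-layer latency: %.3f ms\n", layer_latency_ms(model, cpu));
        return 0;
    }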
@@ -1978,6 +1978,10 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_flops.output_q4k_f32);
     LOG_INF("\n");
 
+    LOG_INF("| Model flops (output Q50xF32) ");
+    LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_flops.output_q50_f32);
+    LOG_INF("\n");
+
     LOG_INF("| Model flops (output Q5KxF32) ");
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_flops.output_q5k_f32);
     LOG_INF("\n");
@@ -2002,6 +2006,10 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_flops.layer_q4k_f32);
     LOG_INF("\n");
 
+    LOG_INF("| Model flops (layer Q50xF32) ");
+    LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_flops.layer_q50_f32);
+    LOG_INF("\n");
+
     LOG_INF("| Model flops (layer Q5KxF32) ");
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_flops.layer_q5k_f32);
     LOG_INF("\n");
@@ -2026,6 +2034,10 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.input_q4k);
     LOG_INF("\n");
 
+    LOG_INF("| Model params (input Q50) ");
+    LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.input_q50);
+    LOG_INF("\n");
+
     LOG_INF("| Model params (input Q5K) ");
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.input_q5k);
     LOG_INF("\n");
@@ -2050,6 +2062,10 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.layer_q4k);
     LOG_INF("\n");
 
+    LOG_INF("| Model params (layer Q50) ");
+    LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.layer_q50);
+    LOG_INF("\n");
+
     LOG_INF("| Model params (layer Q5K) ");
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.layer_q5k);
     LOG_INF("\n");
@@ -2074,6 +2090,10 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.output_q4k);
     LOG_INF("\n");
 
+    LOG_INF("| Model params (output Q50) ");
+    LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.output_q50);
+    LOG_INF("\n");
+
     LOG_INF("| Model params (output Q5K) ");
     LOG_INF("| %-10" PRId64 " ", dev_info_set[0].model_params.output_q5k);
     LOG_INF("\n");
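The five device_print_props hunks above all follow the same pattern: a new table row for Q5_0 (labelled "Q50") is inserted between the existing Q4K and Q5K rows, with the row label printed first and then one left-justified 64-bit value per device. A rough standalone sketch of that row format follows, with printf standing in for LOG_INF and a hypothetical print_row helper; column widths and values are illustrative only.

    #include <cinttypes>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Hypothetical helper mirroring the table rows printed by device_print_props:
    // a label cell followed by one "%-10" PRId64 cell per device, then a newline.
    static void print_row(const char * label, const std::vector<int64_t> & per_device) {
        std::printf("| %-32s ", label);  // label width is an assumption
        for (int64_t v : per_device) {
            std::printf("| %-10" PRId64 " ", v);
        }
        std::printf("\n");
    }

    int main() {
        // Two hypothetical devices; values are FLOP and parameter counts.
        print_row("Model flops (output Q50xF32)", {123456789, 98765432});
        print_row("Model params (layer Q50)",     {3407872,   3407872});
        return 0;
    }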
@@ -21476,6 +21476,7 @@ void llama_model_n_flops(
     n_flops->layer_f32_f32 = static_cast<int64_t>((double)n_flops->layer_f32_f32 / (double)n_layer);
     n_flops->layer_f16_f32 = static_cast<int64_t>((double)n_flops->layer_f16_f32 / (double)n_layer);
     n_flops->layer_q4k_f32 = static_cast<int64_t>((double)n_flops->layer_q4k_f32 / (double)n_layer);
+    n_flops->layer_q50_f32 = static_cast<int64_t>((double)n_flops->layer_q50_f32 / (double)n_layer);
     n_flops->layer_q5k_f32 = static_cast<int64_t>((double)n_flops->layer_q5k_f32 / (double)n_layer);
     n_flops->layer_q6k_f32 = static_cast<int64_t>((double)n_flops->layer_q6k_f32 / (double)n_layer);
     n_flops->layer_q80_f32 = static_cast<int64_t>((double)n_flops->layer_q80_f32 / (double)n_layer);
@@ -21483,6 +21484,7 @@ void llama_model_n_flops(
     n_params->layer_f32 = static_cast<int64_t>((double)n_params->layer_f32 / (double)n_layer);
     n_params->layer_f16 = static_cast<int64_t>((double)n_params->layer_f16 / (double)n_layer);
     n_params->layer_q4k = static_cast<int64_t>((double)n_params->layer_q4k / (double)n_layer);
+    n_params->layer_q50 = static_cast<int64_t>((double)n_params->layer_q50 / (double)n_layer);
     n_params->layer_q5k = static_cast<int64_t>((double)n_params->layer_q5k / (double)n_layer);
     n_params->layer_q6k = static_cast<int64_t>((double)n_params->layer_q6k / (double)n_layer);
     n_params->layer_q80 = static_cast<int64_t>((double)n_params->layer_q80 / (double)n_layer);
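The last two hunks extend the per-layer averaging at the end of llama_model_n_flops: counters accumulated over the model's layers are divided by n_layer, and the Q5_0 counters (layer_q50_f32 and layer_q50), which this normalization previously skipped, are now included. A small sketch of that step, using an assumed subset of the counter fields:

    #include <cstdint>
    #include <cstdio>

    // Assumed subset of the FLOP counters; the real struct carries many more fields.
    struct n_flops_t {
        int64_t layer_q4k_f32 = 0;
        int64_t layer_q50_f32 = 0;
        int64_t layer_q5k_f32 = 0;
    };

    // Divide the accumulated per-model totals by the layer count to get an
    // average per layer, as llama_model_n_flops does for every counter.
    static void normalize_per_layer(n_flops_t & f, uint32_t n_layer) {
        f.layer_q4k_f32 = static_cast<int64_t>((double)f.layer_q4k_f32 / (double)n_layer);
        f.layer_q50_f32 = static_cast<int64_t>((double)f.layer_q50_f32 / (double)n_layer);  // the Q5_0 line is the one added by this commit
        f.layer_q5k_f32 = static_cast<int64_t>((double)f.layer_q5k_f32 / (double)n_layer);
    }

    int main() {
        n_flops_t f;
        f.layer_q50_f32 = 32 * 1200000000LL;  // hypothetical: 32 layers, 1.2 GFLOP each
        normalize_per_layer(f, 32);
        std::printf("avg Q50xF32 flops per layer: %lld\n", (long long)f.layer_q50_f32);
        return 0;
    }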