mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 18:39:04 +00:00
add EPS in device_compute_delay
This commit is contained in:
parent
fa210d2034
commit
d9beb030ee
2 changed files with 44 additions and 44 deletions
|
@ -1239,29 +1239,29 @@ static float device_compute_delay(struct device_info & dev_info, int n_layers, c
|
||||||
#ifdef GGML_USE_CUDA
|
#ifdef GGML_USE_CUDA
|
||||||
struct gpu_props gpu = dev_info.gpu_props;
|
struct gpu_props gpu = dev_info.gpu_props;
|
||||||
|
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / (double)gpu.cuda_flops_f32_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / ((double)gpu.cuda_flops_f32_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / (double)gpu.cuda_flops_f16_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / ((double)gpu.cuda_flops_f16_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / (double)gpu.cuda_flops_q4k_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / ((double)gpu.cuda_flops_q4k_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / (double)gpu.cuda_flops_q5k_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / ((double)gpu.cuda_flops_q5k_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / (double)gpu.cuda_flops_q6k_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / ((double)gpu.cuda_flops_q6k_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / (double)gpu.cuda_flops_q80_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / ((double)gpu.cuda_flops_q80_f32 + EPS) / 1e9;
|
||||||
#elif GGML_USE_METAL
|
#elif GGML_USE_METAL
|
||||||
struct gpu_props gpu = dev_info.gpu_props;
|
struct gpu_props gpu = dev_info.gpu_props;
|
||||||
|
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / (double)gpu.metal_flops_f32_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / ((double)gpu.metal_flops_f32_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / (double)gpu.metal_flops_f16_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / ((double)gpu.metal_flops_f16_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / (double)gpu.metal_flops_q4k_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / ((double)gpu.metal_flops_q4k_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / (double)gpu.metal_flops_q5k_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / ((double)gpu.metal_flops_q5k_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / (double)gpu.metal_flops_q6k_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / ((double)gpu.metal_flops_q6k_f32 + EPS) / 1e9;
|
||||||
gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / (double)gpu.metal_flops_q80_f32 / 1e9;
|
gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / ((double)gpu.metal_flops_q80_f32 + EPS) / 1e9;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
cpu_latency_per_layer += (double)n_flops.layer_f32_f32 / (double)cpu.flops_f32_f32 / 1e9;
|
cpu_latency_per_layer += (double)n_flops.layer_f32_f32 / ((double)cpu.flops_f32_f32 + EPS) / 1e9;
|
||||||
cpu_latency_per_layer += (double)n_flops.layer_f16_f32 / (double)cpu.flops_f16_f32 / 1e9;
|
cpu_latency_per_layer += (double)n_flops.layer_f16_f32 / ((double)cpu.flops_f16_f32 + EPS) / 1e9;
|
||||||
cpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / (double)cpu.flops_q4k_f32 / 1e9;
|
cpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / ((double)cpu.flops_q4k_f32 + EPS) / 1e9;
|
||||||
cpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / (double)cpu.flops_q5k_f32 / 1e9;
|
cpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / ((double)cpu.flops_q5k_f32 + EPS) / 1e9;
|
||||||
cpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / (double)cpu.flops_q6k_f32 / 1e9;
|
cpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / ((double)cpu.flops_q6k_f32 + EPS) / 1e9;
|
||||||
cpu_latency_per_layer += (double)n_flops.layer_q80_f32 / (double)cpu.flops_q80_f32 / 1e9;
|
cpu_latency_per_layer += (double)n_flops.layer_q80_f32 / ((double)cpu.flops_q80_f32 + EPS) / 1e9;
|
||||||
|
|
||||||
double total_latency = 0.0f;
|
double total_latency = 0.0f;
|
||||||
|
|
||||||
|
@ -1274,12 +1274,12 @@ static float device_compute_delay(struct device_info & dev_info, int n_layers, c
|
||||||
total_latency += cpu_latency_per_layer * n_layers;
|
total_latency += cpu_latency_per_layer * n_layers;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
total_latency += (double)n_flops.output_f32_f32 / (double)cpu.flops_f32_f32 / 1e9;
|
total_latency += (double)n_flops.output_f32_f32 / ((double)cpu.flops_f32_f32 + EPS) / 1e9;
|
||||||
total_latency += (double)n_flops.output_f16_f32 / (double)cpu.flops_f16_f32 / 1e9;
|
total_latency += (double)n_flops.output_f16_f32 / ((double)cpu.flops_f16_f32 + EPS) / 1e9;
|
||||||
total_latency += (double)n_flops.output_q4k_f32 / (double)cpu.flops_q4k_f32 / 1e9;
|
total_latency += (double)n_flops.output_q4k_f32 / ((double)cpu.flops_q4k_f32 + EPS) / 1e9;
|
||||||
total_latency += (double)n_flops.output_q5k_f32 / (double)cpu.flops_q5k_f32 / 1e9;
|
total_latency += (double)n_flops.output_q5k_f32 / ((double)cpu.flops_q5k_f32 + EPS) / 1e9;
|
||||||
total_latency += (double)n_flops.output_q6k_f32 / (double)cpu.flops_q6k_f32 / 1e9;
|
total_latency += (double)n_flops.output_q6k_f32 / ((double)cpu.flops_q6k_f32 + EPS) / 1e9;
|
||||||
total_latency += (double)n_flops.output_q80_f32 / (double)cpu.flops_q80_f32 / 1e9;
|
total_latency += (double)n_flops.output_q80_f32 / ((double)cpu.flops_q80_f32 + EPS) / 1e9;
|
||||||
|
|
||||||
total_latency *= 1000; // convert to ms
|
total_latency *= 1000; // convert to ms
|
||||||
|
|
||||||
|
|
|
@ -26,12 +26,12 @@ struct cpu_props {
|
||||||
name(""),
|
name(""),
|
||||||
description(""),
|
description(""),
|
||||||
cores(0),
|
cores(0),
|
||||||
flops_f32_f32(EPS),
|
flops_f32_f32(0.0f),
|
||||||
flops_f16_f32(EPS),
|
flops_f16_f32(0.0f),
|
||||||
flops_q4k_f32(EPS),
|
flops_q4k_f32(0.0f),
|
||||||
flops_q5k_f32(EPS),
|
flops_q5k_f32(0.0f),
|
||||||
flops_q6k_f32(EPS),
|
flops_q6k_f32(0.0f),
|
||||||
flops_q80_f32(EPS) {}
|
flops_q80_f32(0.0f) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct memory_info {
|
struct memory_info {
|
||||||
|
@ -100,20 +100,20 @@ struct gpu_props {
|
||||||
memory_free (0.0f),
|
memory_free (0.0f),
|
||||||
memory_total (0.0f),
|
memory_total (0.0f),
|
||||||
metal_read_vram_bw (0.0f),
|
metal_read_vram_bw (0.0f),
|
||||||
metal_flops_f32_f32(EPS),
|
metal_flops_f32_f32(0.0f),
|
||||||
metal_flops_f16_f32(EPS),
|
metal_flops_f16_f32(0.0f),
|
||||||
metal_flops_q4k_f32(EPS),
|
metal_flops_q4k_f32(0.0f),
|
||||||
metal_flops_q5k_f32(EPS),
|
metal_flops_q5k_f32(0.0f),
|
||||||
metal_flops_q6k_f32(EPS),
|
metal_flops_q6k_f32(0.0f),
|
||||||
metal_flops_q80_f32(EPS),
|
metal_flops_q80_f32(0.0f),
|
||||||
metal_mem_cpy_delay(0.0f),
|
metal_mem_cpy_delay(0.0f),
|
||||||
cuda_read_vram_bw (0.0f),
|
cuda_read_vram_bw (0.0f),
|
||||||
cuda_flops_f32_f32 (EPS),
|
cuda_flops_f32_f32 (0.0f),
|
||||||
cuda_flops_f16_f32 (EPS),
|
cuda_flops_f16_f32 (0.0f),
|
||||||
cuda_flops_q4k_f32 (EPS),
|
cuda_flops_q4k_f32 (0.0f),
|
||||||
cuda_flops_q5k_f32 (EPS),
|
cuda_flops_q5k_f32 (0.0f),
|
||||||
cuda_flops_q6k_f32 (EPS),
|
cuda_flops_q6k_f32 (0.0f),
|
||||||
cuda_flops_q80_f32 (EPS),
|
cuda_flops_q80_f32 (0.0f),
|
||||||
cuda_mem_cpy_delay (0.0f) {}
|
cuda_mem_cpy_delay (0.0f) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue