From d9beb030eecde965f5b51914930a160cb643aabb Mon Sep 17 00:00:00 2001 From: Lizonghang <870644199@qq.com> Date: Sun, 29 Dec 2024 22:31:45 +0400 Subject: [PATCH] add EPS in device_compute_delay --- common/profiler.cpp | 48 ++++++++++++++++++++++----------------------- common/profiler.h | 40 ++++++++++++++++++------------------- 2 files changed, 44 insertions(+), 44 deletions(-) diff --git a/common/profiler.cpp b/common/profiler.cpp index 9559d526..e29f98ec 100644 --- a/common/profiler.cpp +++ b/common/profiler.cpp @@ -1239,29 +1239,29 @@ static float device_compute_delay(struct device_info & dev_info, int n_layers, c #ifdef GGML_USE_CUDA struct gpu_props gpu = dev_info.gpu_props; - gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / (double)gpu.cuda_flops_f32_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / (double)gpu.cuda_flops_f16_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / (double)gpu.cuda_flops_q4k_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / (double)gpu.cuda_flops_q5k_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / (double)gpu.cuda_flops_q6k_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / (double)gpu.cuda_flops_q80_f32 / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / ((double)gpu.cuda_flops_f32_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / ((double)gpu.cuda_flops_f16_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / ((double)gpu.cuda_flops_q4k_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / ((double)gpu.cuda_flops_q5k_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / ((double)gpu.cuda_flops_q6k_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / ((double)gpu.cuda_flops_q80_f32 + EPS) / 1e9; #elif GGML_USE_METAL struct gpu_props gpu = dev_info.gpu_props; - gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / (double)gpu.metal_flops_f32_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / (double)gpu.metal_flops_f16_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / (double)gpu.metal_flops_q4k_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / (double)gpu.metal_flops_q5k_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / (double)gpu.metal_flops_q6k_f32 / 1e9; - gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / (double)gpu.metal_flops_q80_f32 / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_f32_f32 / ((double)gpu.metal_flops_f32_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_f16_f32 / ((double)gpu.metal_flops_f16_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / ((double)gpu.metal_flops_q4k_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / ((double)gpu.metal_flops_q5k_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / ((double)gpu.metal_flops_q6k_f32 + EPS) / 1e9; + gpu_latency_per_layer += (double)n_flops.layer_q80_f32 / ((double)gpu.metal_flops_q80_f32 + EPS) / 1e9; #endif - cpu_latency_per_layer += (double)n_flops.layer_f32_f32 / (double)cpu.flops_f32_f32 / 1e9; - cpu_latency_per_layer += (double)n_flops.layer_f16_f32 / (double)cpu.flops_f16_f32 / 1e9; - cpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / (double)cpu.flops_q4k_f32 / 1e9; - cpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / (double)cpu.flops_q5k_f32 / 1e9; - cpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / (double)cpu.flops_q6k_f32 / 1e9; - cpu_latency_per_layer += (double)n_flops.layer_q80_f32 / (double)cpu.flops_q80_f32 / 1e9; + cpu_latency_per_layer += (double)n_flops.layer_f32_f32 / ((double)cpu.flops_f32_f32 + EPS) / 1e9; + cpu_latency_per_layer += (double)n_flops.layer_f16_f32 / ((double)cpu.flops_f16_f32 + EPS) / 1e9; + cpu_latency_per_layer += (double)n_flops.layer_q4k_f32 / ((double)cpu.flops_q4k_f32 + EPS) / 1e9; + cpu_latency_per_layer += (double)n_flops.layer_q5k_f32 / ((double)cpu.flops_q5k_f32 + EPS) / 1e9; + cpu_latency_per_layer += (double)n_flops.layer_q6k_f32 / ((double)cpu.flops_q6k_f32 + EPS) / 1e9; + cpu_latency_per_layer += (double)n_flops.layer_q80_f32 / ((double)cpu.flops_q80_f32 + EPS) / 1e9; double total_latency = 0.0f; @@ -1274,12 +1274,12 @@ static float device_compute_delay(struct device_info & dev_info, int n_layers, c total_latency += cpu_latency_per_layer * n_layers; #endif - total_latency += (double)n_flops.output_f32_f32 / (double)cpu.flops_f32_f32 / 1e9; - total_latency += (double)n_flops.output_f16_f32 / (double)cpu.flops_f16_f32 / 1e9; - total_latency += (double)n_flops.output_q4k_f32 / (double)cpu.flops_q4k_f32 / 1e9; - total_latency += (double)n_flops.output_q5k_f32 / (double)cpu.flops_q5k_f32 / 1e9; - total_latency += (double)n_flops.output_q6k_f32 / (double)cpu.flops_q6k_f32 / 1e9; - total_latency += (double)n_flops.output_q80_f32 / (double)cpu.flops_q80_f32 / 1e9; + total_latency += (double)n_flops.output_f32_f32 / ((double)cpu.flops_f32_f32 + EPS) / 1e9; + total_latency += (double)n_flops.output_f16_f32 / ((double)cpu.flops_f16_f32 + EPS) / 1e9; + total_latency += (double)n_flops.output_q4k_f32 / ((double)cpu.flops_q4k_f32 + EPS) / 1e9; + total_latency += (double)n_flops.output_q5k_f32 / ((double)cpu.flops_q5k_f32 + EPS) / 1e9; + total_latency += (double)n_flops.output_q6k_f32 / ((double)cpu.flops_q6k_f32 + EPS) / 1e9; + total_latency += (double)n_flops.output_q80_f32 / ((double)cpu.flops_q80_f32 + EPS) / 1e9; total_latency *= 1000; // convert to ms diff --git a/common/profiler.h b/common/profiler.h index e32fb895..5e81ffec 100644 --- a/common/profiler.h +++ b/common/profiler.h @@ -26,12 +26,12 @@ struct cpu_props { name(""), description(""), cores(0), - flops_f32_f32(EPS), - flops_f16_f32(EPS), - flops_q4k_f32(EPS), - flops_q5k_f32(EPS), - flops_q6k_f32(EPS), - flops_q80_f32(EPS) {} + flops_f32_f32(0.0f), + flops_f16_f32(0.0f), + flops_q4k_f32(0.0f), + flops_q5k_f32(0.0f), + flops_q6k_f32(0.0f), + flops_q80_f32(0.0f) {} }; struct memory_info { @@ -84,7 +84,7 @@ struct gpu_props { float metal_flops_q5k_f32; // in GFLOPS float metal_flops_q6k_f32; // in GFLOPS float metal_flops_q80_f32; // in GFLOPS - float metal_mem_cpy_delay; // in ms + float metal_mem_cpy_delay; // in ms float cuda_read_vram_bw; // in GB/s float cuda_flops_f32_f32; // in GFLOPS float cuda_flops_f16_f32; // in GFLOPS @@ -92,7 +92,7 @@ struct gpu_props { float cuda_flops_q5k_f32; // in GFLOPS float cuda_flops_q6k_f32; // in GFLOPS float cuda_flops_q80_f32; // in GFLOPS - float cuda_mem_cpy_delay; // in ms + float cuda_mem_cpy_delay; // in ms gpu_props() : name(""), @@ -100,20 +100,20 @@ struct gpu_props { memory_free (0.0f), memory_total (0.0f), metal_read_vram_bw (0.0f), - metal_flops_f32_f32(EPS), - metal_flops_f16_f32(EPS), - metal_flops_q4k_f32(EPS), - metal_flops_q5k_f32(EPS), - metal_flops_q6k_f32(EPS), - metal_flops_q80_f32(EPS), + metal_flops_f32_f32(0.0f), + metal_flops_f16_f32(0.0f), + metal_flops_q4k_f32(0.0f), + metal_flops_q5k_f32(0.0f), + metal_flops_q6k_f32(0.0f), + metal_flops_q80_f32(0.0f), metal_mem_cpy_delay(0.0f), cuda_read_vram_bw (0.0f), - cuda_flops_f32_f32 (EPS), - cuda_flops_f16_f32 (EPS), - cuda_flops_q4k_f32 (EPS), - cuda_flops_q5k_f32 (EPS), - cuda_flops_q6k_f32 (EPS), - cuda_flops_q80_f32 (EPS), + cuda_flops_f32_f32 (0.0f), + cuda_flops_f16_f32 (0.0f), + cuda_flops_q4k_f32 (0.0f), + cuda_flops_q5k_f32 (0.0f), + cuda_flops_q6k_f32 (0.0f), + cuda_flops_q80_f32 (0.0f), cuda_mem_cpy_delay (0.0f) {} };