From 1b3b6a506f8538c0192fa659dcb524d394bee7c1 Mon Sep 17 00:00:00 2001
From: "Li, Zonghang" <870644199@qq.com>
Date: Tue, 3 Jun 2025 17:10:09 +0400
Subject: [PATCH] fix: add warm-up in profiling to prevent init delay

The first ggml_backend_graph_compute() call on a freshly built graph
pays one-time initialization costs, so timing that first run inflates
the profiling results. Run the graph once before starting the timer in
device_inp_embd_delay() and device_mem_copy() so that only steady-state
latency is measured.

Also demote the per-decode KV-cache signal logs in
llama_decode_internal() from INFO to DEBUG to keep normal output quiet.
---
 common/profiler.cpp |  5 ++++-
 src/llama.cpp       | 10 +++++-----
 2 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/common/profiler.cpp b/common/profiler.cpp
index b54bb0be..a2ac33b5 100644
--- a/common/profiler.cpp
+++ b/common/profiler.cpp
@@ -439,7 +439,7 @@ float device_inp_embd_delay(struct llama_model * model, enum ggml_type src0t, in
     }
 
     // warm-up
-    // ggml_backend_graph_compute(backend, gf);
+    ggml_backend_graph_compute(backend, gf);
 
     const int64_t t_start = ggml_time_us();
     ggml_backend_graph_compute(backend, gf);
@@ -1288,6 +1288,9 @@ static float device_mem_copy(struct llama_model * model, enum profiler_backend_t
         ggml_backend_cpu_set_n_threads(backend, n_threads);
     }
 
+    // warm-up
+    ggml_backend_graph_compute(backend, gf);
+
     const int64_t t_start = ggml_time_us();
     ggml_backend_graph_compute(backend, gf);
     const int64_t t_end = ggml_time_us();
diff --git a/src/llama.cpp b/src/llama.cpp
index cd5a95b1..2cc8da15 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -18211,7 +18211,7 @@ static int llama_decode_internal(
             [&]{ llama_kv_cache_clear       (&lctx); },
             [&]{ llama_send_kv_cache_clear  (&lctx); },
             is_last_dev)) {
-            LLAMA_LOG_INFO("%s: received signal kv_cache_clear\n", __func__);
+            LLAMA_LOG_DEBUG("%s: received signal kv_cache_clear\n", __func__);
             return -1;
         }
 
@@ -18219,7 +18219,7 @@ static int llama_decode_internal(
             [&]{ llama_kv_cache_seq_rm      (&lctx, meta.rm_seq_id, meta.rm_p0, meta.rm_p1); },
             [&]{ llama_send_kv_cache_seq_rm (&lctx, meta.rm_seq_id, meta.rm_p0, meta.rm_p1); },
             is_last_dev)) {
-            LLAMA_LOG_INFO("%s: received signal kv_cache_seq_rm\n", __func__);
+            LLAMA_LOG_DEBUG("%s: received signal kv_cache_seq_rm\n", __func__);
             return -1;
         }
 
@@ -18227,7 +18227,7 @@ static int llama_decode_internal(
             [&]{ llama_kv_cache_seq_add     (&lctx, meta.add_seq_id, meta.add_p0, meta.add_p1, meta.add_delta); },
             [&]{ llama_send_kv_cache_seq_add(&lctx, meta.add_seq_id, meta.add_p0, meta.add_p1, meta.add_delta); },
             is_last_dev)) {
-            LLAMA_LOG_INFO("%s: received signal kv_cache_seq_add\n", __func__);
+            LLAMA_LOG_DEBUG("%s: received signal kv_cache_seq_add\n", __func__);
             return -1;
         }
 
@@ -18235,7 +18235,7 @@ static int llama_decode_internal(
             [&]{ llama_kv_cache_seq_cp      (&lctx, meta.cp_src_seq_id, meta.cp_dst_seq_id, meta.cp_p0, meta.cp_p1); },
             [&]{ llama_send_kv_cache_seq_cp (&lctx, meta.cp_src_seq_id, meta.cp_dst_seq_id, meta.cp_p0, meta.cp_p1); },
             is_last_dev)) {
-            LLAMA_LOG_INFO("%s: received signal kv_cache_seq_cp\n", __func__);
+            LLAMA_LOG_DEBUG("%s: received signal kv_cache_seq_cp\n", __func__);
             return -1;
         }
 
@@ -18243,7 +18243,7 @@ static int llama_decode_internal(
             [&]{ llama_kv_cache_seq_div     (&lctx, meta.div_seq_id, meta.div_p0, meta.div_p1, meta.div_factor); },
             [&]{ llama_send_kv_cache_seq_div(&lctx, meta.div_seq_id, meta.div_p0, meta.div_p1, meta.div_factor); },
             is_last_dev)) {
-            LLAMA_LOG_INFO("%s: received signal kv_cache_seq_div\n", __func__);
+            LLAMA_LOG_DEBUG("%s: received signal kv_cache_seq_div\n", __func__);
             return -1;
         }
     }
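
Note: below is a minimal, generic C++ sketch of the warm-up-then-measure
pattern the profiler change applies. It deliberately avoids the ggml API:
`work` stands in for ggml_backend_graph_compute(backend, gf), and
std::chrono stands in for ggml_time_us(). Treat it as an illustration,
not project code.

    #include <chrono>
    #include <cstdio>
    #include <functional>

    // Time one execution of `work`, discarding a first untimed run so
    // that one-time costs (allocation, kernel compilation, cold caches)
    // do not inflate the measurement.
    static float timed_ms(const std::function<void()> & work) {
        work(); // warm-up: absorbs the init delay, as in the patch

        const auto t_start = std::chrono::steady_clock::now();
        work(); // steady-state run: the only one measured
        const auto t_end = std::chrono::steady_clock::now();

        return std::chrono::duration<float, std::milli>(t_end - t_start).count();
    }

    int main() {
        volatile long sink = 0;
        const float t_ms = timed_ms([&] {
            for (long i = 0; i < 50000000; ++i) { sink = sink + i; }
        });
        std::printf("steady-state time: %.3f ms\n", t_ms);
        return 0;
    }

Without the warm-up call, the first timed_ms() result would include
whatever setup the first run triggers, which is exactly the skew the
patch removes from device_inp_embd_delay() and device_mem_copy().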