common : more accurate sampling timing (#17382)

* common : more accurate sampling timing
* eval-callback : minor fixes
* cont : add time_meas impl
* cont : fix log msg [no ci]
* cont : fix multiple definitions of time_meas
* llama-cli : exclude chat template init from time measurement
* cont : print percentage of unaccounted time
* cont : do not reset timings
2026-05-08 09:59:50 +00:00 · 2025-11-20 13:40:10 +02:00 · 2025-11-20 13:40:10 +02:00 · 196f5083ef
commit 196f5083ef
parent 5088b435d4
7 changed files with 102 additions and 33 deletions
--- a/src/llama-impl.cpp
+++ b/src/llama-impl.cpp
@@ -20,10 +20,10 @@ static llama_logger_state g_logger_state;
 time_meas::time_meas(int64_t & t_acc, bool disable) : t_start_us(disable ? -1 : ggml_time_us()), t_acc(t_acc) {}

 time_meas::~time_meas() {
-        if (t_start_us >= 0) {
-            t_acc += ggml_time_us() - t_start_us;
-        }
+    if (t_start_us >= 0) {
+        t_acc += ggml_time_us() - t_start_us;
    }
+}

 void llama_log_set(ggml_log_callback log_callback, void * user_data) {
    ggml_log_set(log_callback, user_data);