fix t_load_us

2025-09-06 15:59:07 +00:00 · 2024-11-28 15:55:21 +04:00 · 2024-11-28 15:55:21 +04:00 · 9a7bbce7ad
commit 9a7bbce7ad
parent 740f7f0b95
3 changed files with 8 additions and 0 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -944,6 +944,8 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
        return iparams;
    }

+    llama_perf_context_sync(lctx, model);
+
    if (llama_context_setup_backend(model, cparams, lctx) == nullptr) {
        LOG_ERR("%s: failed to setup context with model '%s'\n", __func__, params.model.c_str());
        llama_free_model(model);
--- a/include/llama.h
+++ b/include/llama.h
@@ -1255,6 +1255,7 @@ extern "C" {
    LLAMA_API struct llama_perf_context_data llama_perf_context      (const struct llama_context * ctx);
    LLAMA_API void                           llama_perf_context_print(const struct llama_context * ctx);
    LLAMA_API void                           llama_perf_context_reset(      struct llama_context * ctx);
+    LLAMA_API void                           llama_perf_context_sync (      struct llama_context * ctx, const struct llama_model * model);

    // NOTE: the following work only with samplers constructed via llama_sampler_chain_init
    LLAMA_API struct llama_perf_sampler_data llama_perf_sampler      (const struct llama_sampler * chain);
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3547,6 +3547,11 @@ static ggml_backend_buffer_type_t llama_default_buffer_type_offload(const llama_
    GGML_UNUSED(model);
 }

+void llama_perf_context_sync(struct llama_context * ctx, const struct llama_model * model) { // copies the model's t_start_us / t_load_us fields into ctx; model is only read
+    ctx->t_start_us = model->t_start_us; // NOTE(review): ctx and model are dereferenced without a null check - confirm callers always pass valid pointers
+    ctx->t_load_us  = model->t_load_us;  // presumably the load duration later shown by the perf report - TODO confirm against llama_perf_context
+}
+
 void llama_profile_device(device_info * dev_info, struct llama_model * model, llama_model_loader * ml, int n_threads) {
    dev_info->device_name               = device_name();
    dev_info->cpu_props.cores           = device_cpu_cores();