From 44b4718c8b0c9cfbdd34ac48680edb2d0909e706 Mon Sep 17 00:00:00 2001 From: Zonghang Li Date: Wed, 4 Dec 2024 12:29:24 +0400 Subject: [PATCH] recover device_memory_access_delay --- common/profiler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/profiler.cpp b/common/profiler.cpp index 1730598d..63da8c62 100644 --- a/common/profiler.cpp +++ b/common/profiler.cpp @@ -1522,7 +1522,7 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m float latency = 0.0f; int n_layers = llama_model_n_layers (model); latency += device_compute_delay (dev_info_set[0], n_layers, cparams); - // latency += device_memory_access_delay(dev_info_set[0], cparams, n_layers); + latency += device_memory_access_delay(dev_info_set[0], cparams, n_layers); latency += device_disk_access_delay (dev_info_set[0], model, cparams); // if physical memory is not enough, some tensor weights will be released from memory and reloaded by mmap later LOG_INF("| Token latency (ms) ");