mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-11 09:14:33 +00:00
recover device_memory_access_delay
This commit is contained in:
parent
89aad7880e
commit
44b4718c8b
1 changed file with 1 addition and 1 deletion
|
@@ -1522,7 +1522,7 @@ void device_print_props(struct device_info * dev_info_set, int n, struct llama_m
|
||||||
float latency = 0.0f;
|
float latency = 0.0f;
|
||||||
int n_layers = llama_model_n_layers (model);
|
int n_layers = llama_model_n_layers (model);
|
||||||
latency += device_compute_delay (dev_info_set[0], n_layers, cparams);
|
latency += device_compute_delay (dev_info_set[0], n_layers, cparams);
|
||||||
// latency += device_memory_access_delay(dev_info_set[0], cparams, n_layers);
|
latency += device_memory_access_delay(dev_info_set[0], cparams, n_layers);
|
||||||
latency += device_disk_access_delay (dev_info_set[0], model, cparams); // if physical memory is not enough, some tensor weights will be released from memory and reloaded by mmap later
|
latency += device_disk_access_delay (dev_info_set[0], model, cparams); // if physical memory is not enough, some tensor weights will be released from memory and reloaded by mmap later
|
||||||
|
|
||||||
LOG_INF("| Token latency (ms) ");
|
LOG_INF("| Token latency (ms) ");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue