mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-07 03:19:02 +00:00
device_disk_access_delay: adapt for Linux (on Termux)
This commit is contained in:
parent
2c2171cebf
commit
cead0f2e5a
1 changed file with 10 additions and 2 deletions
|
@ -1212,10 +1212,10 @@ static float device_disk_access_delay(struct device_info & dev_info, struct llam
|
||||||
uint64_t gpu_compute_buf;
|
uint64_t gpu_compute_buf;
|
||||||
|
|
||||||
#if defined(GGML_USE_METAL) || defined(GGML_USE_CUDA)
|
#if defined(GGML_USE_METAL) || defined(GGML_USE_CUDA)
|
||||||
llama_total_kv_size(&cpu_kv_size, &gpu_kv_size, model, cparams, true);
|
llama_kv_size(&cpu_kv_size, &gpu_kv_size, model, cparams, true);
|
||||||
llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, true);
|
llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, true);
|
||||||
#else
|
#else
|
||||||
llama_total_kv_size(&cpu_kv_size, &gpu_kv_size, model, cparams, false);
|
llama_kv_size(&cpu_kv_size, &gpu_kv_size, model, cparams, false);
|
||||||
llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, false);
|
llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, false);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1249,8 +1249,16 @@ static float device_disk_access_delay(struct device_info & dev_info, struct llam
|
||||||
float disk_read_bw = dev_info.disk.read_rnd_bw * 1e9 / 1024.0 / 1024.0 / 1024.0; // convert GB/s to GiB/s
|
float disk_read_bw = dev_info.disk.read_rnd_bw * 1e9 / 1024.0 / 1024.0 / 1024.0; // convert GB/s to GiB/s
|
||||||
|
|
||||||
if (cpu_total_bytes_gib + cpu_kv_size_gib + cpu_compute_buf_gib > cpu_mem_avail) {
|
if (cpu_total_bytes_gib + cpu_kv_size_gib + cpu_compute_buf_gib > cpu_mem_avail) {
|
||||||
|
|
||||||
|
#if defined(__APPLE__) && defined(__MACH__)
|
||||||
// if physical memory reaches busy, all mapped tensors should be re-loaded
|
// if physical memory reaches busy, all mapped tensors should be re-loaded
|
||||||
return cpu_total_bytes_gib / disk_read_bw * 1000; // convert to ms
|
return cpu_total_bytes_gib / disk_read_bw * 1000; // convert to ms
|
||||||
|
#else
|
||||||
|
// only part of the mapped tensors needs to be re-loaded
|
||||||
|
float gbytes_to_load = cpu_total_bytes_gib - (cpu_mem_avail - cpu_kv_size_gib - cpu_compute_buf_gib);
|
||||||
|
return gbytes_to_load / disk_read_bw * 1000; // convert to ms
|
||||||
|
#endif
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// if physical memory is enough, all mapped tensors can be stored in memory and will not be released
|
// if physical memory is enough, all mapped tensors can be stored in memory and will not be released
|
||||||
return 0.0f;
|
return 0.0f;
|
||||||
|
|
Loading…
Add table
Reference in a new issue