fix compute buffer estimate: tested on metal

Author: Li, Zonghang    Date: 2025-06-20 13:43:55 +04:00
parent dd589561b4
commit 80e5b71b48
5 changed files with 134 additions and 31 deletions


@@ -1603,10 +1603,20 @@ static float device_disk_access_delay(struct device_info & dev_info, struct llam
 #if defined(GGML_USE_METAL) || defined(GGML_USE_CUDA)
     llama_kv_size(&cpu_kv_size, &gpu_kv_size, model, cparams, true);
-    llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, true, true, n_bytes, n_layers > n_gpu_layers);
+    enum backend_type backend;
+#if GGML_USE_METAL
+    backend = BACKEND_METAL;
+#elif GGML_USE_CUDA
+    backend = BACKEND_CUDA;
+#endif
+    llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, backend, true, n_bytes, n_layers > n_gpu_layers, n_gpu_layers > 0);
 #else
     llama_kv_size(&cpu_kv_size, &gpu_kv_size, model, cparams, false);
-    llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, false, true, n_bytes, n_layers > n_gpu_layers);
+    enum backend_type backend = BACKEND_CPU;
+    llama_model_compute_buf_size(&cpu_compute_buf, &gpu_compute_buf, model, cparams, backend, true, n_bytes, n_layers > n_gpu_layers, n_gpu_layers > 0);
 #endif
     double cpu_kv_size_gib = static_cast<double>(cpu_kv_size) / 1024.0 / 1024.0 / 1024.0; // convert to GiB
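
Below is a minimal, self-contained C++ sketch of the pattern this commit introduces: the backend is resolved once at compile time into a backend_type enum and forwarded to the compute-buffer estimator, instead of passing a bare boolean. The enum values mirror the diff; estimate_compute_buf() is a hypothetical stand-in for llama_model_compute_buf_size(), which in the real code also takes the model, cparams, and the layer-split flags shown above.

// Sketch only: estimate_compute_buf() is hypothetical, not the real
// llama_model_compute_buf_size() from this repository.
#include <cstdint>
#include <cstdio>

enum backend_type { BACKEND_CPU, BACKEND_METAL, BACKEND_CUDA };

// Hypothetical stand-in: splits a compute-buffer estimate between host and
// device depending on the backend and whether any layers are offloaded.
static void estimate_compute_buf(uint64_t * cpu_buf, uint64_t * gpu_buf,
                                 uint64_t total_bytes, backend_type backend,
                                 bool has_gpu_layers) {
    if (backend != BACKEND_CPU && has_gpu_layers) {
        *gpu_buf = total_bytes;
        *cpu_buf = 0;
    } else {
        *gpu_buf = 0;
        *cpu_buf = total_bytes;
    }
}

int main() {
    // Resolve the backend at compile time, mirroring the patched block:
    // Metal takes priority over CUDA, and CPU is the fallback.
#if defined(GGML_USE_METAL)
    backend_type backend = BACKEND_METAL;
#elif defined(GGML_USE_CUDA)
    backend_type backend = BACKEND_CUDA;
#else
    backend_type backend = BACKEND_CPU;
#endif

    uint64_t cpu_buf = 0, gpu_buf = 0;
    estimate_compute_buf(&cpu_buf, &gpu_buf, /*total_bytes=*/512ull << 20,
                         backend, /*has_gpu_layers=*/true);
    printf("backend=%d cpu_buf=%llu gpu_buf=%llu\n", (int) backend,
           (unsigned long long) cpu_buf, (unsigned long long) gpu_buf);
    return 0;
}

With no GGML_USE_* macro defined, the sketch falls into the CPU branch and the whole estimate lands in cpu_buf, which matches the #else path of the patched code.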