fix cuda mem limitation

2025-09-10 18:24:34 +00:00 · 2025-01-16 09:48:08 +04:00 · 2025-01-16 09:48:08 +04:00 · e7fae2acdb
commit e7fae2acdb
parent c634c2fcbe
1 changed files with 7 additions and 1 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3620,7 +3620,13 @@ void llama_profile_device(

    dev_info->gpu_props.name                = gpu_props.name;
    dev_info->gpu_props.description         = gpu_props.description;
-    dev_info->gpu_props.memory_free         = std::min((double)cuda_mem, round(gpu_props.memory_free  / (double)(1 << 30) * 100) / 100);
+    dev_info->gpu_props.memory_free         = round(gpu_props.memory_free  / (double)(1 << 30) * 100) / 100;
+
+#ifdef GGML_USE_CUDA
+    // CUDA memory limitation
+    dev_info->gpu_props.memory_free         = std::min((float)cuda_mem, dev_info->gpu_props.memory_free);
+#endif
+
    dev_info->gpu_props.memory_total        = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
    dev_info->gpu_props.metal_read_vram_bw  = device_metal_read_vram_bw();
    dev_info->gpu_props.cuda_read_vram_bw   = device_cuda_read_vram_bw();