From e7fae2acdbe26e9003845c67e3fa8c3d74f115e5 Mon Sep 17 00:00:00 2001
From: Lizonghang <870644199@qq.com>
Date: Thu, 16 Jan 2025 09:48:08 +0400
Subject: [PATCH] fix cuda mem limitation

---
 src/llama.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 1ca03938..117ba1b2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3620,7 +3620,13 @@ void llama_profile_device(
     dev_info->gpu_props.name = gpu_props.name;
     dev_info->gpu_props.description = gpu_props.description;
-    dev_info->gpu_props.memory_free = std::min((double)cuda_mem, round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100);
+    dev_info->gpu_props.memory_free = round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100;
+
+#ifdef GGML_USE_CUDA
+    // CUDA memory limitation
+    dev_info->gpu_props.memory_free = std::min((float)cuda_mem, dev_info->gpu_props.memory_free);
+#endif
+
     dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
     dev_info->gpu_props.metal_read_vram_bw = device_metal_read_vram_bw();
     dev_info->gpu_props.cuda_read_vram_bw = device_cuda_read_vram_bw();
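
For reference, the change first converts the backend-reported free memory from bytes to GiB (rounded to two decimals), and only then caps it by the CUDA limit when CUDA is in use. Below is a minimal standalone sketch of that logic. The helper bytes_to_gib, the variable cuda_mem_gib (standing in for the patch's cuda_mem parameter, assumed here to already be in GiB), and the sample values are all hypothetical, not part of the patch.

// sketch.cpp -- illustrates the patched free-memory computation.
// Assumptions (not from the patch): byte counts are supplied by the
// caller, and cuda_mem_gib plays the role of the patch's cuda_mem.
#include <algorithm>
#include <cmath>
#include <cstdio>

// Convert a byte count to GiB rounded to two decimal places,
// mirroring the patch's round(x / (double)(1 << 30) * 100) / 100.
static float bytes_to_gib(double bytes) {
    return (float)(std::round(bytes / (double)(1 << 30) * 100.0) / 100.0);
}

int main() {
    double free_bytes   = 7.5 * (double)(1 << 30); // hypothetical device report
    double total_bytes  = 8.0 * (double)(1 << 30);
    float  cuda_mem_gib = 6.0f;                    // hypothetical CUDA limit

    float memory_free  = bytes_to_gib(free_bytes);
    float memory_total = bytes_to_gib(total_bytes);

    // As in the patch: compute the reported value first, then apply the
    // CUDA cap only when building with CUDA support.
#ifdef GGML_USE_CUDA
    memory_free = std::min(cuda_mem_gib, memory_free);
#endif

    std::printf("free: %.2f GiB, total: %.2f GiB\n", memory_free, memory_total);
    return 0;
}

Built with -DGGML_USE_CUDA, the sketch prints "free: 6.00 GiB" rather than 7.50, which is the point of the fix: the advertised free VRAM can never exceed the CUDA-side limit.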