diff --git a/src/llama.cpp b/src/llama.cpp
index 2531f0fe..e20877fa 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3623,8 +3623,8 @@ void llama_profile_device(
     dev_info->gpu_props.description = gpu_props.description;
 
     // reserved/limit memory to avoid potential OOM, default to 300 MiB
-    dev_info->gpu_props.memory_free  = round(gpu_props.memory_free  / (double)(1 << 30) * 100) / 100 - 0.3;
-    dev_info->gpu_props.memory_free  = std::min((float)gpu_mem, dev_info->gpu_props.memory_free);
+    dev_info->gpu_props.memory_free  = round(gpu_props.memory_free  / (double)(1 << 30) * 100) / 100;
+    dev_info->gpu_props.memory_free  = std::min((float)gpu_mem, dev_info->gpu_props.memory_free) - 0.3;
     dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
 
     dev_info->gpu_props.metal_read_vram_bw = device_metal_read_vram_bw();
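
Note on why the reordering matters: in the old version the 0.3 GiB reserve was subtracted before the std::min clamp, so it was silently dropped whenever the user-supplied gpu_mem cap was the smaller of the two values; subtracting after the clamp keeps the 300 MiB headroom in both cases. A minimal standalone sketch of the two orderings, using hypothetical numbers (10 GiB reported free, an 8 GiB user cap) rather than the surrounding llama.cpp state:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>

    int main() {
        // Hypothetical inputs: 10 GiB reported free by the backend,
        // and a user-supplied cap of 8 GiB (stands in for gpu_mem).
        double reported_free_bytes = 10.0 * (1 << 30);
        float  gpu_mem             = 8.0f; // cap, in GiB

        // Round reported free memory to 2 decimal places, in GiB,
        // mirroring the expression in the patch.
        float free_gib = (float)(std::round(reported_free_bytes / (double)(1 << 30) * 100) / 100);

        // Old ordering: reserve applied before the clamp. When the cap
        // is the binding value, the reserve is lost: min(8, 9.7) = 8.00.
        float old_result = std::min(gpu_mem, free_gib - 0.3f);

        // New ordering: clamp first, then subtract the reserve. The
        // headroom always applies: min(8, 10) - 0.3 = 7.70.
        float new_result = std::min(gpu_mem, free_gib) - 0.3f;

        printf("old: %.2f GiB, new: %.2f GiB\n", old_result, new_result);
        return 0;
    }

With these inputs the old code reports 8.00 GiB usable (no headroom left under the cap), while the patched code reports 7.70 GiB, preserving the OOM reserve regardless of which operand of std::min wins.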