fix compute buffer estimate: reserve 200 MiB VRAM to avoid potential OOM

Li, Zonghang 2025-06-24 20:39:49 +04:00
parent 4dde8458cf
commit 72701ae872


@@ -22013,8 +22013,8 @@ void llama_model_compute_buf_size(
     // context GPU memory usage, i.e. the initial memory cost of creating a CUDA context,
     // even before you launch any kernels or allocate your own buffers.
     // this value may vary by GPU and CUDA version, but it's lower than 400 MiB in most cases,
-    // another 100 MiB is used to prevent accidental OOM.
-    *gpu_buf += 500 * 1024 * 1024;
+    // another 200 MiB is used to prevent accidental OOM.
+    *gpu_buf += 600 * 1024 * 1024;
     }
 }