From e7fae2acdbe26e9003845c67e3fa8c3d74f115e5 Mon Sep 17 00:00:00 2001
From: Lizonghang <870644199@qq.com>
Date: Thu, 16 Jan 2025 09:48:08 +0400
Subject: [PATCH] fix cuda mem limitation

---
 src/llama.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 1ca03938..117ba1b2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3620,7 +3620,13 @@ void llama_profile_device(
     dev_info->gpu_props.name = gpu_props.name;
     dev_info->gpu_props.description = gpu_props.description;
-    dev_info->gpu_props.memory_free = std::min((double)cuda_mem, round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100);
+    dev_info->gpu_props.memory_free = round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100;
+
+#ifdef GGML_USE_CUDA
+    // CUDA memory limitation
+    dev_info->gpu_props.memory_free = std::min((float)cuda_mem, dev_info->gpu_props.memory_free);
+#endif
+
     dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
     dev_info->gpu_props.metal_read_vram_bw = device_metal_read_vram_bw();
     dev_info->gpu_props.cuda_read_vram_bw = device_cuda_read_vram_bw();
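
For reference, the change first converts the backend-reported free memory from bytes to GiB (rounded to two decimals), and only then caps it by the CUDA limit when CUDA is in use. Below is a minimal standalone sketch of that logic. The helper bytes_to_gib, the variable cuda_mem_gib (standing in for the patch's cuda_mem parameter, assumed here to already be in GiB), and the sample values are all hypothetical, not part of the patch.

// sketch.cpp -- illustrates the patched free-memory computation.
// Assumptions (not from the patch): byte counts are supplied by the
// caller, and cuda_mem_gib plays the role of the patch's cuda_mem.
#include <algorithm>
#include <cmath>
#include <cstdio>

// Convert a byte count to GiB rounded to two decimal places,
// mirroring the patch's round(x / (double)(1 << 30) * 100) / 100.
static float bytes_to_gib(double bytes) {
    return (float)(std::round(bytes / (double)(1 << 30) * 100.0) / 100.0);
}

int main() {
    double free_bytes   = 7.5 * (double)(1 << 30); // hypothetical device report
    double total_bytes  = 8.0 * (double)(1 << 30);
    float  cuda_mem_gib = 6.0f;                    // hypothetical CUDA limit

    float memory_free  = bytes_to_gib(free_bytes);
    float memory_total = bytes_to_gib(total_bytes);

    // As in the patch: compute the reported value first, then apply the
    // CUDA cap only when building with CUDA support.
#ifdef GGML_USE_CUDA
    memory_free = std::min(cuda_mem_gib, memory_free);
#endif

    std::printf("free: %.2f GiB, total: %.2f GiB\n", memory_free, memory_total);
    return 0;
}

Built with -DGGML_USE_CUDA, the sketch prints "free: 6.00 GiB" rather than 7.50, which is the point of the fix: the advertised free VRAM can never exceed the CUDA-side limit.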