Fix CUDA memory limitation

This commit is contained in:
Lizonghang 2025-01-16 09:48:08 +04:00
parent c634c2fcbe
commit e7fae2acdb

View file

@ -3620,7 +3620,13 @@ void llama_profile_device(
     dev_info->gpu_props.name        = gpu_props.name;
     dev_info->gpu_props.description = gpu_props.description;
-    dev_info->gpu_props.memory_free = std::min((double)cuda_mem, round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100);
+    dev_info->gpu_props.memory_free = round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100;
+#ifdef GGML_USE_CUDA
+    // CUDA memory limitation
+    dev_info->gpu_props.memory_free = std::min((float)cuda_mem, dev_info->gpu_props.memory_free);
+#endif
     dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
     dev_info->gpu_props.metal_read_vram_bw = device_metal_read_vram_bw();
     dev_info->gpu_props.cuda_read_vram_bw = device_cuda_read_vram_bw();