fix automatic VRAM detection for ROCm and Vulkan backends (#1715)

* use rocminfo for ROCm VRAM detection * vulkan VRAM detection needs to consider all heaps, don't print that we're unable to detect VRAM until all detection is ran
2026-05-31 21:39:42 +00:00 · 2025-08-30 00:22:32 -07:00 · 2025-08-30 00:22:32 -07:00 · cb9bd2fc4a
commit cb9bd2fc4a
parent 7b396bd917
1 changed files with 43 additions and 18 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -1195,22 +1195,45 @@ def fetch_gpu_properties(testCL,testCU,testVK):
            FetchedCUfreeMem = []
            pass
        if len(FetchedCUdevices)==0:
-            try: # Get AMD ROCm GPU names
+            try: # Get AMD ROCm GPU names and VRAM from rocminfo
                output = subprocess.run(['rocminfo'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
                device_name = None
+                current_agent_is_gpu = False
+                in_pool_section = False
+                
                for line in output.splitlines(): # read through the output line by line
                    line = line.strip()
-                    if line.startswith("Marketing Name:"):
+                    if line.startswith("Agent ") and "Agent" in line:
+                        # Reset state for new agent
+                        device_name = None
+                        current_agent_is_gpu = False
+                        in_pool_section = False
+                    elif line.startswith("Marketing Name:"):
                        device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
-                    elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
+                    elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: 
+                        # if the following Device Type is a GPU (not a CPU) then add it to devices list
                        FetchedCUdevices.append(device_name)
+                        current_agent_is_gpu = True
                        AMDgpu = True
                    elif line.startswith("Device Type:") and "GPU" not in line:
                        device_name = None
-                if FetchedCUdevices:
-                    getamdvram = subprocess.run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout # fetch VRAM of devices
-                    if getamdvram:
-                        FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
+                        current_agent_is_gpu = False
+                    elif line.startswith("Pool Info:") and current_agent_is_gpu:
+                        in_pool_section = True
+                    elif in_pool_section and current_agent_is_gpu and line.startswith("Segment:") and "GLOBAL" in line and "COARSE GRAINED" in line:
+                        # This is the main VRAM pool for this GPU
+                        continue
+                    elif in_pool_section and current_agent_is_gpu and line.startswith("Size:"):
+                        # Extract VRAM size in KB and convert to MB
+                        size_match = re.search(r'(\d+)\(0x[0-9a-fA-F]+\)\s*KB', line)
+                        if size_match:
+                            vram_kb = int(size_match.group(1))
+                            vram_mb = vram_kb // 1024
+                            FetchedCUdeviceMem.append(str(vram_mb))
+                            in_pool_section = False
+                
+                if FetchedCUdevices and FetchedCUdeviceMem:
+                    print(f"Detected AMD GPU VRAM from rocminfo: {list(zip(FetchedCUdevices, FetchedCUdeviceMem))} MB")
            except Exception:
                FetchedCUdeviceMem = []
                FetchedCUfreeMem = []
@ -1236,12 +1259,10 @@ def fetch_gpu_properties(testCL,testCU,testVK):
        MaxMemory[0] = max(lowestcumem,MaxMemory[0])
        MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])

-        if MaxMemory[0] < (1024*1024*256):
-            print("Unable to detect VRAM, please set layers manually.")
-
    if testVK:
        try: # Get Vulkan names
            foundVkGPU = False
+            lowestvkmem = 0 
            output = subprocess.run(['vulkaninfo','--summary'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
            devicelist = [line.split("=")[1].strip() for line in output.splitlines() if "deviceName" in line]
            devicetypes = [line.split("=")[1].strip() for line in output.splitlines() if "deviceType" in line]
@ -1265,16 +1286,15 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                    output = subprocess.run(['vulkaninfo'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
                    devicechunks = output.split("VkPhysicalDeviceMemoryProperties")[1:]
                    gpuidx = 0
-                    lowestvkmem = 0
                    for chunk in devicechunks:
                        heaps = chunk.split("memoryTypes:")[0].split("memoryHeaps[")[1:]
-                        snippet = heaps[0]
-                        if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in snippet and "size" in snippet:
-                            match = re.search(r"size\s*=\s*(\d+)", snippet)
-                            if match:
-                                dmem = int(match.group(1))
-                                if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max:
-                                    lowestvkmem = dmem if lowestvkmem==0 else (dmem if dmem<lowestvkmem else lowestvkmem)
+                        for heap in heaps:  # Check all heaps, not just the first one
+                            if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in heap and "size" in heap:
+                                match = re.search(r"size\s*=\s*(\d+)", heap)
+                                if match:
+                                    dmem = int(match.group(1))
+                                    if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max:
+                                        lowestvkmem = dmem if lowestvkmem==0 else (dmem if dmem<lowestvkmem else lowestvkmem)
                        gpuidx += 1
                except Exception: # failed to get vulkan vram
                    pass
@ -1311,6 +1331,11 @@ def fetch_gpu_properties(testCL,testCU,testVK):
            MaxMemory[0] = max(lowestclmem,MaxMemory[0])
        except Exception:
            pass
+    
+    # Check VRAM detection after all backends have been tested
+    if MaxMemory[0] < (1024*1024*256):
+        print("Unable to detect VRAM, please set layers manually.")
+    
    return

 def auto_set_backend_cli():