mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-07 00:41:50 +00:00
fix automatic VRAM detection for ROCm and Vulkan backends (#1715)
* use rocminfo for ROCm VRAM detection * vulkan VRAM detection needs to consider all heaps, don't print that we're unable to detect VRAM until all detection is ran
This commit is contained in:
parent
7b396bd917
commit
cb9bd2fc4a
1 changed files with 43 additions and 18 deletions
61
koboldcpp.py
61
koboldcpp.py
|
|
@ -1195,22 +1195,45 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
FetchedCUfreeMem = []
|
||||
pass
|
||||
if len(FetchedCUdevices)==0:
|
||||
try: # Get AMD ROCm GPU names
|
||||
try: # Get AMD ROCm GPU names and VRAM from rocminfo
|
||||
output = subprocess.run(['rocminfo'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
|
||||
device_name = None
|
||||
current_agent_is_gpu = False
|
||||
in_pool_section = False
|
||||
|
||||
for line in output.splitlines(): # read through the output line by line
|
||||
line = line.strip()
|
||||
if line.startswith("Marketing Name:"):
|
||||
if line.startswith("Agent ") and "Agent" in line:
|
||||
# Reset state for new agent
|
||||
device_name = None
|
||||
current_agent_is_gpu = False
|
||||
in_pool_section = False
|
||||
elif line.startswith("Marketing Name:"):
|
||||
device_name = line.split(":", 1)[1].strip() # if we find a named device, temporarily save the name
|
||||
elif line.startswith("Device Type:") and "GPU" in line and device_name is not None: # if the following Device Type is a GPU (not a CPU) then add it to devices list
|
||||
elif line.startswith("Device Type:") and "GPU" in line and device_name is not None:
|
||||
# if the following Device Type is a GPU (not a CPU) then add it to devices list
|
||||
FetchedCUdevices.append(device_name)
|
||||
current_agent_is_gpu = True
|
||||
AMDgpu = True
|
||||
elif line.startswith("Device Type:") and "GPU" not in line:
|
||||
device_name = None
|
||||
if FetchedCUdevices:
|
||||
getamdvram = subprocess.run(['rocm-smi', '--showmeminfo', 'vram', '--csv'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout # fetch VRAM of devices
|
||||
if getamdvram:
|
||||
FetchedCUdeviceMem = [line.split(",")[1].strip() for line in getamdvram.splitlines()[1:] if line.strip()]
|
||||
current_agent_is_gpu = False
|
||||
elif line.startswith("Pool Info:") and current_agent_is_gpu:
|
||||
in_pool_section = True
|
||||
elif in_pool_section and current_agent_is_gpu and line.startswith("Segment:") and "GLOBAL" in line and "COARSE GRAINED" in line:
|
||||
# This is the main VRAM pool for this GPU
|
||||
continue
|
||||
elif in_pool_section and current_agent_is_gpu and line.startswith("Size:"):
|
||||
# Extract VRAM size in KB and convert to MB
|
||||
size_match = re.search(r'(\d+)\(0x[0-9a-fA-F]+\)\s*KB', line)
|
||||
if size_match:
|
||||
vram_kb = int(size_match.group(1))
|
||||
vram_mb = vram_kb // 1024
|
||||
FetchedCUdeviceMem.append(str(vram_mb))
|
||||
in_pool_section = False
|
||||
|
||||
if FetchedCUdevices and FetchedCUdeviceMem:
|
||||
print(f"Detected AMD GPU VRAM from rocminfo: {list(zip(FetchedCUdevices, FetchedCUdeviceMem))} MB")
|
||||
except Exception:
|
||||
FetchedCUdeviceMem = []
|
||||
FetchedCUfreeMem = []
|
||||
|
|
@ -1236,12 +1259,10 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
MaxMemory[0] = max(lowestcumem,MaxMemory[0])
|
||||
MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])
|
||||
|
||||
if MaxMemory[0] < (1024*1024*256):
|
||||
print("Unable to detect VRAM, please set layers manually.")
|
||||
|
||||
if testVK:
|
||||
try: # Get Vulkan names
|
||||
foundVkGPU = False
|
||||
lowestvkmem = 0
|
||||
output = subprocess.run(['vulkaninfo','--summary'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
|
||||
devicelist = [line.split("=")[1].strip() for line in output.splitlines() if "deviceName" in line]
|
||||
devicetypes = [line.split("=")[1].strip() for line in output.splitlines() if "deviceType" in line]
|
||||
|
|
@ -1265,16 +1286,15 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
output = subprocess.run(['vulkaninfo'], capture_output=True, text=True, check=True, encoding='utf-8', timeout=10).stdout
|
||||
devicechunks = output.split("VkPhysicalDeviceMemoryProperties")[1:]
|
||||
gpuidx = 0
|
||||
lowestvkmem = 0
|
||||
for chunk in devicechunks:
|
||||
heaps = chunk.split("memoryTypes:")[0].split("memoryHeaps[")[1:]
|
||||
snippet = heaps[0]
|
||||
if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in snippet and "size" in snippet:
|
||||
match = re.search(r"size\s*=\s*(\d+)", snippet)
|
||||
if match:
|
||||
dmem = int(match.group(1))
|
||||
if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max:
|
||||
lowestvkmem = dmem if lowestvkmem==0 else (dmem if dmem<lowestvkmem else lowestvkmem)
|
||||
for heap in heaps: # Check all heaps, not just the first one
|
||||
if "MEMORY_HEAP_DEVICE_LOCAL_BIT" in heap and "size" in heap:
|
||||
match = re.search(r"size\s*=\s*(\d+)", heap)
|
||||
if match:
|
||||
dmem = int(match.group(1))
|
||||
if dmem > gpumem_ignore_limit_min and dmem < gpumem_ignore_limit_max:
|
||||
lowestvkmem = dmem if lowestvkmem==0 else (dmem if dmem<lowestvkmem else lowestvkmem)
|
||||
gpuidx += 1
|
||||
except Exception: # failed to get vulkan vram
|
||||
pass
|
||||
|
|
@ -1311,6 +1331,11 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
MaxMemory[0] = max(lowestclmem,MaxMemory[0])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Check VRAM detection after all backends have been tested
|
||||
if MaxMemory[0] < (1024*1024*256):
|
||||
print("Unable to detect VRAM, please set layers manually.")
|
||||
|
||||
return
|
||||
|
||||
def auto_set_backend_cli():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue