mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
split memory detection functions and add debug command (#1832)
This commit is contained in:
parent
df6e303fd3
commit
2ae6bff5bd
1 changed files with 49 additions and 11 deletions
60
koboldcpp.py
60
koboldcpp.py
|
|
@ -1195,11 +1195,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
|
|||
except Exception:
|
||||
return 0
|
||||
|
||||
def fetch_gpu_properties(testCL,testCU,testVK):
|
||||
gpumem_ignore_limit_min = 1024*1024*600 #600 mb min
|
||||
gpumem_ignore_limit_max = 1024*1024*1024*300 #300 gb max
|
||||
|
||||
if testCU:
|
||||
def detect_memory_cu(gpumem_ignore_limit_min, gpumem_ignore_limit_max):
|
||||
FetchedCUdevices = []
|
||||
FetchedCUdeviceMem = []
|
||||
FetchedCUfreeMem = []
|
||||
|
|
@ -1276,10 +1272,11 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
lowestcumem = 0
|
||||
lowestfreecumem = 0
|
||||
|
||||
MaxMemory[0] = max(lowestcumem,MaxMemory[0])
|
||||
MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])
|
||||
return lowestcumem, lowestfreecumem
|
||||
|
||||
|
||||
def detect_memory_vk(gpumem_ignore_limit_min, gpumem_ignore_limit_max):
|
||||
|
||||
if testVK:
|
||||
try: # Get Vulkan names
|
||||
foundVkGPU = False
|
||||
lowestvkmem = 0
|
||||
|
|
@ -1318,11 +1315,15 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
gpuidx += 1
|
||||
except Exception: # failed to get vulkan vram
|
||||
pass
|
||||
MaxMemory[0] = max(lowestvkmem,MaxMemory[0])
|
||||
return lowestvkmem
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if testCL:
|
||||
return 0
|
||||
|
||||
|
||||
def detect_memory_cl(gpumem_ignore_limit_min, gpumem_ignore_limit_max):
|
||||
|
||||
try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found.
|
||||
basepath = os.path.abspath(os.path.dirname(__file__))
|
||||
output = ""
|
||||
|
|
@ -1348,10 +1349,40 @@ def fetch_gpu_properties(testCL,testCU,testVK):
|
|||
lowestclmem = dmem if lowestclmem==0 else (dmem if dmem<lowestclmem else lowestclmem)
|
||||
dev += 1
|
||||
plat += 1
|
||||
MaxMemory[0] = max(lowestclmem,MaxMemory[0])
|
||||
return lowestclmem
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
def fetch_gpu_properties(testCL,testCU,testVK,testmemory=False):
|
||||
gpumem_ignore_limit_min = 1024*1024*600 #600 mb min
|
||||
gpumem_ignore_limit_max = 1024*1024*1024*300 #300 gb max
|
||||
|
||||
if testCU:
|
||||
|
||||
cumem, freecumem = detect_memory_cu(gpumem_ignore_limit_min, gpumem_ignore_limit_max)
|
||||
MaxMemory[0] = max(cumem,MaxMemory[0])
|
||||
MaxFreeMemory[0] = max(freecumem,MaxFreeMemory[0])
|
||||
if testmemory:
|
||||
# Use the same 1024*1024 divisor for total and free memory so both values are really in MB.
print(f'detected CUDA memory: {cumem/(1024*1024)} MB, {freecumem/(1024*1024)} MB free')
|
||||
|
||||
if testVK:
|
||||
|
||||
vkmem = detect_memory_vk(gpumem_ignore_limit_min, gpumem_ignore_limit_max)
|
||||
MaxMemory[0] = max(vkmem,MaxMemory[0])
|
||||
if testmemory:
|
||||
print(f'detected Vulkan memory: {vkmem/(1024*1024)} MB')
|
||||
|
||||
if testCL:
|
||||
|
||||
clmem = detect_memory_cl(gpumem_ignore_limit_min, gpumem_ignore_limit_max)
|
||||
MaxMemory[0] = max(clmem,MaxMemory[0])
|
||||
if testmemory:
|
||||
print(f'detected OpenCL memory: {clmem/(1024*1024)} MB')
|
||||
|
||||
|
||||
# Check VRAM detection after all backends have been tested
|
||||
if MaxMemory[0] < (1024*1024*256):
|
||||
print("Unable to detect VRAM, please set layers manually.")
|
||||
|
|
@ -6995,6 +7026,10 @@ def main(launch_args, default_args):
|
|||
print(f"{KcppVersion}") # just print version and exit
|
||||
return
|
||||
|
||||
if args.testmemory:
|
||||
fetch_gpu_properties(True, True, True, testmemory=True)
|
||||
return
|
||||
|
||||
#prevent disallowed combos
|
||||
if (args.nomodel or args.benchmark or args.launch or args.admin) and args.cli:
|
||||
exit_with_error(1, "Error: --cli cannot be combined with --launch, --nomodel, --admin or --benchmark")
|
||||
|
|
@ -8100,4 +8135,7 @@ if __name__ == '__main__':
|
|||
compatgroup3.add_argument("--nommap","--no-mmap", help=argparse.SUPPRESS, action='store_true')
|
||||
deprecatedgroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae
|
||||
|
||||
debuggroup = parser.add_argument_group('Debug Commands')
|
||||
debuggroup.add_argument("--testmemory", help=argparse.SUPPRESS, action='store_true')
|
||||
|
||||
main(launch_args=parser.parse_args(),default_args=parser.parse_args([]))
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue