split memory detection functions and add debug command (#1832)

2026-05-09 19:46:11 +00:00 · 2025-11-10 07:07:15 -03:00 · 2025-11-10 07:07:15 -03:00 · 2ae6bff5bd
commit 2ae6bff5bd
parent df6e303fd3
1 changed files with 49 additions and 11 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -1195,11 +1195,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
    except Exception:
        return 0

-def fetch_gpu_properties(testCL,testCU,testVK):
-    gpumem_ignore_limit_min = 1024*1024*600 #600 mb min
-    gpumem_ignore_limit_max = 1024*1024*1024*300 #300 gb max
-
-    if testCU:
+def detect_memory_cu(gpumem_ignore_limit_min, gpumem_ignore_limit_max):
        FetchedCUdevices = []
        FetchedCUdeviceMem = []
        FetchedCUfreeMem = []
@ -1276,10 +1272,11 @@ def fetch_gpu_properties(testCL,testCU,testVK):
            lowestcumem = 0
            lowestfreecumem = 0

-        MaxMemory[0] = max(lowestcumem,MaxMemory[0])
-        MaxFreeMemory[0] = max(lowestfreecumem,MaxFreeMemory[0])
+        return lowestcumem, lowestfreecumem
+
+
+def detect_memory_vk(gpumem_ignore_limit_min, gpumem_ignore_limit_max):

-    if testVK:
        try: # Get Vulkan names
            foundVkGPU = False
            lowestvkmem = 0
@ -1318,11 +1315,15 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                        gpuidx += 1
                except Exception: # failed to get vulkan vram
                    pass
-            MaxMemory[0] = max(lowestvkmem,MaxMemory[0])
+            return lowestvkmem
        except Exception:
            pass

-    if testCL:
+        return 0
+
+
+def detect_memory_cl(gpumem_ignore_limit_min, gpumem_ignore_limit_max):
+
        try: # Get OpenCL GPU names on windows using a special binary. overwrite at known index if found.
            basepath = os.path.abspath(os.path.dirname(__file__))
            output = ""
@ -1348,10 +1349,40 @@ def fetch_gpu_properties(testCL,testCU,testVK):
                            lowestclmem = dmem if lowestclmem==0 else (dmem if dmem<lowestclmem else lowestclmem)
                    dev += 1
                plat += 1
-            MaxMemory[0] = max(lowestclmem,MaxMemory[0])
+            return lowestclmem
        except Exception:
            pass

+        return 0
+
+
+def fetch_gpu_properties(testCL,testCU,testVK,testmemory=False):
+    gpumem_ignore_limit_min = 1024*1024*600 #600 mb min
+    gpumem_ignore_limit_max = 1024*1024*1024*300 #300 gb max
+
+    if testCU:
+
+        cumem, freecumem = detect_memory_cu(gpumem_ignore_limit_min, gpumem_ignore_limit_max)
+        MaxMemory[0] = max(cumem,MaxMemory[0])
+        MaxFreeMemory[0] = max(freecumem,MaxFreeMemory[0])
+        if testmemory:
+            print(f'detected CUDA memory: {cumem/(1024*1024)} MB, {freecumem/(1024*102)} MB free')
+
+    if testVK:
+
+        vkmem = detect_memory_vk(gpumem_ignore_limit_min, gpumem_ignore_limit_max)
+        MaxMemory[0] = max(vkmem,MaxMemory[0])
+        if testmemory:
+            print(f'detected Vulkan memory: {vkmem/(1024*1024)} MB')
+
+    if testCL:
+
+        clmem = detect_memory_cl(gpumem_ignore_limit_min, gpumem_ignore_limit_max)
+        MaxMemory[0] = max(clmem,MaxMemory[0])
+        if testmemory:
+            print(f'detected OpenCL memory: {clmem/(1024*1024)} MB')
+
+
    # Check VRAM detection after all backends have been tested
    if MaxMemory[0] < (1024*1024*256):
        print("Unable to detect VRAM, please set layers manually.")
@ -6995,6 +7026,10 @@ def main(launch_args, default_args):
        print(f"{KcppVersion}") # just print version and exit
        return

+    if args.testmemory:
+        fetch_gpu_properties(True, True, True, testmemory=True)
+        return
+
    #prevent disallowed combos
    if (args.nomodel or args.benchmark or args.launch or args.admin) and args.cli:
        exit_with_error(1, "Error: --cli cannot be combined with --launch, --nomodel, --admin or --benchmark")
@ -8100,4 +8135,7 @@ if __name__ == '__main__':
    compatgroup3.add_argument("--nommap","--no-mmap", help=argparse.SUPPRESS, action='store_true')
    deprecatedgroup.add_argument("--sdnotile", help=argparse.SUPPRESS, action='store_true') # legacy option, see sdtiledvae

+    debuggroup = parser.add_argument_group('Debug Commands')
+    debuggroup.add_argument("--testmemory", help=argparse.SUPPRESS, action='store_true')
+
    main(launch_args=parser.parse_args(),default_args=parser.parse_args([]))