autofit improvement e.g. for strix (+1 squashed commits)

Squashed commits: [6f6fd59c3] autofit improvement e.g. for strix
2026-06-01 14:29:33 +00:00 · 2026-03-10 20:54:44 +08:00 · 2026-03-10 20:54:44 +08:00 · eafb5ff4c5
commit eafb5ff4c5
parent 500a1ab466
1 changed file with 12 additions and 8 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -1571,7 +1571,7 @@ def fetch_gpu_properties(testCU,testVK,testmemory=False):

    # Check VRAM detection after all backends have been tested
    if MaxMemory[0] < (1024*1024*256):
-        print("Unable to detect VRAM, please set layers manually.")
+        print("Unable to detect VRAM.")

    return

@@ -8671,18 +8671,22 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
                fetch_gpu_properties(True,True)
                pass
            if args.gpulayers==-1:
-                if MaxMemory[0] > 0 and (not args.usecpu) and ((args.usecuda is not None) or (args.usevulkan is not None) or sys.platform=="darwin"):
-                    extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "",args.embeddingsmodel if args.embeddingsgpu else "")
-                    layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.batchsize,(0 if args.noflashattention else args.quantkv))
-                    print(f"Auto Recommended GPU Layers: {layeramt}")
-                    args.gpulayers = layeramt
-                    # enable autofit also if permissible
+                if (not args.usecpu) and ((args.usecuda is not None) or (args.usevulkan is not None) or sys.platform=="darwin"):
+                    if MaxMemory[0] > 0:
+                        extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "",args.embeddingsmodel if args.embeddingsgpu else "")
+                        layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.batchsize,(0 if args.noflashattention else args.quantkv))
+                        print(f"Auto Recommended GPU Layers: {layeramt}")
+                        args.gpulayers = layeramt
+                    else:
+                        print("Unable to detect VRAM, but autofit may still be used if applicable.")
+                        args.gpulayers = 0
+                    # also enable autofit if permissible
                    if not args.autofit and not args.tensor_split and not args.overridetensors and not args.moecpu:
                        args.autofit = True
                        args.autofitpadding = default_autofit_padding
                        print("GPU layers is default: Will enable AutoFit for increased estimation accuracy.")
                else:
-                    print("No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
+                    print("No GPU backend found, or could not automatically determine GPU layers. You may prefer to set layers manually.")
                    args.gpulayers = 0

    if args.threads <= 0: