mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-07 00:41:50 +00:00
autofit improvement e.g. for strix (+1 squashed commits)
Squashed commits:
[6f6fd59c3] autofit improvement e.g. for strix
This commit is contained in:
parent
500a1ab466
commit
eafb5ff4c5
1 changed file with 12 additions and 8 deletions
20
koboldcpp.py
20
koboldcpp.py
|
|
@ -1571,7 +1571,7 @@ def fetch_gpu_properties(testCU,testVK,testmemory=False):
|
|||
|
||||
# Check VRAM detection after all backends have been tested
|
||||
if MaxMemory[0] < (1024*1024*256):
|
||||
print("Unable to detect VRAM, please set layers manually.")
|
||||
print("Unable to detect VRAM.")
|
||||
|
||||
return
|
||||
|
||||
|
|
@ -8671,18 +8671,22 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
fetch_gpu_properties(True,True)
|
||||
pass
|
||||
if args.gpulayers==-1:
|
||||
if MaxMemory[0] > 0 and (not args.usecpu) and ((args.usecuda is not None) or (args.usevulkan is not None) or sys.platform=="darwin"):
|
||||
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "",args.embeddingsmodel if args.embeddingsgpu else "")
|
||||
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.batchsize,(0 if args.noflashattention else args.quantkv))
|
||||
print(f"Auto Recommended GPU Layers: {layeramt}")
|
||||
args.gpulayers = layeramt
|
||||
# enable autofit also if permissible
|
||||
if (not args.usecpu) and ((args.usecuda is not None) or (args.usevulkan is not None) or sys.platform=="darwin"):
|
||||
if MaxMemory[0] > 0:
|
||||
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "",args.embeddingsmodel if args.embeddingsgpu else "")
|
||||
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.batchsize,(0 if args.noflashattention else args.quantkv))
|
||||
print(f"Auto Recommended GPU Layers: {layeramt}")
|
||||
args.gpulayers = layeramt
|
||||
else:
|
||||
print("Unable to detect VRAM, but autofit may still be used if applicable.")
|
||||
args.gpulayers = 0
|
||||
# also enable autofit if permissible
|
||||
if not args.autofit and not args.tensor_split and not args.overridetensors and not args.moecpu:
|
||||
args.autofit = True
|
||||
args.autofitpadding = default_autofit_padding
|
||||
print("GPU layers is default: Will enable AutoFit for increased estimation accuracy.")
|
||||
else:
|
||||
print("No GPU backend found, or could not automatically determine GPU layers. Please set it manually.")
|
||||
print("No GPU backend found, or could not automatically determine GPU layers. You may prefer to set layers manually.")
|
||||
args.gpulayers = 0
|
||||
|
||||
if args.threads <= 0:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue