Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)
more janky nonsense
This commit is contained in:
parent 804d3520a9
commit 6bf78967f9

1 changed file with 6 additions and 4 deletions

koboldcpp.py
@@ -622,7 +622,7 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath)
     except Exception as ex:
         modelfile_extracted_meta = None
 
-def autoset_gpu_layers(ctxsize,gpumem,sdquanted): #shitty algo to determine how many layers to use
+def autoset_gpu_layers(ctxsize,gpumem,sdquanted,bbs): #shitty algo to determine how many layers to use
     global modelfile_extracted_meta # reference cached values instead
     try:
         if not modelfile_extracted_meta:
@@ -653,7 +653,7 @@ def autoset_gpu_layers(ctxsize,gpumem,sdquanted): #shitty algo to determine how
             headcount = ggufmeta[1]
             headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
             ratio = mem/(fsize*csmul*1.5)
-            computemem = layers*4*headkvlen*cs*4*1.5 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
+            computemem = layers*(4 if bbs <= 512 else (bbs/128))*headkvlen*cs*4*1.5 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
             contextmem = layers*headcount*headkvlen*cs*4*1.1
             reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
             if headcount > 0:
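Note (illustrative only, not part of the patch): the new bbs term keeps the hardcoded factor of 4 for BLAS batch sizes up to 512 and switches to bbs/128 above that, so the compute-buffer estimate grows linearly once the batch exceeds 512. A minimal sketch of just that term, using made-up inputs:

def estimated_compute_buffer(layers, headkvlen, cs, bbs):
    # mirrors the updated formula: factor 4 up to bbs=512, bbs/128 beyond (1024 -> 8, 2048 -> 16)
    factor = 4 if bbs <= 512 else (bbs / 128)
    return layers * factor * headkvlen * cs * 4 * 1.5

# illustrative numbers only: 32 layers, headkvlen 128, context of 4096 tokens
print(estimated_compute_buffer(32, 128, 4096, 512))   # baseline factor of 4
print(estimated_compute_buffer(32, 128, 4096, 2048))  # factor 16, four times the estimate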
@@ -2435,7 +2435,7 @@ def show_gui():
         pass
 
     def changed_gpulayers_estimate(*args):
-        predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),MaxMemory[0],(sd_quant_var.get()==1))
+        predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),MaxMemory[0],(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
         max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
         index = runopts_var.get()
         gpu_be = (index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
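Note (illustrative only, not part of the patch): in the GUI path the slider variable holds an index rather than a token count, so the call looks the actual size up with blasbatchsize_values[int(blas_size_var.get())]. A rough sketch of that lookup, assuming blasbatchsize_values is the index-to-size table defined elsewhere in koboldcpp.py (the list below is an assumed example):

blasbatchsize_values = [-1, 32, 64, 128, 256, 512, 1024, 2048]  # assumed example contents

def slider_index_to_bbs(slider_value):
    # the Tk variable yields a string index, e.g. "5"; map it to an actual batch size
    return int(blasbatchsize_values[int(slider_value)])

print(slider_index_to_bbs("5"))  # 512 with the assumed list above, matching the slider default of set=5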
@@ -2667,6 +2667,8 @@ def show_gui():
     makelabelentry(hardware_tab, "BLAS threads:" , blas_threads_var, 14, 50,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
     # blas batch size
     makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 7, 16,width=200, set=5,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
+    blas_size_var.trace("w", changed_gpulayers_estimate)
+
     # force version
     makelabelentry(hardware_tab, "Force Version:" , version_var, 100, 50,tooltip="If the autodetected version is wrong, you can change it here.\nLeave as 0 for default.")
     ctk.CTkButton(hardware_tab , text = "Run Benchmark", command = guibench ).grid(row=110,column=0, stick="se", padx= 0, pady=2)
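Note (illustrative only, not part of the patch): the added trace call uses the standard Tkinter write-trace pattern, so any movement of the BLAS Batch Size slider re-runs changed_gpulayers_estimate and refreshes the recommended layer count. A standalone sketch of the same pattern with placeholder names:

import tkinter as tk

root = tk.Tk()
blas_size_var = tk.StringVar(value="5")

def changed_gpulayers_estimate(*args):
    # placeholder body; the real callback recomputes and displays the GPU layer estimate
    print(f"slider index is now {blas_size_var.get()}, re-estimating layers")

# "w" fires the callback whenever the variable is written, i.e. whenever the slider moves
blas_size_var.trace("w", changed_gpulayers_estimate)
blas_size_var.set("7")  # writing the variable triggers the callback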
@@ -3900,7 +3902,7 @@ def main(launch_args,start_server=True):
             pass
         if MaxMemory[0] > 0:
             extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
-            layeramt = autoset_gpu_layers(args.contextsize, MaxMemory[0],args.sdquant)
+            layeramt = autoset_gpu_layers(args.contextsize, MaxMemory[0],args.sdquant,args.blasbatchsize)
             print(f"Auto Recommended Layers: {layeramt}")
             args.gpulayers = layeramt
         else: