mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 18:30:50 +00:00
include overhead from music models
This commit is contained in:
parent
6adcd0b5db
commit
8095bf9807
1 changed files with 25 additions and 7 deletions
32
koboldcpp.py
32
koboldcpp.py
|
|
@ -1329,7 +1329,7 @@ def read_gguf_metadata(file_path):
|
|||
except Exception:
|
||||
return None
|
||||
|
||||
def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath,embdmodelpath):
|
||||
def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath,embdmodelpath,musicllmpath,musicditpath):
|
||||
global modelfile_extracted_meta
|
||||
modelfile_extracted_meta = None
|
||||
sdfsize = 0
|
||||
|
|
@ -1338,6 +1338,8 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
|
|||
draftmodelsize = 0
|
||||
ttsmodelsize = 0
|
||||
embdmodelsize = 0
|
||||
musicllmsize = 0
|
||||
musicditsize = 0
|
||||
if sdfilepath and os.path.exists(sdfilepath):
|
||||
sdfsize = os.path.getsize(sdfilepath)
|
||||
if whisperfilepath and os.path.exists(whisperfilepath):
|
||||
|
|
@ -1350,16 +1352,20 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,
|
|||
ttsmodelsize = os.path.getsize(ttsmodelpath)
|
||||
if embdmodelpath and os.path.exists(embdmodelpath):
|
||||
embdmodelsize = os.path.getsize(embdmodelpath)
|
||||
if musicllmpath and os.path.exists(musicllmpath):
|
||||
musicllmsize = os.path.getsize(musicllmpath)
|
||||
if musicditpath and os.path.exists(musicditpath):
|
||||
musicditsize = os.path.getsize(musicditpath)
|
||||
if filepath and os.path.exists(filepath):
|
||||
try:
|
||||
fsize = os.path.getsize(filepath)
|
||||
if fsize>10000000: #dont bother with models < 10mb as they are probably bad
|
||||
ggufmeta = read_gguf_metadata(filepath)
|
||||
modelfile_extracted_meta = [filepath,ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize,ttsmodelsize,embdmodelsize] #extract done. note that meta may be null
|
||||
modelfile_extracted_meta = [filepath,ggufmeta,fsize,sdfsize,whisperfsize,mmprojsize,draftmodelsize,ttsmodelsize,embdmodelsize,musicllmsize,musicditsize] #extract done. note that meta may be null
|
||||
except Exception:
|
||||
modelfile_extracted_meta = None
|
||||
|
||||
def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to determine how many layers to use
|
||||
def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level, musiclowvram): #shitty algo to determine how many layers to use
|
||||
global showusedmemwarning, showmultigpuwarning, modelfile_extracted_meta, calulated_gpu_overhead # reference cached values instead
|
||||
gpumem = MaxMemory[0]
|
||||
usedmem = 0
|
||||
|
|
@ -1389,6 +1395,8 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
|
|||
fsize *= total_parts
|
||||
|
||||
calulated_gpu_overhead = 0
|
||||
musicoh1 = 0
|
||||
musicoh2 = 0
|
||||
if modelfile_extracted_meta[3] > 1024*1024*1024*5: #sdxl tax
|
||||
calulated_gpu_overhead += 1024*1024*1024*(9 - sdquanted * 1.5) # 9, 7.5, 6
|
||||
elif modelfile_extracted_meta[3] > 1024*1024*512: #normal sd tax
|
||||
|
|
@ -1403,6 +1411,14 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level): #shitty algo to dete
|
|||
calulated_gpu_overhead += max(600*1024*1024, modelfile_extracted_meta[7] * 3)
|
||||
if modelfile_extracted_meta[8] > 1024*1024*10: #embeddings model tax
|
||||
calulated_gpu_overhead += max(350*1024*1024, modelfile_extracted_meta[8] * 1.5)
|
||||
if modelfile_extracted_meta[9] > 1024*1024*10: #music llm tax
|
||||
musicoh1 = modelfile_extracted_meta[9] * 1.05
|
||||
if modelfile_extracted_meta[10] > 1024*1024*10: #music dit tax
|
||||
musicoh2 = modelfile_extracted_meta[10] * 1.05 + (600*1024*1024)
|
||||
if musiclowvram:
|
||||
calulated_gpu_overhead += max(musicoh1,musicoh2)
|
||||
else:
|
||||
calulated_gpu_overhead += musicoh1 + musicoh2
|
||||
|
||||
mem -= calulated_gpu_overhead
|
||||
mem = 0 if mem < 0 else mem
|
||||
|
|
@ -6302,7 +6318,9 @@ def show_gui():
|
|||
draftmodelpath = draftmodel_var.get()
|
||||
ttsmodelpath = tts_model_var.get() if ttsgpu_var.get()==1 else ""
|
||||
embdmodelpath = embeddings_model_var.get() if embeddings_gpu_var.get()==1 else ""
|
||||
extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath,embdmodelpath)
|
||||
musicllmpath = musicllm_var.get()
|
||||
musicditpath = musicdiffusion_var.get()
|
||||
extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath,draftmodelpath,ttsmodelpath,embdmodelpath,musicllmpath,musicditpath)
|
||||
changed_gpulayers_estimate()
|
||||
pass
|
||||
|
||||
|
|
@ -6314,7 +6332,7 @@ def show_gui():
|
|||
changed_gpulayers_estimate()
|
||||
|
||||
def changed_gpulayers_estimate(*args):
|
||||
autoset_gpu_layers(int(contextsize_text[context_var.get()]),sd_quant_option(sd_quant_var.get()),int(batchsize_values[int(blas_size_var.get())]),(quantkv_var.get() if flashattention_var.get()==1 else 0))
|
||||
autoset_gpu_layers(int(contextsize_text[context_var.get()]),sd_quant_option(sd_quant_var.get()),int(batchsize_values[int(blas_size_var.get())]),(quantkv_var.get() if flashattention_var.get()==1 else 0),musiclowvram_var.get()==1)
|
||||
max_gpu_layers = (f"{modelfile_extracted_meta[1][0]+1}" if (modelfile_extracted_meta and modelfile_extracted_meta[1] and modelfile_extracted_meta[1][0]!=0) else "")
|
||||
index = runopts_var.get()
|
||||
gpu_be = (index == "Use Vulkan" or index == "Use Vulkan (Old CPU)" or index == "Use Vulkan (Older CPU)" or index == "Use CUDA" or index == "Use hipBLAS (ROCm)")
|
||||
|
|
@ -8789,8 +8807,8 @@ def kcpp_main_process(launch_args, g_memory=None, gui_launcher=False):
|
|||
if args.gpulayers==-1:
|
||||
if (not args.usecpu) and ((args.usecuda is not None) or (args.usevulkan is not None) or sys.platform=="darwin"):
|
||||
if MaxMemory[0] > 0:
|
||||
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "",args.embeddingsmodel if args.embeddingsgpu else "")
|
||||
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.batchsize,(0 if args.noflashattention else args.quantkv))
|
||||
extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj,args.draftmodel,args.ttsmodel if args.ttsgpu else "",args.embeddingsmodel if args.embeddingsgpu else "", args.musicllm, args.musicdiffusion)
|
||||
layeramt = autoset_gpu_layers(args.contextsize,args.sdquant,args.batchsize,(0 if args.noflashattention else args.quantkv),args.musiclowvram)
|
||||
print(f"Auto Recommended GPU Layers: {layeramt}")
|
||||
args.gpulayers = layeramt
|
||||
else:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue