From 81553e6524c50a64b7e7bd38738eab6e2efdd69e Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 23 May 2026 00:04:12 +0800 Subject: [PATCH] mmproj overhead estimate calculated but only used on python side --- koboldcpp.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/koboldcpp.py b/koboldcpp.py index 4e6269b61..4a652b29f 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1672,6 +1672,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, musiclowvram): #shitty algo to d fsize *= total_parts calulated_gpu_overhead = 0 + unsubmitted_overhead = 0 #this overhead is counted in the local estimate but is not sent to the backend musicoh1 = 0 musicoh2 = 0 if modelfile_extracted_meta[3] > 1024*1024*1024*5: #sdxl tax @@ -1680,8 +1681,8 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, musiclowvram): #shitty algo to d calulated_gpu_overhead += 1024*1024*1024*(4.25 - sdquanted * 0.5) # 4.25, 3.75, 3.25 if modelfile_extracted_meta[4] > 1024*1024*10: #whisper tax calulated_gpu_overhead += max(350*1024*1024,modelfile_extracted_meta[4]*1.5) - # if modelfile_extracted_meta[5] > 1024*1024*10: #mmproj tax (now internal to kcpp) - # calulated_gpu_overhead += max(350*1024*1024,modelfile_extracted_meta[5]*1.5) + if modelfile_extracted_meta[5] > 1024*1024*10: #mmproj tax (now internal to kcpp) + unsubmitted_overhead += max(350*1024*1024,modelfile_extracted_meta[5]*1.5) if modelfile_extracted_meta[6] > 1024*1024*10: #draft model tax calulated_gpu_overhead += (modelfile_extracted_meta[6] * 1.5) if modelfile_extracted_meta[7] > 1024*1024*10: #tts model tax @@ -1701,6 +1702,7 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, musiclowvram): #shitty algo to d calulated_gpu_overhead += musicoh1 + musicoh2 mem -= calulated_gpu_overhead + mem -= unsubmitted_overhead mem = 0 if mem < 0 else mem csmul = (cs/4096) if cs >= 8192 else 1.8 if cs > 4096 else 1.2 if cs > 2048 else 1.0