From 6143a754260d6bd45a69cee1ad3a5a95cecda71e Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 14 Mar 2026 00:36:52 +0800 Subject: [PATCH] improve autofit padding heuristics --- koboldcpp.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/koboldcpp.py b/koboldcpp.py index c5508d26e..6c635cf5b 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -1411,7 +1411,10 @@ def autoset_gpu_layers(ctxsize, sdquanted, bbs, qkv_level, musiclowvram): #shitt if modelfile_extracted_meta[6] > 1024*1024*10: #draft model tax calulated_gpu_overhead += (modelfile_extracted_meta[6] * 1.5) if modelfile_extracted_meta[7] > 1024*1024*10: #tts model tax - calulated_gpu_overhead += max(600*1024*1024, modelfile_extracted_meta[7] * 3) + if modelfile_extracted_meta[7] < 1024*1024*1024: #less than 1gb probably means outetts, which needs more vram + calulated_gpu_overhead += max(600*1024*1024, modelfile_extracted_meta[7] * 3) + else: + calulated_gpu_overhead += max(600*1024*1024, (150*1024*1024 + modelfile_extracted_meta[7] * 1.3)) if modelfile_extracted_meta[8] > 1024*1024*10: #embeddings model tax calulated_gpu_overhead += max(350*1024*1024, modelfile_extracted_meta[8] * 1.5) if modelfile_extracted_meta[9] > 1024*1024*10: #music llm tax