remove extra padding for layer guessing

2025-09-10 17:14:36 +00:00 · 2024-07-24 16:36:34 +08:00 · 2024-07-24 16:36:34 +08:00 · c76f3401e3
commit c76f3401e3
parent 44ef87f14c
1 changed file with 3 additions and 3 deletions
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -612,11 +612,11 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m
                headcount = ggufmeta[1]
                headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
                ratio = mem/(fsize*csmul*1.5)
-                computemem = layers*4*headkvlen*cs*4*1.26 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
-                contextmem = layers*headcount*headkvlen*cs*4.02
+                computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
+                contextmem = layers*headcount*headkvlen*cs*4
                reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
                if headcount > 0:
-                    ratio = max(ratio, (mem - reservedmem - computemem) / (fsize*1.01 + contextmem))
+                    ratio = max(ratio, (mem - reservedmem - computemem) / (fsize + contextmem))
                layerlimit = min(int(ratio*layers), (layers + 3))
        return layerlimit
    except Exception as ex: