diff --git a/koboldcpp.py b/koboldcpp.py
index a2ebb9bcf..30877b3a2 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -622,7 +622,7 @@ def extract_modelfile_params(filepath,sdfilepath,whisperfilepath,mmprojfilepath)
     except Exception as ex:
         modelfile_extracted_meta = None
 
-def autoset_gpu_layers(ctxsize,gpumem,sdquanted): #shitty algo to determine how many layers to use
+def autoset_gpu_layers(ctxsize,gpumem,sdquanted,bbs): #shitty algo to determine how many layers to use
     global modelfile_extracted_meta # reference cached values instead
     try:
         if not modelfile_extracted_meta:
@@ -653,7 +653,7 @@ def autoset_gpu_layers(ctxsize,gpumem,sdquanted): #shitty algo to determine how
             headcount = ggufmeta[1]
             headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
             ratio = mem/(fsize*csmul*1.5)
-            computemem = layers*4*headkvlen*cs*4*1.5 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
+            computemem = layers*(4 if bbs <= 512 else (bbs/128))*headkvlen*cs*4*1.5 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
             contextmem = layers*headcount*headkvlen*cs*4*1.1
             reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
             if headcount > 0:
@@ -2435,7 +2435,7 @@ def show_gui():
             pass
 
     def changed_gpulayers_estimate(*args):
-        predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),MaxMemory[0],(sd_quant_var.get()==1))
+        predicted_gpu_layers = autoset_gpu_layers(int(contextsize_text[context_var.get()]),MaxMemory[0],(sd_quant_var.get()==1),int(blasbatchsize_values[int(blas_size_var.get())]))
         max_gpu_layers = (f"/{modelfile_extracted_meta[0][0]+3}" if (modelfile_extracted_meta and modelfile_extracted_meta[0] and modelfile_extracted_meta[0][0]!=0) else "")
         index = runopts_var.get()
         gpu_be = (index == "Use Vulkan" or index == "Vulkan NoAVX2 (Old CPU)" or index == "Use CLBlast" or index == "CLBlast NoAVX2 (Old CPU)" or index == "Use CuBLAS" or index == "Use hipBLAS (ROCm)")
@@ -2667,6 +2667,8 @@ def show_gui():
     makelabelentry(hardware_tab, "BLAS threads:" , blas_threads_var, 14, 50,tooltip="How many threads to use during BLAS processing.\nIf left blank, uses same value as regular thread count.")
     # blas batch size
     makeslider(hardware_tab, "BLAS Batch Size:", blasbatchsize_text, blas_size_var, 0, 7, 16,width=200, set=5,tooltip="How many tokens to process at once per batch.\nLarger values use more memory.")
+    blas_size_var.trace("w", changed_gpulayers_estimate)
+
     # force version
     makelabelentry(hardware_tab, "Force Version:" , version_var, 100, 50,tooltip="If the autodetected version is wrong, you can change it here.\nLeave as 0 for default.")
     ctk.CTkButton(hardware_tab , text = "Run Benchmark", command = guibench ).grid(row=110,column=0, stick="se", padx= 0, pady=2)
@@ -3900,7 +3902,7 @@ def main(launch_args,start_server=True):
         pass
     if MaxMemory[0] > 0:
         extract_modelfile_params(args.model_param,args.sdmodel,args.whispermodel,args.mmproj)
-        layeramt = autoset_gpu_layers(args.contextsize, MaxMemory[0],args.sdquant)
+        layeramt = autoset_gpu_layers(args.contextsize, MaxMemory[0],args.sdquant,args.blasbatchsize)
         print(f"Auto Recommended Layers: {layeramt}")
         args.gpulayers = layeramt
     else:
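
For context, here is a minimal standalone sketch (not part of the patch) of how the new compute-buffer term in autoset_gpu_layers responds to the BLAS batch size. The helper name compute_scale is hypothetical and introduced only for illustration; it simply mirrors the `(4 if bbs <= 512 else (bbs/128))` expression added above, so batch sizes up to 512 keep the old hardcoded factor of 4 while larger batches scale the estimate up proportionally.

    # Illustrative sketch only; mirrors the scaling term from the patch above.
    def compute_scale(bbs: int) -> float:
        # For blasbatchsize <= 512 the factor stays at the previous hardcoded 4;
        # above that it grows as bbs/128 (e.g. 1024 -> 8, 2048 -> 16).
        return 4 if bbs <= 512 else (bbs / 128)

    if __name__ == "__main__":
        for bbs in (128, 256, 512, 1024, 2048):
            print(f"blasbatchsize={bbs:5d} -> compute memory scale factor {compute_scale(bbs):.1f}")

In effect, larger BLAS batch sizes now lower the recommended GPU layer count because more VRAM is reserved for the compute buffer, and the GUI re-runs the estimate whenever the BLAS Batch Size slider changes via the blas_size_var.trace hookup.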