From e28c42d7f76c5b33d13d427f7faadc4f93272388 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Wed, 24 Jul 2024 21:54:49 +0800
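Subject: [PATCH] adjusted layer estimation

Raise the multiplier used for the computemem estimate in
autoset_gpu_layers (koboldcpp.py) from 1.25 to 1.35.

Also included in this patch:
- Windows release workflows: run the cmake build with one fewer
  parallel job than NUMBER_OF_PROCESSORS.
- klite.embd: after slicing a leading st2/et2 match off gentxt, search
  for the marker again so that a later occurrence still truncates the
  text.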
---
 .github/workflows/kcpp-build-release-win-cuda.yaml        | 2 +-
 .github/workflows/kcpp-build-release-win-full-cu12.yaml   | 2 +-
 .github/workflows/kcpp-build-release-win-full.yaml        | 2 +-
 .github/workflows/kcpp-build-release-win-oldcpu-full.yaml | 2 +-
 klite.embd                                                | 6 ++++--
 koboldcpp.py                                              | 2 +-
 6 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/kcpp-build-release-win-cuda.yaml b/.github/workflows/kcpp-build-release-win-cuda.yaml
index 618c21e63..2c4e335a1 100644
--- a/.github/workflows/kcpp-build-release-win-cuda.yaml
+++ b/.github/workflows/kcpp-build-release-win-cuda.yaml
@@ -25,7 +25,7 @@ jobs:
           mkdir build
           cd build
           cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
 
       - name: Save artifact
         uses: actions/upload-artifact@v3
diff --git a/.github/workflows/kcpp-build-release-win-full-cu12.yaml b/.github/workflows/kcpp-build-release-win-full-cu12.yaml
index e58684b6c..09ba45dfa 100644
--- a/.github/workflows/kcpp-build-release-win-full-cu12.yaml
+++ b/.github/workflows/kcpp-build-release-win-full-cu12.yaml
@@ -49,7 +49,7 @@ jobs:
           mkdir build
           cd build
           cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
           mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
           cd ..
 
diff --git a/.github/workflows/kcpp-build-release-win-full.yaml b/.github/workflows/kcpp-build-release-win-full.yaml
index 47f80e368..c26bb4751 100644
--- a/.github/workflows/kcpp-build-release-win-full.yaml
+++ b/.github/workflows/kcpp-build-release-win-full.yaml
@@ -49,7 +49,7 @@ jobs:
           mkdir build
           cd build
           cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
           mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
           cd ..
 
diff --git a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml
index 928bc344a..996e95137 100644
--- a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml
+++ b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml
@@ -49,7 +49,7 @@ jobs:
           mkdir build
           cd build
           cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
           mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
           cd ..
 
diff --git a/klite.embd b/klite.embd
index 8a3d5edac..914dc89f7 100644
--- a/klite.embd
+++ b/klite.embd
@@ -13080,8 +13080,9 @@ Current version indicated by LITEVER below.
 					if(found == 0)
 					{
 						gentxt = gentxt.slice(st2.length);
+						found = gentxt.indexOf(st2);
 					}
-					else if (found != -1) //if found, truncate to it
+					if (found != -1) //if found, truncate to it
 					{
 						splitresponse = gentxt.split(st2);
 						gentxt = splitresponse[0];
@@ -13094,8 +13095,9 @@ Current version indicated by LITEVER below.
 					if(found == 0)
 					{
 						gentxt = gentxt.slice(et2.length);
+						found = gentxt.indexOf(et2);
 					}
-					else if (found != -1) //if found, truncate to it
+					if (found != -1) //if found, truncate to it
 					{
 						splitresponse = gentxt.split(et2);
 						gentxt = splitresponse[0];
diff --git a/koboldcpp.py b/koboldcpp.py
index b77ba90bb..28f6a5e24 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -612,7 +612,7 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m
                 headcount = ggufmeta[1]
                 headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
                 ratio = mem/(fsize*csmul*1.5)
-                computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
+                computemem = layers*4*headkvlen*cs*4*1.35 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
                 contextmem = layers*headcount*headkvlen*cs*4
                 reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
                 if headcount > 0: