adjusted layer estimation

2025-09-11 01:24:36 +00:00 · 2024-07-24 21:54:49 +08:00 · 2024-07-24 21:54:49 +08:00 · e28c42d7f7
commit e28c42d7f7
parent b7fc8e644a
6 changed files with 9 additions and 7 deletions
--- a/.github/workflows/kcpp-build-release-win-cuda.yaml
+++ b/.github/workflows/kcpp-build-release-win-cuda.yaml
@@ -25,7 +25,7 @@ jobs:
          mkdir build
          cd build
          cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
      - name: Save artifact
        uses: actions/upload-artifact@v3
--- a/.github/workflows/kcpp-build-release-win-full-cu12.yaml
+++ b/.github/workflows/kcpp-build-release-win-full-cu12.yaml
@@ -49,7 +49,7 @@ jobs:
          mkdir build
          cd build
          cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
          cd ..
--- a/.github/workflows/kcpp-build-release-win-full.yaml
+++ b/.github/workflows/kcpp-build-release-win-full.yaml
@@ -49,7 +49,7 @@ jobs:
          mkdir build
          cd build
          cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
          cd ..
--- a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml
+++ b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml
@@ -49,7 +49,7 @@ jobs:
          mkdir build
          cd build
          cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
-          cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
+          cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
          mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
          cd ..
--- a/klite.embd
+++ b/klite.embd
@@ -13080,8 +13080,9 @@ Current version indicated by LITEVER below.
 					if(found == 0)
 					{
 						gentxt = gentxt.slice(st2.length);
 						found = gentxt.indexOf(st2);
 					}
-					else if (found != -1) //if found, truncate to it
+					if (found != -1) //if found, truncate to it
 					{
 						splitresponse = gentxt.split(st2);
 						gentxt = splitresponse[0];
@@ -13094,8 +13095,9 @@ Current version indicated by LITEVER below.
 					if(found == 0)
 					{
 						gentxt = gentxt.slice(et2.length);
 						found = gentxt.indexOf(et2);
 					}
-					else if (found != -1) //if found, truncate to it
+					if (found != -1) //if found, truncate to it
 					{
 						splitresponse = gentxt.split(et2);
 						gentxt = splitresponse[0];
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -612,7 +612,7 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m
                headcount = ggufmeta[1]
                headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
                ratio = mem/(fsize*csmul*1.5)
-                computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
+                computemem = layers*4*headkvlen*cs*4*1.35 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
                contextmem = layers*headcount*headkvlen*cs*4
                reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
                if headcount > 0: