From e28c42d7f76c5b33d13d427f7faadc4f93272388 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Wed, 24 Jul 2024 21:54:49 +0800 Subject: [PATCH] adjusted layer estimation --- .github/workflows/kcpp-build-release-win-cuda.yaml | 2 +- .github/workflows/kcpp-build-release-win-full-cu12.yaml | 2 +- .github/workflows/kcpp-build-release-win-full.yaml | 2 +- .github/workflows/kcpp-build-release-win-oldcpu-full.yaml | 2 +- klite.embd | 6 ++++-- koboldcpp.py | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/.github/workflows/kcpp-build-release-win-cuda.yaml b/.github/workflows/kcpp-build-release-win-cuda.yaml index 618c21e63..2c4e335a1 100644 --- a/.github/workflows/kcpp-build-release-win-cuda.yaml +++ b/.github/workflows/kcpp-build-release-win-cuda.yaml @@ -25,7 +25,7 @@ jobs: mkdir build cd build cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" - cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} + cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) - name: Save artifact uses: actions/upload-artifact@v3 diff --git a/.github/workflows/kcpp-build-release-win-full-cu12.yaml b/.github/workflows/kcpp-build-release-win-full-cu12.yaml index e58684b6c..09ba45dfa 100644 --- a/.github/workflows/kcpp-build-release-win-full-cu12.yaml +++ b/.github/workflows/kcpp-build-release-win-full-cu12.yaml @@ -49,7 +49,7 @@ jobs: mkdir build cd build cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" - cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} + cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll cd .. diff --git a/.github/workflows/kcpp-build-release-win-full.yaml b/.github/workflows/kcpp-build-release-win-full.yaml index 47f80e368..c26bb4751 100644 --- a/.github/workflows/kcpp-build-release-win-full.yaml +++ b/.github/workflows/kcpp-build-release-win-full.yaml @@ -49,7 +49,7 @@ jobs: mkdir build cd build cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" - cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} + cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll cd .. diff --git a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml index 928bc344a..996e95137 100644 --- a/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml +++ b/.github/workflows/kcpp-build-release-win-oldcpu-full.yaml @@ -49,7 +49,7 @@ jobs: mkdir build cd build cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0" - cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} + cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1)) mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll cd .. diff --git a/klite.embd b/klite.embd index 8a3d5edac..914dc89f7 100644 --- a/klite.embd +++ b/klite.embd @@ -13080,8 +13080,9 @@ Current version indicated by LITEVER below. if(found == 0) { gentxt = gentxt.slice(st2.length); + found = gentxt.indexOf(st2); } - else if (found != -1) //if found, truncate to it + if (found != -1) //if found, truncate to it { splitresponse = gentxt.split(st2); gentxt = splitresponse[0]; @@ -13094,8 +13095,9 @@ Current version indicated by LITEVER below. if(found == 0) { gentxt = gentxt.slice(et2.length); + found = gentxt.indexOf(et2); } - else if (found != -1) //if found, truncate to it + if (found != -1) //if found, truncate to it { splitresponse = gentxt.split(et2); gentxt = splitresponse[0]; diff --git a/koboldcpp.py b/koboldcpp.py index b77ba90bb..28f6a5e24 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -612,7 +612,7 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m headcount = ggufmeta[1] headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128) ratio = mem/(fsize*csmul*1.5) - computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk + computemem = layers*4*headkvlen*cs*4*1.35 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk contextmem = layers*headcount*headkvlen*cs*4 reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk if headcount > 0: