adjusted layer estimation

This commit is contained in:
Concedo 2024-07-24 21:54:49 +08:00
parent b7fc8e644a
commit e28c42d7f7
6 changed files with 9 additions and 7 deletions

View file

@ -25,7 +25,7 @@ jobs:
mkdir build mkdir build
cd build cd build
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
- name: Save artifact - name: Save artifact
uses: actions/upload-artifact@v3 uses: actions/upload-artifact@v3

View file

@ -49,7 +49,7 @@ jobs:
mkdir build mkdir build
cd build cd build
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
cd .. cd ..

View file

@ -49,7 +49,7 @@ jobs:
mkdir build mkdir build
cd build cd build
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0" cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
cd .. cd ..

View file

@ -49,7 +49,7 @@ jobs:
mkdir build mkdir build
cd build cd build
cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0" cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
cd .. cd ..

View file

@ -13080,8 +13080,9 @@ Current version indicated by LITEVER below.
if(found == 0) if(found == 0)
{ {
gentxt = gentxt.slice(st2.length); gentxt = gentxt.slice(st2.length);
found = gentxt.indexOf(st2);
} }
else if (found != -1) //if found, truncate to it if (found != -1) //if found, truncate to it
{ {
splitresponse = gentxt.split(st2); splitresponse = gentxt.split(st2);
gentxt = splitresponse[0]; gentxt = splitresponse[0];
@ -13094,8 +13095,9 @@ Current version indicated by LITEVER below.
if(found == 0) if(found == 0)
{ {
gentxt = gentxt.slice(et2.length); gentxt = gentxt.slice(et2.length);
found = gentxt.indexOf(et2);
} }
else if (found != -1) //if found, truncate to it if (found != -1) //if found, truncate to it
{ {
splitresponse = gentxt.split(et2); splitresponse = gentxt.split(et2);
gentxt = splitresponse[0]; gentxt = splitresponse[0];

View file

@ -612,7 +612,7 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m
headcount = ggufmeta[1] headcount = ggufmeta[1]
headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128) headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
ratio = mem/(fsize*csmul*1.5) ratio = mem/(fsize*csmul*1.5)
computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk computemem = layers*4*headkvlen*cs*4*1.35 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
contextmem = layers*headcount*headkvlen*cs*4 contextmem = layers*headcount*headkvlen*cs*4
reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
if headcount > 0: if headcount > 0: