mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
adjusted layer estimation
This commit is contained in:
parent
b7fc8e644a
commit
e28c42d7f7
6 changed files with 9 additions and 7 deletions
|
@ -25,7 +25,7 @@ jobs:
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
||||||
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
|
||||||
|
|
||||||
- name: Save artifact
|
- name: Save artifact
|
||||||
uses: actions/upload-artifact@v3
|
uses: actions/upload-artifact@v3
|
||||||
|
|
|
@ -49,7 +49,7 @@ jobs:
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
||||||
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
|
||||||
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
|
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ jobs:
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
cmake .. -DLLAMA_CUBLAS=ON -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
||||||
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
|
||||||
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
|
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
|
|
|
@ -49,7 +49,7 @@ jobs:
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
cmake .. -DLLAMA_CUBLAS=ON -DLLAMA_AVX2=OFF -DCMAKE_SYSTEM_VERSION="10.0.19041.0"
|
||||||
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
|
||||||
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
|
mv bin/Release/koboldcpp_cublas.dll ../koboldcpp_cublas.dll
|
||||||
cd ..
|
cd ..
|
||||||
|
|
||||||
|
|
|
@ -13080,8 +13080,9 @@ Current version indicated by LITEVER below.
|
||||||
if(found == 0)
|
if(found == 0)
|
||||||
{
|
{
|
||||||
gentxt = gentxt.slice(st2.length);
|
gentxt = gentxt.slice(st2.length);
|
||||||
|
found = gentxt.indexOf(st2);
|
||||||
}
|
}
|
||||||
else if (found != -1) //if found, truncate to it
|
if (found != -1) //if found, truncate to it
|
||||||
{
|
{
|
||||||
splitresponse = gentxt.split(st2);
|
splitresponse = gentxt.split(st2);
|
||||||
gentxt = splitresponse[0];
|
gentxt = splitresponse[0];
|
||||||
|
@ -13094,8 +13095,9 @@ Current version indicated by LITEVER below.
|
||||||
if(found == 0)
|
if(found == 0)
|
||||||
{
|
{
|
||||||
gentxt = gentxt.slice(et2.length);
|
gentxt = gentxt.slice(et2.length);
|
||||||
|
found = gentxt.indexOf(et2);
|
||||||
}
|
}
|
||||||
else if (found != -1) //if found, truncate to it
|
if (found != -1) //if found, truncate to it
|
||||||
{
|
{
|
||||||
splitresponse = gentxt.split(et2);
|
splitresponse = gentxt.split(et2);
|
||||||
gentxt = splitresponse[0];
|
gentxt = splitresponse[0];
|
||||||
|
|
|
@ -612,7 +612,7 @@ def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how m
|
||||||
headcount = ggufmeta[1]
|
headcount = ggufmeta[1]
|
||||||
headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
|
headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128)
|
||||||
ratio = mem/(fsize*csmul*1.5)
|
ratio = mem/(fsize*csmul*1.5)
|
||||||
computemem = layers*4*headkvlen*cs*4*1.25 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
|
computemem = layers*4*headkvlen*cs*4*1.35 # For now the first 4 is the hardcoded result for a blasbatchsize of 512. Ideally we automatically calculate blasbatchsize / 4 but I couldn't easily grab the value yet - Henk
|
||||||
contextmem = layers*headcount*headkvlen*cs*4
|
contextmem = layers*headcount*headkvlen*cs*4
|
||||||
reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
|
reservedmem = 1.5*1024*1024*1024 # Users often don't have their GPU's VRAM worth of memory, we assume 500MB to avoid driver swapping + 500MB for the OS + 500MB for background apps / browser - Henk
|
||||||
if headcount > 0:
|
if headcount > 0:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue