mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-10 04:00:53 +00:00
Merge branch 'master' into concedo_experimental
# Conflicts: # .dockerignore # .github/workflows/build.yml # CMakeLists.txt # Makefile # README.md # flake.lock # flake.nix # tests/CMakeLists.txt
This commit is contained in:
commit
4b00916ac7
53 changed files with 4980 additions and 1910 deletions
26
scripts/convert-gg.sh
Executable file
26
scripts/convert-gg.sh
Executable file
|
|
@ -0,0 +1,26 @@
|
|||
#!/bin/bash
#
# Convert a fixed set of LLaMA v1, LLaMA v2, Code Llama and Falcon checkpoints
# to f16 GGUF files. All paths are relative to the directory the script is
# started from.

# Abort as soon as any conversion step fails.
set -e

# convert_f16 <checkpoint-dir> <model-name>
# Invokes convert.py on <checkpoint-dir> with
# --outfile models/<model-name>/ggml-model-f16.gguf and --outtype f16.
convert_f16() {
    python3 convert.py "$1" --outfile "models/$2/ggml-model-f16.gguf" --outtype f16
}

# convert_falcon_f16 <model-name>
# Invokes the Falcon converter on ../falcon/<model-name> (second argument is
# the literal 1 -- presumably the f16 output type, confirm against the
# converter), then moves ggml-model-f16.gguf from that directory into
# models/<model-name>/.
convert_falcon_f16() {
    python3 convert-falcon-hf-to-gguf.py "../falcon/$1" 1
    mv -v "../falcon/$1/ggml-model-f16.gguf" "models/$1/ggml-model-f16.gguf"
}

# LLaMA v1
convert_f16 ../llama1/7B  llama-7b
convert_f16 ../llama1/13B llama-13b
convert_f16 ../llama1/30B llama-30b
convert_f16 ../llama1/65B llama-65b

# LLaMA v2
convert_f16 ../llama2/llama-2-7b  llama-7b-v2
convert_f16 ../llama2/llama-2-13b llama-13b-v2
convert_f16 ../llama2/llama-2-70b llama-70b-v2

# Code Llama (the trailing slash on the source directory is kept as-is)
convert_f16 ../codellama/CodeLlama-7b/  codellama-7b
convert_f16 ../codellama/CodeLlama-13b/ codellama-13b
convert_f16 ../codellama/CodeLlama-34b/ codellama-34b

# Falcon
convert_falcon_f16 falcon-7b

convert_falcon_f16 falcon-40b
|
||||
29
scripts/qnt-all.sh
Executable file
29
scripts/qnt-all.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
#
# Quantize one model's f16 GGUF file into a list of quantization types.
#
# usage: qnt-all.sh <model> [qnt] [args]
#   <model>  directory name under ../models/ that holds ggml-model-f16.gguf
#   [qnt]    whitespace-separated list of quantization types
#            (default: the qnt array below)
#   [args]   extra arguments appended to every ./bin/quantize invocation
#
# The combined stdout/stderr of each run is shown on the terminal and also
# written to ../tmp/results-<model>/qnt-<type>.txt.

qnt=(q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k)
args=""

if [ -z "$1" ]; then
    echo "usage: $0 <model> [qnt] [args]"
    echo "default: $0 <model> \"${qnt[*]}\" \"${args}\""
    exit 1
fi

if [ -n "$2" ]; then
    # The list argument is split into words on purpose.
    # shellcheck disable=SC2206
    qnt=($2)
fi

if [ -n "$3" ]; then
    args="$3"
fi

# The extra arguments arrive as one string and are split into words on purpose.
# shellcheck disable=SC2206
extra_args=(${args})

model="$1"
out="../tmp/results-${model}"

# pipefail is required in addition to -e: every quantize run is piped into
# tee, and without it a failing quantize would be hidden by tee's exit status.
set -eo pipefail

mkdir -p "${out}"

for q in "${qnt[@]}"; do
    time ./bin/quantize \
        "../models/${model}/ggml-model-f16.gguf" \
        "../models/${model}/ggml-model-${q}.gguf" \
        "${q}" "${extra_args[@]}" 2>&1 \
        | tee "${out}/qnt-${q}.txt"
done
|
||||
33
scripts/run-all-perf.sh
Executable file
33
scripts/run-all-perf.sh
Executable file
|
|
@ -0,0 +1,33 @@
|
|||
#!/bin/bash
#
# Benchmark every quantization of one model with a single llama-bench call.
#
# usage: run-all-perf.sh <model> [qnt] [args]
#   <model>  directory name under ../models/ that holds ggml-model-<type>.gguf
#   [qnt]    whitespace-separated list of quantization types
#            (default: the qnt array below)
#   [args]   arguments passed to ./bin/llama-bench (default: the args string below)

qnt=(f16 q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k)
args="-ngl 999 -n 64 -p 512"

if [ -z "$1" ]; then
    echo "usage: $0 <model> [qnt] [args]"
    echo "default: $0 <model> \"${qnt[*]}\" \"${args}\""
    exit 1
fi

if [ -n "$2" ]; then
    # The list argument is split into words on purpose.
    # shellcheck disable=SC2206
    qnt=($2)
fi

if [ -n "$3" ]; then
    args="$3"
fi

# The benchmark arguments arrive as one string and are split into words on purpose.
# shellcheck disable=SC2206
bench_args=(${args})

model="$1"
out="../tmp/results-${model}"

set -e

# The results directory is created here even though this script does not
# write into it itself.
mkdir -p "${out}"

# One "-m <file>" pair per quantization type, kept in an array so that a
# model name containing whitespace or glob characters stays a single argument.
model_args=()
for q in "${qnt[@]}"; do
    model_args+=(-m "../models/${model}/ggml-model-${q}.gguf")
done

# stderr is discarded; only what llama-bench prints on stdout is shown.
./bin/llama-bench "${model_args[@]}" "${bench_args[@]}" 2> /dev/null
|
||||
29
scripts/run-all-ppl.sh
Executable file
29
scripts/run-all-ppl.sh
Executable file
|
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
#
# Run the perplexity tool on every quantization of one model.
#
# usage: run-all-ppl.sh <model> [qnt] [args]
#   <model>  directory name under ../models/ that holds ggml-model-<type>.gguf
#   [qnt]    whitespace-separated list of quantization types
#            (default: the qnt array below)
#   [args]   extra arguments passed to ./bin/perplexity (default: the args string below)
#
# Each run reads ./wiki.test.raw; its combined stdout/stderr is shown on the
# terminal and also written to ../tmp/results-<model>/ppl-<type>.txt.

qnt=(f16 q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k)
args="-ngl 999 -t 8"

if [ -z "$1" ]; then
    echo "usage: $0 <model> [qnt] [args]"
    echo "default: $0 <model> \"${qnt[*]}\" \"${args}\""
    exit 1
fi

if [ -n "$2" ]; then
    # The list argument is split into words on purpose.
    # shellcheck disable=SC2206
    qnt=($2)
fi

if [ -n "$3" ]; then
    args="$3"
fi

# The extra arguments arrive as one string and are split into words on purpose.
# shellcheck disable=SC2206
ppl_args=(${args})

# pipefail is required in addition to -e: every perplexity run is piped into
# tee, and without it a failing run would be hidden by tee's exit status.
set -eo pipefail

model="$1"
out="../tmp/results-${model}"

mkdir -p "${out}"

for q in "${qnt[@]}"; do
    # Evaluate the file that belongs to the current quantization type; always
    # loading the f16 file would put identical results into every ppl-<type>.txt.
    time ./bin/perplexity \
        -m "../models/${model}/ggml-model-${q}.gguf" \
        -f ./wiki.test.raw "${ppl_args[@]}" 2>&1 \
        | tee "${out}/ppl-${q}.txt"
done
|
||||
Loading…
Add table
Add a link
Reference in a new issue