Merge branch 'master' into concedo_experimental

# Conflicts:
#	.dockerignore
#	.github/workflows/build.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	flake.lock
#	flake.nix
#	tests/CMakeLists.txt
This commit is contained in:
Concedo 2023-08-28 14:19:05 +08:00
commit 4b00916ac7
53 changed files with 4980 additions and 1910 deletions

26
scripts/convert-gg.sh Executable file
View file

@ -0,0 +1,26 @@
#!/bin/bash
# Convert the locally stored LLaMA v1, LLaMA v2, Code Llama and Falcon
# checkpoints into f16 GGUF files under models/.
# The script stops at the first command that fails.
set -e

# LLaMA v1 (entries are <source dir>:<output suffix>)
for pair in 7B:7b 13B:13b 30B:30b 65B:65b; do
  python3 convert.py "../llama1/${pair%%:*}" --outfile "models/llama-${pair##*:}/ggml-model-f16.gguf" --outtype f16
done

# LLaMA v2
for size in 7b 13b 70b; do
  python3 convert.py "../llama2/llama-2-${size}" --outfile "models/llama-${size}-v2/ggml-model-f16.gguf" --outtype f16
done

# Code Llama
for size in 7b 13b 34b; do
  python3 convert.py "../codellama/CodeLlama-${size}/" --outfile "models/codellama-${size}/ggml-model-f16.gguf" --outtype f16
done

# Falcon: after each conversion, ggml-model-f16.gguf is moved out of the
# source directory into models/.
# NOTE(review): the trailing "1" presumably selects f16 output - confirm
# against convert-falcon-hf-to-gguf.py.
for size in 7b 40b; do
  python3 convert-falcon-hf-to-gguf.py "../falcon/falcon-${size}" 1
  mv -v "../falcon/falcon-${size}/ggml-model-f16.gguf" "models/falcon-${size}/ggml-model-f16.gguf"
done

29
scripts/qnt-all.sh Executable file
View file

@ -0,0 +1,29 @@
#!/bin/bash
#
# Quantize a model's f16 GGUF file into a set of quantization types.
#
# Usage: qnt-all.sh <model> [qnt] [args]
#   model  directory name under ../models/ containing ggml-model-f16.gguf
#   qnt    whitespace-separated list of quantization types, passed as ONE
#          quoted argument (default: the list below)
#   args   whitespace-separated extra arguments appended to each
#          ./bin/quantize call, passed as ONE quoted argument (default: none)
#
# For every type the combined stdout/stderr of the run is shown and also
# written to ../tmp/results-<model>/qnt-<type>.txt.
# Exits 1 with a usage message when <model> is missing.

# pipefail is required here: each quantize call is piped into tee, and without
# it the pipeline reports tee's status, so -e would never see a failed quantize.
set -eo pipefail

qnt=(q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k)
args=()

if [[ -z "$1" ]]; then
  echo "usage: $0 <model> [qnt] [args]"
  echo "default: $0 <model> \"${qnt[*]}\" \"${args[*]}\""
  exit 1
fi

if [[ -n "$2" ]]; then
  # Deliberate word splitting: the type list arrives as one quoted argument.
  # shellcheck disable=SC2206
  qnt=($2)
fi

if [[ -n "$3" ]]; then
  # Deliberate word splitting: the extra arguments arrive as one quoted argument.
  # shellcheck disable=SC2206
  args=($3)
fi

model="$1"
out="../tmp/results-${model}"

mkdir -p "${out}"

for q in "${qnt[@]}"; do
  time ./bin/quantize \
    "../models/${model}/ggml-model-f16.gguf" \
    "../models/${model}/ggml-model-${q}.gguf" \
    "${q}" "${args[@]}" 2>&1 | tee "${out}/qnt-${q}.txt"
done

33
scripts/run-all-perf.sh Executable file
View file

@ -0,0 +1,33 @@
#!/bin/bash
#
# Benchmark every quantization of a model with a single llama-bench run.
#
# Usage: run-all-perf.sh <model> [qnt] [args]
#   model  directory name under ../models/ containing ggml-model-<type>.gguf
#   qnt    whitespace-separated list of quantization types, passed as ONE
#          quoted argument (default: the list below)
#   args   whitespace-separated arguments for ./bin/llama-bench, passed as ONE
#          quoted argument (default: "-ngl 999 -n 64 -p 512")
#
# Exits 1 with a usage message when <model> is missing.

set -e

qnt=(f16 q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k)
args=(-ngl 999 -n 64 -p 512)

if [[ -z "$1" ]]; then
  echo "usage: $0 <model> [qnt] [args]"
  echo "default: $0 <model> \"${qnt[*]}\" \"${args[*]}\""
  exit 1
fi

if [[ -n "$2" ]]; then
  # Deliberate word splitting: the type list arrives as one quoted argument.
  # shellcheck disable=SC2206
  qnt=($2)
fi

if [[ -n "$3" ]]; then
  # Deliberate word splitting: the bench arguments arrive as one quoted argument.
  # shellcheck disable=SC2206
  args=($3)
fi

model="$1"
out="../tmp/results-${model}"

# Nothing in this script writes into the results directory; it is still
# created so the on-disk side effect matches the sibling scripts.
mkdir -p "${out}"

# Collect one "-m <file>" pair per quantization type. An array (rather than a
# concatenated string) keeps model names with spaces or glob characters intact.
models=()
for q in "${qnt[@]}"; do
  models+=(-m "../models/${model}/ggml-model-${q}.gguf")
done

# stderr is discarded on purpose so that only llama-bench's stdout is shown.
./bin/llama-bench "${models[@]}" "${args[@]}" 2> /dev/null

29
scripts/run-all-ppl.sh Executable file
View file

@ -0,0 +1,29 @@
#!/bin/bash
#
# Run the perplexity tool on every quantization of a model.
#
# Usage: run-all-ppl.sh <model> [qnt] [args]
#   model  directory name under ../models/ containing ggml-model-<type>.gguf
#   qnt    whitespace-separated list of quantization types, passed as ONE
#          quoted argument (default: the list below)
#   args   whitespace-separated arguments for ./bin/perplexity, passed as ONE
#          quoted argument (default: "-ngl 999 -t 8")
#
# The evaluation text is read from ./wiki.test.raw. For every type the combined
# stdout/stderr of the run is shown and also written to
# ../tmp/results-<model>/ppl-<type>.txt.
# Exits 1 with a usage message when <model> is missing.

# pipefail is required here: each perplexity call is piped into tee, and
# without it the pipeline reports tee's status, so -e would never see a failure.
set -eo pipefail

qnt=(f16 q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k)
args=(-ngl 999 -t 8)

if [[ -z "$1" ]]; then
  echo "usage: $0 <model> [qnt] [args]"
  echo "default: $0 <model> \"${qnt[*]}\" \"${args[*]}\""
  exit 1
fi

if [[ -n "$2" ]]; then
  # Deliberate word splitting: the type list arrives as one quoted argument.
  # shellcheck disable=SC2206
  qnt=($2)
fi

if [[ -n "$3" ]]; then
  # Deliberate word splitting: the tool arguments arrive as one quoted argument.
  # shellcheck disable=SC2206
  args=($3)
fi

model="$1"
out="../tmp/results-${model}"

mkdir -p "${out}"

for q in "${qnt[@]}"; do
  # Load the file for the current type. Hard-coding the f16 file here would
  # make every ppl-<type>.txt report the same f16 result.
  time ./bin/perplexity \
    -m "../models/${model}/ggml-model-${q}.gguf" \
    -f ./wiki.test.raw \
    "${args[@]}" 2>&1 | tee "${out}/ppl-${q}.txt"
done