mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-09 19:46:11 +00:00
remove junk
This commit is contained in:
parent
18c8d4b31c
commit
1e460bb936
74 changed files with 0 additions and 6041 deletions
|
|
@ -1,174 +0,0 @@
|
|||
#!/bin/bash
|
||||
set -euo pipefail
|
||||
|
||||
#
|
||||
# check-requirements.sh checks all requirements files for each top-level
|
||||
# convert*.py script.
|
||||
#
|
||||
# WARNING: This is quite IO intensive, because a fresh venv is set up for every
|
||||
# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately
|
||||
# sized tmpfs /tmp or ramdisk is recommended if running this frequently.
|
||||
#
|
||||
# usage: check-requirements.sh [<working_dir>]
|
||||
# check-requirements.sh nocleanup [<working_dir>]
|
||||
#
|
||||
# where:
|
||||
# - <working_dir> is a directory that can be used as the base for
|
||||
# setting up the venvs. Defaults to `/tmp`.
|
||||
# - 'nocleanup' as the first argument will disable automatic cleanup
|
||||
# of the files created by this script.
|
||||
#
|
||||
# requires:
|
||||
# - bash >= 3.2.57
|
||||
# - shellcheck
|
||||
#
|
||||
# For each script, it creates a fresh venv, `pip install`s the requirements, and
|
||||
# finally imports the python script to check for `ImportError`.
|
||||
#
|
||||
|
||||
# Write "<level>: <message>" to stderr.
#   $1 - severity label
#   $2 - message text
log() {
    local severity=$1
    local text=$2
    printf '%s: %s\n' "$severity" "$text" >&2
}
|
||||
|
||||
# Forward the given message to log() at DEBUG level.
debug() { log DEBUG "$@"; }
|
||||
|
||||
# Forward the given message to log() at INFO level.
info() { log INFO "$@"; }
|
||||
|
||||
# Forward the given message to log() at FATAL level, then exit with status 1.
fatal() {
    log FATAL "$@"; exit 1
}
|
||||
|
||||
# Delete the working directory, printing a progress dot each time more than
# 750 removed paths have been reported by rm since the previous dot.
# Globals: workdir (read) - may be unset; in that case nothing is done.
cleanup() {
    # Only act when workdir is set and names a writable directory.
    [[ -n ${workdir+x} && -d $workdir && -w $workdir ]] || return 0

    info "Removing $workdir"
    local seen=0
    # The loop runs in the pipeline's subshell, which gets its own copy of
    # `seen`; the counter only needs to live for the duration of the pipe.
    rm -rfv -- "$workdir" | while read -r; do
        if (( seen++ > 750 )); then
            printf .
            seen=0
        fi
    done
    printf '\n'
    info "Removed $workdir"
}
|
||||
|
||||
# 'nocleanup' as the first argument keeps the files this script creates.
do_cleanup=1
if [[ ${1-} == nocleanup ]]; then
    do_cleanup=0
    shift
fi

# Turn INT/TERM into a regular exit so that the EXIT trap runs cleanup.
if (( do_cleanup )); then
    trap exit INT TERM
    trap cleanup EXIT
fi

this=$(realpath -- "$0")
readonly this
cd "$(dirname "$this")/.." # PWD should stay in llama.cpp project directory

shellcheck "$this"

readonly reqs_dir=requirements

# Base directory for the venvs: the (remaining) first argument, else /tmp.
tmp_dir=/tmp
if [[ ${1+x} ]]; then
    tmp_dir=$(realpath -- "$1")
    if [[ ! -d $tmp_dir || ! -w $tmp_dir ]]; then
        fatal "$tmp_dir is not a writable directory"
    fi
fi

workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX")
readonly workdir
info "Working directory: $workdir"
|
||||
|
||||
# Install one requirements file with whichever `pip` is first on PATH.
#   $1 - path of the requirements file
# Reports the start and the successful end through info().
check_requirements() {
    local req_file=$1

    info "$req_file: beginning check"
    pip --disable-pip-version-check install -qr "$req_file"
    info "$req_file: OK"
}
|
||||
|
||||
# Check a single convert script: create a fresh venv, install the script's own
# requirements file into it, then load the script with Python so that failures
# raised while importing it surface.
#   $1 - path of the script
# Globals: reqs_dir, workdir, do_cleanup (all read).
check_convert_script() {
    local script=$1            # e.g. ./convert-hf-to-gguf.py
    local stem=${script##*/}   # e.g. convert-hf-to-gguf.py
    stem=${stem%.py}           # e.g. convert-hf-to-gguf

    info "$script: beginning check"

    local reqs="$reqs_dir/requirements-$stem.txt"
    [[ -r $reqs ]] || fatal "$script missing requirements. Expected: $reqs"

    local venv="$workdir/$stem-venv"
    python3 -m venv "$venv"

    # Activation happens in a subshell so it cannot leak into later checks.
    (
        # shellcheck source=/dev/null
        source "$venv/bin/activate"

        check_requirements "$reqs"

        python - "$script" "$stem" <<'EOF'
import sys
from importlib.machinery import SourceFileLoader
py, pyname = sys.argv[1:]
SourceFileLoader(pyname, py).load_module()
EOF
    )

    if (( do_cleanup )); then
        rm -rf -- "$venv"
    fi

    info "$script: imports OK"
}
|
||||
|
||||
readonly ignore_eq_eq='check_requirements: ignore "=="'

for req in "$reqs_dir"/*; do
    # Each file in the requirements directory has to be mentioned in the
    # top-level requirements.txt.
    grep -qF "$req" requirements.txt \
        || fatal "$req needs to be added to requirements.txt"

    # Reject exact '==' pins, except on lines carrying the ignore marker.
    if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
        tab=$'\t'
        cat >&2 <<EOF
FATAL: Avoid pinning exact package versions. Use '~=' instead.
You can suppress this error by appending the following to the line:
$tab# $ignore_eq_eq
EOF
        exit 1
    fi
done

# The combined requirements.txt is installed into a venv of its own.
all_venv="$workdir/all-venv"
python3 -m venv "$all_venv"

(
    # shellcheck source=/dev/null
    source "$all_venv/bin/activate"
    check_requirements requirements.txt
)

if (( do_cleanup )); then
    rm -rf -- "$all_venv"
fi

# convert.py first, then every convert-*.py in the project directory.
for py in convert.py convert-*.py; do
    check_convert_script "$py"
done

info 'Done! No issues found.'
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
#!/bin/bash
|
||||
set -euo pipefail

this=$(realpath "$0")
readonly this
cd "$(dirname "$this")"
shellcheck "$this"

# Exactly one or two arguments are accepted.
if (( $# < 1 || $# > 2 )); then
    cat >&2 <<'EOF'
usage:
ci-run.sh <tmp_dir> [<cache_dir>]

This script wraps ci/run.sh:
* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
(openllama_3b_v2: quantized models are about 30GB)
* Persistent model and data files are synced to and from <cache_dir>,
excluding generated .gguf files.
(openllama_3b_v2: persistent files are about 6.6GB)
* <cache_dir> defaults to ~/.cache/llama.cpp
EOF
    exit 1
fi

cd .. # => llama.cpp repo root

# Create both directories if needed, then work with their resolved paths.
tmp="$1"
mkdir -p "$tmp"
tmp=$(realpath "$tmp")
printf 'Using tmp=%s\n' "$tmp" >&2

cache="${2-$HOME/.cache/llama.cpp}"
mkdir -p "$cache"
cache=$(realpath "$cache")
printf 'Using cache=%s\n' "$cache" >&2
|
||||
|
||||
# Mirror one directory into another with rsync.
#   $1   - source directory (created if missing)
#   $2   - destination directory (created if missing)
#   $3.. - extra options appended to the rsync command line
_sync() {
    local src="$1"
    shift
    local dst="$1"
    shift

    printf 'Syncing from %s to %s\n' "$src" "$dst" >&2
    mkdir -p "$src" "$dst"
    rsync -a "$src" "$dst" --delete-during "$@"
}
|
||||
|
||||
# Copy the repository and the cached model directory below <tmp_dir>.
_sync "$(realpath .)/" "$tmp/llama.cpp"
_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"

# Run the CI script from inside the copy.
cd "$tmp/llama.cpp"
bash ci/run.sh ci-out ci-mnt

# Copy the model directory back to the cache, leaving out *.gguf files.
_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
if [ $# -lt 2 ]; then
    echo "usage: ./scripts/compare-commits.sh <commit1> <commit2> [additional llama-bench arguments]"
    exit 1
fi

set -e
set -x

# Keep the extra llama-bench arguments as an array: assigning "${@:3}" to a
# plain string flattens them, and arguments containing spaces are then split
# apart again when the string is expanded unquoted.
bench_args=("${@:3}")

rm -f llama-bench.sqlite

# Pick the backend: metal on macOS, cuda when nvcc is installed, else cpu.
backend="cpu"
if [[ "$OSTYPE" == "darwin"* ]]; then
    backend="metal"
elif command -v nvcc &> /dev/null; then
    backend="cuda"
fi

make_opts=()
if [[ "$backend" == "cuda" ]]; then
    make_opts+=("LLAMA_CUBLAS=1")
fi

# Check out one commit, rebuild llama-bench and append its SQL output to
# llama-bench.sqlite (echoing it to the terminal on the way).
#   $1 - commit, branch or tag to benchmark
bench_commit() {
    git checkout "$1"
    make clean && make -j32 "${make_opts[@]}" llama-bench
    ./llama-bench -o sql "${bench_args[@]}" | tee /dev/tty | sqlite3 llama-bench.sqlite
}

bench_commit "$1"
bench_commit "$2"

./scripts/compare-llama-bench.py -b "$1" -c "$2"
|
||||
|
|
@ -1,374 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import heapq
|
||||
import sys
|
||||
import os
|
||||
from glob import glob
|
||||
import sqlite3
|
||||
|
||||
# GitPython and tabulate are third-party packages; print a hint before
# re-raising when either of them cannot be imported.
try:
    import git
    from tabulate import tabulate
except ImportError as e:
    print("ERROR: the following Python libraries are required: GitPython, tabulate.")
    raise e

# Properties by which to differentiate results per commit:
KEY_PROPERTIES = [
    "cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas",
    "blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_threads",
    "type_k", "type_v", "no_kv_offload", "mul_mat_q", "tensor_split", "n_prompt", "n_gen"
]

# Properties that are boolean and are converted to Yes/No for the table:
BOOL_PROPERTIES = ["cuda", "opencl", "metal", "gpu_blas", "blas"]

# Header names for the table:
PRETTY_NAMES = {
    "cuda": "CUDA", "opencl": "OpenCL", "metal": "Metal", "gpu_blas": "GPU BLAS", "blas": "BLAS",
    "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
    "model_size": "Model Size [GiB]", "model_n_params": "Num. of Parameters",
    "n_batch": "Batch size", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
    "n_gpu_layers": "GPU layers", "main_gpu": "Main GPU", "no_kv_offload": "NKVO",
    "mul_mat_q": "MMQ", "tensor_split": "Tensor split"
}

DEFAULT_SHOW = ["model_type"]  # Always show these properties by default.
DEFAULT_HIDE = ["model_filename"]  # Always hide these properties by default.
GPU_NAME_STRIP = ["NVIDIA GeForce ", "Tesla ", "AMD Radeon "]  # Strip prefixes for smaller tables.
MODEL_SUFFIX_REPLACE = {" - Small": "_S", " - Medium": "_M", " - Large": "_L"}

DESCRIPTION = """Creates tables from llama-bench data written to an SQLite database. Example usage (Linux):

$ git checkout master
$ make clean && make llama-bench
$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
$ git checkout some_branch
$ make clean && make llama-bench
$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
$ ./scripts/compare-llama-bench.py

Performance numbers from multiple runs per commit are averaged WITHOUT being weighted by the --repetitions parameter of llama-bench.
"""

# ---- Command line ---------------------------------------------------------
parser = argparse.ArgumentParser(
    description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter)

help_b = (
    "The baseline commit to compare performance to. "
    "Accepts either a branch name, tag name, or commit hash. "
    "Defaults to latest master commit with data."
)
parser.add_argument("-b", "--baseline", help=help_b)

help_c = (
    "The commit whose performance is to be compared to the baseline. "
    "Accepts either a branch name, tag name, or commit hash. "
    "Defaults to the non-master commit for which llama-bench was run most recently."
)
parser.add_argument("-c", "--compare", help=help_c)

help_i = (
    "Input SQLite file for comparing commits. "
    "Defaults to 'llama-bench.sqlite' in the current working directory. "
    "If no such file is found and there is exactly one .sqlite file in the current directory, "
    "that file is instead used as input."
)
parser.add_argument("-i", "--input", help=help_i)

help_o = (
    "Output format for the table. "
    "Defaults to 'pipe' (GitHub compatible). "
    "Also supports e.g. 'latex' or 'mediawiki'. "
    "See tabulate documentation for full list."
)
parser.add_argument("-o", "--output", help=help_o, default="pipe")

help_s = (
    "Columns to add to the table. "
    "Accepts a comma-separated list of values. "
    f"Legal values: {', '.join(KEY_PROPERTIES[:-2])}. "
    "Defaults to model name (model_type) and CPU and/or GPU name (cpu_info, gpu_info) "
    "plus any column where not all data points are the same. "
    "If the columns are manually specified, then the results for each unique combination of the "
    "specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench."
)
parser.add_argument("-s", "--show", help=help_s)

known_args, unknown_args = parser.parse_known_args()

# Anything argparse did not recognise is treated as a usage error.
if unknown_args:
    print(f"ERROR: Received unknown args: {unknown_args}.")
    print()
    parser.print_help()
    sys.exit(1)

# ---- Input database -------------------------------------------------------
# Order of preference: --input, ./llama-bench.sqlite, the only *.sqlite file
# in the current directory.
input_file = known_args.input
if input_file is None:
    if os.path.exists("./llama-bench.sqlite"):
        input_file = "llama-bench.sqlite"
    else:
        sqlite_files = glob("*.sqlite")
        if len(sqlite_files) == 1:
            input_file = sqlite_files[0]

if input_file is None:
    print("ERROR: Cannot find a suitable input file, please provide one.")
    print()
    parser.print_help()
    sys.exit(1)

connection = sqlite3.connect(input_file)
cursor = connection.cursor()
# One 1-tuple per distinct build_commit value stored in the database.
builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall()

# The git repository is optional; without one, `repo` is None.
try:
    repo = git.Repo(".", search_parent_directories=True)
except git.exc.InvalidGitRepositoryError:
    repo = None
|
||||
|
||||
|
||||
def find_parent_in_data(commit):
    """Return the hexsha8 of the nearest commit, starting at ``commit``, that has data.

    ``commit`` and its ancestors (followed through ``.parents``) are visited in
    order of increasing distance, measured in parent links. The first one whose
    8-character hash appears in the module-level ``builds`` rows is returned.
    Returns None when none of the visited commits has data.
    """
    # The second tuple element is a running counter. It breaks ties between
    # entries of equal depth so that heapq never has to compare two commit
    # objects with each other (e.g. the two parents of a merge commit).
    heap = [(0, 0, commit)]
    pushed = 1
    seen_hexsha8 = {commit.hexsha[:8]}
    while heap:
        depth, _, current_commit = heapq.heappop(heap)
        # Inspect the commit just popped, not the starting commit; otherwise
        # the search never advances past the first generation.
        current_hexsha8 = current_commit.hexsha[:8]
        if (current_hexsha8,) in builds:
            return current_hexsha8
        for parent in current_commit.parents:
            parent_hexsha8 = parent.hexsha[:8]
            if parent_hexsha8 not in seen_hexsha8:
                seen_hexsha8.add(parent_hexsha8)
                heapq.heappush(heap, (depth + 1, pushed, parent))
                pushed += 1
    return None
|
||||
|
||||
|
||||
def get_all_parent_hexsha8s(commit):
    """Return the hexsha8 of ``commit`` and of every ancestor reachable via ``.parents``.

    The result is a list in breadth-first order in which each hash appears
    exactly once, even when an ancestor is reachable along several paths.
    """
    queue = [commit]
    seen = {commit.hexsha[:8]}
    position = 0
    # `queue` doubles as the visit order: a commit is appended once, when it
    # is first discovered, and never removed, so shared ancestors are not
    # walked again.
    while position < len(queue):
        for parent in queue[position].parents:
            hexsha8 = parent.hexsha[:8]
            if hexsha8 not in seen:
                seen.add(hexsha8)
                queue.append(parent)
        position += 1
    return [visited_commit.hexsha[:8] for visited_commit in queue]
|
||||
|
||||
|
||||
def get_commit_name(hexsha8):
    """Map an 8-character commit hash to a branch or tag name when one points at it.

    Branch heads are searched before tags. The hash itself is returned when no
    repository is available or when no head or tag matches.
    """
    if repo is not None:
        # getattr keeps the lookup lazy: tags are only read if no head matched.
        for ref_kind in ("heads", "tags"):
            for ref in getattr(repo, ref_kind):
                if ref.commit.hexsha[:8] == hexsha8:
                    return ref.name
    return hexsha8
|
||||
|
||||
|
||||
def get_commit_hexsha8(name):
    """Map a branch or tag name to the 8-character hash of the commit it points at.

    Branch heads are searched before tags. Returns None when no repository is
    available or when no head or tag carries that name.
    """
    if repo is not None:
        # getattr keeps the lookup lazy: tags are only read if no head matched.
        for ref_kind in ("heads", "tags"):
            for ref in getattr(repo, ref_kind):
                if ref.name == name:
                    return ref.commit.hexsha[:8]
    return None
|
||||
|
||||
|
||||
hexsha8_baseline = name_baseline = None

if known_args.baseline is not None:
    # An explicit baseline is first taken literally as a build_commit value
    # from the database, and otherwise looked up as a branch or tag name.
    if (known_args.baseline,) in builds:
        hexsha8_baseline = known_args.baseline
    else:
        hexsha8_baseline = get_commit_hexsha8(known_args.baseline)
        name_baseline = known_args.baseline
    if hexsha8_baseline is None:
        print(f"ERROR: cannot find data for baseline={known_args.baseline}.")
        sys.exit(1)
elif repo is not None:
    # No baseline given: use the closest commit to master's head that has data.
    hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)

    if hexsha8_baseline is None:
        print("ERROR: No baseline was provided and did not find data for any master branch commits.")
        print()
        parser.print_help()
        sys.exit(1)
else:
    # Neither a baseline nor a repository to infer one from.
    print(
        "ERROR: No baseline was provided and the current working directory "
        "is not part of a git repository from which a baseline could be inferred."
    )
    print()
    parser.print_help()
    sys.exit(1)


# The display name is always derived from the resolved hash.
name_baseline = get_commit_name(hexsha8_baseline)
|
||||
|
||||
hexsha8_compare = name_compare = None

# If the user has specified a compare value, try to find a corresponding commit:
if known_args.compare is not None:
    # Take the value literally if it is a build_commit in the database,
    # otherwise look it up as a branch or tag name.
    if (known_args.compare,) in builds:
        hexsha8_compare = known_args.compare
    if hexsha8_compare is None:
        hexsha8_compare = get_commit_hexsha8(known_args.compare)
        name_compare = known_args.compare
    if hexsha8_compare is None:
        # This message used to say "baseline=", which pointed users at the
        # wrong command-line argument.
        print(f"ERROR: cannot find data for compare={known_args.compare}.")
        sys.exit(1)
# Otherwise, search for the commit for which llama-bench was most recently run
# and that is not a parent of master:
elif repo is not None:
    hexsha8s_master = get_all_parent_hexsha8s(repo.heads.master.commit)
    builds_timestamp = cursor.execute(
        "SELECT build_commit, test_time FROM test ORDER BY test_time;").fetchall()
    # Walk the runs from newest to oldest and stop at the first non-master one.
    for (hexsha8, _) in reversed(builds_timestamp):
        if hexsha8 not in hexsha8s_master:
            hexsha8_compare = hexsha8
            break

    if hexsha8_compare is None:
        print("ERROR: No compare target was provided and did not find data for any non-master commits.")
        print()
        parser.print_help()
        sys.exit(1)
else:
    print(
        "ERROR: No compare target was provided and the current working directory "
        "is not part of a git repository from which a compare target could be inferred."
    )
    print()
    parser.print_help()
    sys.exit(1)

# The display name is always derived from the resolved hash.
name_compare = get_commit_name(hexsha8_compare)
|
||||
|
||||
|
||||
def get_rows(properties):
    """
    Fetch the table rows for a list of properties.

    The test table is joined with itself: baseline rows (tb) are paired with
    compare rows (tc) that agree on every entry of KEY_PROPERTIES. The pairs are
    grouped by the requested properties plus n_gen and n_prompt, and the t/s
    values of each group are averaged.

    Each returned row holds the requested property values followed by n_prompt,
    n_gen, the averaged baseline t/s and the averaged compare t/s.
    """
    prop_columns = [f"tb.{p}" for p in properties]
    select_columns = prop_columns + ["tb.n_prompt", "tb.n_gen", "AVG(tb.avg_ts)", "AVG(tc.avg_ts)"]

    join_conditions = [f"tb.{p} = tc.{p}" for p in KEY_PROPERTIES]
    join_conditions.append(f"tb.build_commit = '{hexsha8_baseline}'")
    join_conditions.append(f"tc.build_commit = '{hexsha8_compare}'")

    # The same column list serves for both GROUP BY and ORDER BY.
    group_order_string = ", ".join(prop_columns + ["tb.n_gen", "tb.n_prompt"])

    query = (
        f"SELECT {', '.join(select_columns)} FROM test tb JOIN test tc ON {' AND '.join(join_conditions)} "
        f"GROUP BY {group_order_string} ORDER BY {group_order_string};"
    )
    return cursor.execute(query).fetchall()
|
||||
|
||||
|
||||
# If the user provided columns to group the results by, use them:
|
||||
# If the user provided columns to group the results by, use them:
if known_args.show is not None:
    show = known_args.show.split(",")
    # Last two values of KEY_PROPERTIES are n_prompt, n_gen.
    unknown_cols = [prop for prop in show if prop not in KEY_PROPERTIES[:-2]]
    if unknown_cols:
        print(f"ERROR: Unknown values for --show: {', '.join(unknown_cols)}")
        print()
        parser.print_usage()
        sys.exit(1)
    rows_show = get_rows(show)
# Otherwise, select those columns where the values are not all the same:
else:
    rows_full = get_rows(KEY_PROPERTIES)
    properties_different = []
    for i, kp_i in enumerate(KEY_PROPERTIES):
        if kp_i in DEFAULT_SHOW or kp_i == "n_prompt" or kp_i == "n_gen":
            continue
        if any(row_full[i] != rows_full[0][i] for row_full in rows_full):
            properties_different.append(kp_i)

    show = []
    # Show CPU and/or GPU by default even if the hardware for all results is the same.
    # `rows_full` is checked first because the first row is read below and the
    # query may have matched nothing.
    if rows_full and "gpu_blas" not in properties_different and "n_gpu_layers" not in properties_different:
        gpu_blas = bool(rows_full[0][KEY_PROPERTIES.index("gpu_blas")])
        ngl = int(rows_full[0][KEY_PROPERTIES.index("n_gpu_layers")])

        # The parentheses matter: `and` binds tighter than `or`, and without
        # them cpu_info was appended here even when properties_different
        # already contains it, which produced a duplicate column.
        if (not gpu_blas or ngl != 99) and "cpu_info" not in properties_different:
            show.append("cpu_info")
        if gpu_blas and "gpu_info" not in properties_different:
            show.append("gpu_info")

    show += properties_different

    # Insert the default columns after any leading hardware-related columns.
    index_default = 0
    for prop in ["cpu_info", "gpu_info", "n_gpu_layers", "main_gpu"]:
        if prop in show:
            index_default += 1
    show = show[:index_default] + DEFAULT_SHOW + show[index_default:]
    for prop in DEFAULT_HIDE:
        try:
            show.remove(prop)
        except ValueError:
            pass
    rows_show = get_rows(show)
|
||||
|
||||
# Each row from get_rows ends with n_prompt, n_gen, baseline t/s, compare t/s.
table = []
for row in rows_show:
    *prop_values, n_prompt, n_gen, ts_baseline, ts_compare = row
    n_prompt = int(n_prompt)
    n_gen = int(n_gen)
    assert n_prompt == 0 or n_gen == 0
    test_name = f"tg{n_gen}" if n_prompt == 0 else f"pp{n_prompt}"
    speedup = float(ts_compare) / float(ts_baseline)
    # Regular columns, test name, avg t/s values, speedup.
    table.append(prop_values + [test_name, ts_baseline, ts_compare, speedup])

# Some a-posteriori fixes to make the table contents prettier:

# Booleans become Yes/No.
for bool_property in BOOL_PROPERTIES:
    if bool_property in show:
        ip = show.index(bool_property)
        for row_table in table:
            row_table[ip] = "Yes" if int(row_table[ip]) == 1 else "No"

# Long model size suffixes are abbreviated.
if "model_type" in show:
    ip = show.index("model_type")
    for row_table in table:
        for (old, new) in MODEL_SUFFIX_REPLACE.items():
            row_table[ip] = row_table[ip].replace(old, new)

# The model size is divided by 1024**3 (the header labels it GiB).
if "model_size" in show:
    ip = show.index("model_size")
    for row_table in table:
        row_table[ip] = float(row_table[ip]) / 1024 ** 3

# Vendor prefixes are stripped and identical GPUs are collapsed to "<n>x <name>".
if "gpu_info" in show:
    ip = show.index("gpu_info")
    for row_table in table:
        for gns in GPU_NAME_STRIP:
            row_table[ip] = row_table[ip].replace(gns, "")

        gpu_names = row_table[ip].split("/")
        num_gpus = len(gpu_names)
        all_names_the_same = len(set(gpu_names)) == 1
        if num_gpus >= 2 and all_names_the_same:
            row_table[ip] = f"{num_gpus}x {gpu_names[0]}"

headers = [PRETTY_NAMES[p] for p in show]
headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]

print(tabulate(
    table,
    headers=headers,
    floatfmt=".2f",
    tablefmt=known_args.output
))
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)

set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")

# Only write the build info if it changed
if(EXISTS ${OUTPUT_FILE})
    # Pull the values recorded in the existing output file.
    file(READ ${OUTPUT_FILE} CONTENTS)
    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS})
    set(OLD_COMMIT ${CMAKE_MATCH_1})
    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS})
    set(OLD_COMPILER ${CMAKE_MATCH_1})
    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS})
    set(OLD_TARGET ${CMAKE_MATCH_1})

    # Regenerate when any recorded value differs from the current one.
    # NOTE(review): BUILD_COMMIT / BUILD_COMPILER / BUILD_TARGET are presumably
    # set by the included build-info.cmake - confirm.
    if (
        NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR
        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
        NOT OLD_TARGET STREQUAL BUILD_TARGET
    )
        configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
    endif()
else()
    # No output yet: always generate it.
    configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
endif()
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
# Classify $(GF_CC) from its --version output and encode its version in
# GF_CC_VER as three zero-padded two-digit fields.
# A compiler whose --version output does not contain "clang" is treated as gcc.
ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
  GF_CC_IS_GCC = 1
  GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
else
  GF_CC_IS_CLANG = 1
  # "Apple" in the --version output selects Apple clang, otherwise LLVM clang.
  ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
    GF_CC_IS_LLVM_CLANG = 1
  else
    GF_CC_IS_APPLE_CLANG = 1
  endif
  GF_CC_VER := \
    $(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
      | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
endif
|
||||
|
||||
# Warning flags per compiler family; some are gated on GF_CC_VER.
ifeq ($(GF_CC_IS_CLANG), 1)
  # clang options
  GF_CFLAGS = -Wunreachable-code-break -Wunreachable-code-return
  GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi

  # -Wdouble-promotion: LLVM clang from 030800, Apple clang from 070300.
  ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
    GF_CFLAGS += -Wdouble-promotion
  endif
  ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
    GF_CFLAGS += -Wdouble-promotion
  endif
else
  # gcc options
  GF_CFLAGS = -Wdouble-promotion
  GF_CXXFLAGS = -Wno-array-bounds

  # Version-gated additions: 070100 and 080100.
  ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
    GF_CXXFLAGS += -Wno-format-truncation
  endif
  ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
    GF_CXXFLAGS += -Wextra-semi
  endif
endif
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Download the HellaSwag validation text into the current directory.
# Stop with a non-zero status if the download fails, instead of printing the
# usage hint and reporting success for a file that is not there.
if ! wget https://raw.githubusercontent.com/klosax/hellaswag_text_data/main/hellaswag_val_full.txt; then
    echo "error: failed to download hellaswag_val_full.txt" >&2
    exit 1
fi

echo "Usage:"
echo ""
echo " ./perplexity -m model.gguf -f hellaswag_val_full.txt --hellaswag [--hellaswag-tasks N] [other params]"
echo ""

exit 0
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Print the usage text, including the token counts for selected values of n,
# to stdout and exit with status 1.
function usage {
    # The synopsis used to read "<n>$0", i.e. the argument was glued in front
    # of the script name.
    cat <<EOF
usage: $0 <n>
note: n is the number of essays to download
for specific n, the resulting pg.txt file will have the following number of tokens:
n | tokens
--- | ---
1 | 6230
2 | 23619
5 | 25859
10 | 36888
15 | 50188
20 | 59094
25 | 88764
30 | 103121
32 | 108338
35 | 113403
40 | 127699
45 | 135896
EOF
    exit 1
}
|
||||
|
||||
# Exit with status 1 and an error on stderr unless the named command exists.
#   $1 - command name
# The exit status of `command -v` is used directly, with the name quoted, so
# shell builtins and functions also count as available.
function has_cmd {
    if ! command -v "$1" > /dev/null 2>&1; then
        echo "error: $1 is not available" >&2
        exit 1
    fi
}
|
||||
|
||||
# check for: curl, html2text, tail, sed, fmt
has_cmd curl
has_cmd html2text
has_cmd tail
has_cmd sed
has_cmd fmt

if [ $# -ne 1 ]; then
    usage
fi

n=$1

# get urls
urls="$(curl http://www.aaronsw.com/2002/feeds/pgessays.rss | grep html | sed -e "s/.*http/http/" | sed -e "s/html.*/html/" | head -n "$n")"

printf "urls:\n%s\n" "$urls"

# start from an empty pg.txt
rm -f pg.txt

c=1
# $urls is deliberately unquoted: it holds one URL per line and is split here.
for url in $urls; do
    echo "processing $url"

    cc=$(printf "%03d" "$c")

    # Overwrite the per-essay file. Appending would stack the essay on top of
    # the copy left behind by an earlier run and duplicate it in pg.txt.
    curl -L "$url" | html2text | tail -n +4 | sed -E "s/^[[:space:]]+//g" | fmt -w 80 > "pg-$cc-one.txt"
    cat "pg-$cc-one.txt" >> pg.txt

    cp -v pg.txt "pg-$cc-all.txt"
    c=$((c+1))

    # don't flood the server
    sleep 1
done

echo "done. data in pg.txt"

exit 0
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
# Download the Winogrande evaluation CSV into the current directory.
# Stop with a non-zero status if the download fails, instead of printing the
# usage hint and reporting success for a file that is not there.
if ! wget https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw/main/winogrande-debiased-eval.csv; then
    echo "error: failed to download winogrande-debiased-eval.csv" >&2
    exit 1
fi

echo "Usage:"
echo ""
echo " ./perplexity -m model.gguf -f winogrande-debiased-eval.csv --winogrande [--winogrande-tasks N] [other params]"
echo ""

exit 0
|
||||
107
scripts/hf.sh
107
scripts/hf.sh
|
|
@ -1,107 +0,0 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Shortcut for downloading HF models
|
||||
#
|
||||
# Usage:
|
||||
#   ./main -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
|
||||
#   ./main -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
|
||||
#   ./main -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
|
||||
#
|
||||
|
||||
# all logs go to stderr
|
||||
# Echo all arguments to stderr, keeping stdout free for the script's result.
function log {
    >&2 echo "$@"
}
|
||||
|
||||
# Print the command synopsis through log and exit with status 1.
function usage {
    local synopsis="Usage: $0 [[--url] <url>] [--repo <repo>] [--file <file>] [-h|--help]"
    log "$synopsis"
    exit 1
}
|
||||
|
||||
# check for curl or wget
|
||||
# Return 0 when the named command exists, 1 otherwise.
#   $1 - command name
# The exit status of `command -v` is used directly, with the name quoted, so
# shell builtins and functions also count as available.
function has_cmd {
    if ! command -v "$1" > /dev/null 2>&1; then
        return 1
    fi
}
|
||||
|
||||
# Pick the download tool; wget is preferred over curl.
if has_cmd wget; then
    tool="wget"
elif has_cmd curl; then
    tool="curl"
else
    log "[E] curl or wget not found"
    exit 1
fi

url=""
repo=""
file=""

# parse args
while [[ $# -gt 0 ]]; do
    case "$1" in
        --url|--repo|--file)
            # An option given last without a value would make `shift 2` fail,
            # leave the argument list untouched and loop forever.
            if [[ $# -lt 2 ]]; then
                log "[E] missing value for $1"
                usage
            fi
            case "$1" in
                --url)  url="$2" ;;
                --repo) repo="$2" ;;
                --file) file="$2" ;;
            esac
            shift 2
            ;;
        -h|--help)
            usage
            ;;
        *)
            url="$1"
            shift
            ;;
    esac
done

# --repo together with --file takes precedence over any URL given.
if [ -n "$repo" ] && [ -n "$file" ]; then
    url="https://huggingface.co/$repo/resolve/main/$file"
fi

if [ -z "$url" ]; then
    log "[E] missing --url"
    usage
fi

# Only HuggingFace URLs are accepted. This is an error, so the exit status is
# non-zero: callers embed this script in $(...) and need to see the failure.
if [[ "$url" != "https://huggingface.co/"* ]]; then
    log "[E] invalid URL, must start with https://huggingface.co"
    exit 1
fi

# replace "blob/main" with "resolve/main"
url=${url/blob\/main/resolve\/main}

basename=$(basename -- "$url")

log "[+] attempting to download $basename"

# Build the command as an array so the file name and URL stay single arguments.
case "$tool" in
    wget) cmd=(wget -q --show-progress -c -O "$basename" "$url") ;;
    curl) cmd=(curl -C - -f -o "$basename" -L "$url") ;;
esac

log "[+] ${cmd[*]}"
if "${cmd[@]}"; then
    # The file name is the only thing written to stdout.
    printf '%s\n' "$basename"
    exit 0
fi

log "[-] failed to download"

exit 1
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
:: MIT license
|
||||
:: Copyright (C) 2024 Intel Corporation
|
||||
:: SPDX-License-Identifier: MIT
|
||||
|
||||
|
||||
:: %1 - URL passed to curl; %2 - optional value for the bootstrapper's --components option.
set URL=%1
set COMPONENTS=%2

:: Download the web image to %TEMP%, run it with webimage_extracted as its
:: target, then delete the download.
curl.exe --output %TEMP%\webimage.exe --url %URL% --retry 5 --retry-delay 5
start /b /wait %TEMP%\webimage.exe -s -x -f webimage_extracted --log extract.log
del %TEMP%\webimage.exe

:: Run the bootstrapper; --components is only passed when %2 was given.
if "%COMPONENTS%"=="" (
    webimage_extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
) else (
    webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
)

:: Save the exit code before the cleanup and return it from the script.
set installer_exit_code=%ERRORLEVEL%
rd /s/q "webimage_extracted"
exit /b %installer_exit_code%
|
||||
|
|
@ -1,140 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
import yaml
|
||||
|
||||
# Long CLI argument names (without the leading "--") that are forwarded to the
# "main" and "perplexity" binaries. The order here is the order in which the
# arguments are appended to the command line.
CLI_ARGS_MAIN_PERPLEXITY = [
    "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
    "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
    "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix", "instruct",
    "interactive", "interactive-first", "keep", "logdir", "logit-bias", "lora", "lora-base",
    "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",
    "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",
    # was misspelled "np-penalize-nl", which no preset property could ever match
    "no-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt",
    "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n",
    "repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed",
    "simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical",
    "verbose-prompt",
]

# Argument names forwarded to the "llama-bench" binary.
CLI_ARGS_LLAMA_BENCH = [
    "batch-size", "memory-f32", "low-vram", "model", "mul-mat-q", "n-gen", "n-gpu-layers",
    "n-prompt", "output", "repetitions", "tensor-split", "threads", "verbose",
]

# Argument names forwarded to the "server" binary.
CLI_ARGS_SERVER = [
    "alias", "batch-size", "ctx-size", "embedding", "host", "memory-f32", "lora", "lora-base",
    "low-vram", "main-gpu", "mlock", "model", "n-gpu-layers", "n-probs", "no-mmap", "no-mul-mat-q",
    "numa", "path", "port", "rope-freq-base", "timeout", "rope-freq-scale", "tensor-split",
    "threads", "verbose",
]
|
||||
|
||||
description = """Run llama.cpp binaries with presets from YAML file(s).
To specify which binary should be run, specify the "binary" property (main, perplexity, llama-bench, and server are supported).
To get a preset file template, run a llama.cpp binary with the "--logdir" CLI argument.

Formatting considerations:
- The YAML property names are the same as the CLI argument names of the corresponding binary.
- Properties must use the long name of their corresponding llama.cpp CLI arguments.
- Like the llama.cpp binaries the property names do not differentiate between hyphens and underscores.
- Flags must be defined as "<PROPERTY_NAME>: true" to be effective.
- To define the logit_bias property, the expected format is "<TOKEN_ID>: <BIAS>" in the "logit_bias" namespace.
- To define multiple "reverse_prompt" properties simultaneously the expected format is a list of strings.
- To define a tensor split, pass a list of floats.
"""
usage = "run-with-preset.py [-h] [yaml_files ...] [--<ARG_NAME> <ARG_VALUE> ...]"
epilog = (" --<ARG_NAME> specify additional CLI ars to be passed to the binary (override all preset files). "
          "Unknown args will be ignored.")

parser = argparse.ArgumentParser(
    description=description, usage=usage, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument("-bin", "--binary", help="The binary to run.")
parser.add_argument("yaml_files", nargs="*",
                    help="Arbitrary number of YAML files from which to read preset values. "
                    "If two files specify the same values the later one will be used.")

# Anything argparse does not recognise is kept and passed through to the binary.
known_args, unknown_args = parser.parse_known_args()

# Nothing to do without preset files or pass-through arguments.
if not known_args.yaml_files and not unknown_args:
    parser.print_help()
    sys.exit(0)

# Merge all preset files; later files override earlier ones.
props = dict()

for yaml_file in known_args.yaml_files:
    with open(yaml_file, "r") as f:
        loaded = yaml.load(f, yaml.SafeLoader)
    # An empty YAML document parses to None; treat it as "no properties"
    # instead of letting dict.update(None) raise a TypeError.
    if loaded is not None:
        props.update(loaded)

# Property names may use underscores or hyphens; normalise to hyphens.
props = {prop.replace("_", "-"): val for prop, val in props.items()}

# The binary comes from the presets ("main" by default); --binary overrides it.
binary = props.pop("binary", "main")
if known_args.binary:
    binary = known_args.binary

# Prefer an executable of that name in the current directory.
if os.path.exists(f"./{binary}"):
    binary = f"./{binary}"

# Pick the list of supported arguments from the suffix of the binary name.
binary_lower = binary.lower()
if binary_lower.endswith(("main", "perplexity")):
    cli_args = CLI_ARGS_MAIN_PERPLEXITY
elif binary_lower.endswith("llama-bench"):
    cli_args = CLI_ARGS_LLAMA_BENCH
elif binary_lower.endswith("server"):
    cli_args = CLI_ARGS_SERVER
else:
    print(f"Unknown binary: {binary}")
    sys.exit(1)

command_list = [binary]

for cli_arg in cli_args:
    value = props.pop(cli_arg, None)

    # Missing, falsy (including 0, False and empty) and -1 values are skipped.
    if not value or value == -1:
        continue

    # One "--logit-bias <token><signed bias>" pair per mapping entry.
    if cli_arg == "logit-bias":
        for token, bias in value.items():
            command_list.append("--logit-bias")
            command_list.append(f"{token}{bias:+}")
        continue

    # A non-string value is iterated: one "--reverse-prompt" per element.
    if cli_arg == "reverse-prompt" and not isinstance(value, str):
        for rp in value:
            command_list.append("--reverse-prompt")
            command_list.append(str(rp))
        continue

    command_list.append(f"--{cli_arg}")

    # The tensor split is passed as a single comma-separated argument.
    if cli_arg == "tensor-split":
        command_list.append(",".join([str(v) for v in value]))
        continue

    value = str(value)

    # A value of True marks a flag: only the option name is emitted.
    if value != "True":
        command_list.append(str(value))

# Report properties that were not consumed for the selected binary.
num_unused = len(props)
if num_unused > 10:
    print(f"The preset file contained a total of {num_unused} unused properties.")
elif num_unused > 0:
    print("The preset file contained the following unused properties:")
    for prop, value in props.items():
        print(f" {prop}: {value}")

# Pass-through arguments go last on the command line.
command_list += unknown_args

sp = subprocess.Popen(command_list)

# Keep waiting when Ctrl-C is pressed in this process, so that this script
# only exits once the child has terminated and its return code is known.
while sp.returncode is None:
    try:
        sp.wait()
    except KeyboardInterrupt:
        pass

sys.exit(sp.returncode)
|
||||
|
|
@ -1,423 +0,0 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Helper script for deploying llama.cpp server with a single Bash command
|
||||
#
|
||||
# - Works on Linux and macOS
|
||||
# - Supports: CPU, CUDA, Metal, OpenCL
|
||||
# - Can run all GGUF models from HuggingFace
|
||||
# - Can serve requests in parallel
|
||||
# - Always builds latest llama.cpp from GitHub
|
||||
#
|
||||
# Limitations
|
||||
#
|
||||
# - Chat templates are poorly supported (base models recommended)
|
||||
# - Might be unstable!
|
||||
#
|
||||
# Usage:
|
||||
#   ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [--non-interactive]
|
||||
#
|
||||
# --port: port number, default is 8888
|
||||
# --repo: path to a repo containing GGUF model files
|
||||
# --wtype: weights type (f16, q8_0, q4_0, q4_1), default is user-input
|
||||
# --backend: cpu, cuda, metal, opencl, depends on the OS
|
||||
# --gpu-id: gpu id, default is 0
|
||||
# --n-parallel: number of parallel requests, default is 8
|
||||
# --n-kv: KV cache size, default is 4096
|
||||
# --verbose: verbose output
|
||||
# --non-interactive: run without asking a permission to run
|
||||
#
|
||||
# Example:
|
||||
#
|
||||
# bash -c "$(curl -s https://ggml.ai/server-llm.sh)"
|
||||
#
|
||||
|
||||
set -e

# required utils: curl, git, make
# (checked in this order; the first missing one aborts the script)
for tool in curl git make; do
    if ! command -v "$tool" &> /dev/null; then
        printf "[-] %s not found\n" "$tool"
        exit 1
    fi
done
|
||||
|
||||
# defaults for everything that can be overridden on the command line
is_interactive=1
port=8888
repo=""
wtype=""
gpu_id=0
n_parallel=8
n_kv=4096
verbose=0

# default backend: metal on macOS, cuda when nvcc is on PATH, cpu otherwise
case "$OSTYPE" in
    darwin*)
        backend="metal"
        ;;
    *)
        if command -v nvcc &> /dev/null; then
            backend="cuda"
        else
            backend="cpu"
        fi
        ;;
esac
|
||||
|
||||
# Print the command line help to stdout.
# Takes no arguments and always succeeds.
print_usage() {
    # quoted delimiter: the $(curl ...) in the example must stay literal
    cat <<'EOF'
Usage:
  ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [--non-interactive]

  --port:             port number, default is 8888
  --repo:             path to a repo containing GGUF model files
  --wtype:            weights type (f16, q8_0, q4_0, q4_1), default is user-input
  --backend:          cpu, cuda, metal, opencl, depends on the OS
  --gpu-id:           gpu id, default is 0
  --n-parallel:       number of parallel requests, default is 8
  --n-kv:             KV cache size, default is 4096
  --verbose:          verbose output
  --non-interactive:  run without asking a permission to run

Example:

  bash -c "$(curl -s https://ggml.ai/server-llm.sh)"

EOF
}
|
||||
|
||||
# Parse the command line into the global option variables.
# Arguments: the script's arguments ("$@")
# Globals written: is_interactive, port, repo, wtype, backend, gpu_id,
#                  n_parallel, n_kv, verbose
# Exits 0 after --help, exits 1 on an unknown argument or a missing value.
parse_args() {
    local key
    while [[ $# -gt 0 ]]; do
        key="$1"
        case $key in
            --non-interactive)
                is_interactive=0
                shift
                ;;
            --verbose)
                verbose=1
                shift
                ;;
            --port|--repo|--wtype|--backend|--gpu-id|--n-parallel|--n-kv)
                # without this check the second "shift" fails and, with
                # "set -e" active, ends the script without any message
                if [[ $# -lt 2 ]]; then
                    printf "[-] Missing value for %s\n" "$key"
                    print_usage
                    exit 1
                fi
                case $key in
                    --port)       port="$2" ;;
                    --repo)       repo="$2" ;;
                    --wtype)      wtype="$2" ;;
                    --backend)    backend="$2" ;;
                    --gpu-id)     gpu_id="$2" ;;
                    --n-parallel) n_parallel="$2" ;;
                    --n-kv)       n_kv="$2" ;;
                esac
                shift 2
                ;;
            --help)
                print_usage
                exit 0
                ;;
            *)
                echo "Unknown argument: $key"
                print_usage
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
|
||||
|
||||
# available weights types
wtypes=("F16" "Q8_0" "Q4_0" "Q4_1" "Q5_0" "Q5_1" "Q6_K" "Q5_K_M" "Q5_K_S" "Q4_K_M" "Q4_K_S" "Q3_K_L" "Q3_K_M" "Q3_K_S" "Q2_K")

# one (initially empty) file name slot per weights type, same indexing as wtypes
wfiles=()
for wt in "${wtypes[@]}"; do
    wfiles+=("")
done

# Print the index of weights type $1 within wtypes.
# The comparison ignores case, so "q4_0" and "Q4_0" are the same type.
# Returns 1 (and prints nothing) when the type is not in the table.
find_wtype_index() {
    local wanted idx
    # upper-case the *input*: the table entries are already upper-case
    wanted=$(printf '%s' "$1" | tr '[:lower:]' '[:upper:]')
    for idx in "${!wtypes[@]}"; do
        if [[ "${wtypes[$idx]}" == "$wanted" ]]; then
            printf '%s\n' "$idx"
            return 0
        fi
    done
    return 1
}

# map wtype input to index
if [[ -n "$wtype" ]]; then
    if ! iw=$(find_wtype_index "$wtype"); then
        printf "[-] Invalid weight type: %s\n" "$wtype"
        exit 1
    fi

    wtype="$iw"
fi
|
||||
|
||||
# sample repos, offered as a numbered list when no --repo is given;
# every entry has the form https://huggingface.co/TheBloke/<model>-GGUF
repos=()
for model in \
    Llama-2-7B \
    Llama-2-13B \
    Llama-2-70B \
    CodeLlama-7B \
    CodeLlama-13B \
    CodeLlama-34B \
    Mistral-7B-v0.1 \
    zephyr-7B-beta \
    OpenHermes-2-Mistral-7B \
    CausalLM-7B
do
    repos+=("https://huggingface.co/TheBloke/${model}-GGUF")
done
|
||||
# in interactive mode, explain what the script is about to do and wait for
# the user to confirm with Enter
if [[ "$is_interactive" -eq 1 ]]; then
    # quoted delimiter: the text is printed literally
    cat <<'EOF'

[I] This is a helper script for deploying llama.cpp's server on this machine.

 Based on the options that follow, the script might download a model file
 from the internet, which can be a few GBs in size. The script will also
 build the latest llama.cpp source code from GitHub, which can be unstable.

 Upon success, an HTTP server will be started and it will serve the selected
 model using llama.cpp for demonstration purposes.

 Please note:

 - All new data will be stored in the current folder
 - The server will be listening on all network interfaces
 - The server will run with default settings which are not always optimal
 - Do not judge the quality of a model based on the results from this script
 - Do not use this script to benchmark llama.cpp
 - Do not use this script in production
 - This script is only for demonstration purposes

 If you don't know what you are doing, please press Ctrl-C to abort now

 Press Enter to continue ...

EOF

    read
fi
|
||||
|
||||
# no --repo given: let the user pick a sample repo by number or type an URL
if [[ -z "$repo" ]]; then
    printf "[+] No repo provided from the command line\n"
    printf " Please select a number from the list below or enter an URL:\n\n"

    for is in "${!repos[@]}"; do
        printf " %2d) %s\n" "$is" "${repos[$is]}"
    done

    # ask for repo until index of sample repo is provided or an URL
    while [[ -z "$repo" ]]; do
        printf "\n Or choose one from: https://huggingface.co/models?sort=trending&search=gguf\n\n"

        # stop explicitly at end of input rather than depending on "set -e";
        # -r keeps backslashes in the typed URL untouched
        if ! read -r -p "[+] Select repo: " repo; then
            printf "\n[-] No input available, aborting\n"
            exit 1
        fi

        if [[ "$repo" =~ ^[0-9]+$ ]]; then
            # the input is a number: force base 10 so that a leading zero
            # ("08", "010") is not interpreted as octal
            if (( 10#$repo < ${#repos[@]} )); then
                repo="${repos[$((10#$repo))]}"
            else
                printf "[-] Invalid repo index: %s\n" "$repo"
                repo=""
            fi
        elif [[ ! "$repo" =~ ^https?:// ]]; then
            # neither a number nor an http(s) URL
            printf "[-] Invalid repo URL: %s\n" "$repo"
            repo=""
        fi
    done
fi
|
||||
|
||||
# remove a trailing "/tree/main" so that it is not appended twice below
repo=${repo%/tree/main}

printf "[+] Checking for GGUF model files in %s\n" "$repo"

# find GGUF files in the source
# TODO: better logic
# (scrapes the file names out of the HTML of the repo's "tree/main" page)
model_tree="${repo%/}/tree/main"
model_files=$(curl -s "$model_tree" \
    | grep -i "\\.gguf</span>" \
    | sed -E 's/.*<span class="truncate group-hover:underline">(.*)<\/span><\/a>/\1/g')

# list all files in the provided git repo
printf "[+] Model files:\n\n"
# word splitting is intentional here: one file name per word
# shellcheck disable=SC2086
for file in $model_files; do
    # upper-case the file name once per file, not once per weights type
    ufile=$(echo "$file" | tr '[:lower:]' '[:upper:]')

    # determine iw: index of the first weights type contained in the name
    iw=-1
    is=0
    for wt in "${wtypes[@]}"; do
        # quoted right-hand side: literal substring match, not a regex
        if [[ "$ufile" =~ "$wt" ]]; then
            iw=$is
            break
        fi
        is=$((is+1))
    done

    # files that match no known weights type are not listed
    if [[ $iw -eq -1 ]]; then
        continue
    fi

    wfiles[$iw]="$file"

    # "*" marks files that already exist in the current directory
    have=" "
    if [[ -f "$file" ]]; then
        have="*"
    fi

    printf " %2d) %s %s\n" "$iw" "$have" "$file"
done
|
||||
|
||||
# pre-select a file only when a weights type index is already known;
# an empty subscript must not be used to index wfiles
wfile=""
if [[ -n "$wtype" ]]; then
    wfile="${wfiles[$wtype]}"
fi

# ask for weights type until provided and available
while [[ -z "$wfile" ]]; do
    printf "\n"
    if ! read -r -p "[+] Select weight type: " wtype; then
        printf "\n[-] No input available, aborting\n"
        exit 1
    fi

    # accept only a plain decimal index inside wfiles: array subscripts are
    # evaluated as arithmetic, so unchecked text would be treated as an
    # expression instead of being rejected
    if [[ "$wtype" =~ ^[0-9]+$ ]] && (( 10#$wtype < ${#wfiles[@]} )); then
        wtype=$((10#$wtype))
        wfile="${wfiles[$wtype]}"
    fi

    if [[ -z "$wfile" ]]; then
        printf "[-] Invalid weight type: %s\n" "$wtype"
        wtype=""
    fi
done

printf "[+] Selected weight type: %s (%s)\n" "$wtype" "$wfile"

url="${repo%/}/resolve/main/$wfile"
|
||||
|
||||
# check file if the model has been downloaded before
chk="$wfile.chk"

# check if we should download the file
# - if $wfile does not exist
# - if $wfile exists but $chk does not exist
# - if $wfile exists and $chk exists but $wfile is newer than $chk
# TODO: better logic using git lfs info
do_download=0
if [[ ! -f "$wfile" || ! -f "$chk" || "$wfile" -nt "$chk" ]]; then
    do_download=1
fi

if [[ $do_download -eq 1 ]]; then
    printf "[+] Downloading weights from %s\n" "$url"

    # download the weights file
    # -f makes curl fail on an HTTP error instead of saving the error page
    # as the weights file; testing curl directly in the "if" keeps the
    # failure branch reachable while "set -e" is active
    if curl -f -o "$wfile" -# -L "$url"; then
        # create a check file if successful
        printf "[+] Creating check file %s\n" "$chk"
        touch "$chk"
    else
        printf "[-] Failed to download %s\n" "$url"
        exit 1
    fi
else
    printf "[+] Using cached weights %s\n" "$wfile"
fi
|
||||
|
||||
# get latest llama.cpp and build

printf "[+] Downloading latest llama.cpp\n"

# one checkout per port, kept in the current directory
llama_cpp_dir="__llama_cpp_port_${port}__"

if [[ ! -d "$llama_cpp_dir" ]]; then
    printf "[+] Cloning llama.cpp\n"

    git clone https://github.com/ggerganov/llama.cpp "$llama_cpp_dir"
elif [[ -f "$llama_cpp_dir/__ggml_script__" ]]; then
    # the marker file is present: reuse the checkout and move it to the
    # freshly fetched origin/master
    printf "[+] Directory %s already exists\n" "$llama_cpp_dir"
    printf "[+] Using cached llama.cpp\n"

    git -C "$llama_cpp_dir" reset --hard
    git -C "$llama_cpp_dir" fetch
    git -C "$llama_cpp_dir" checkout origin/master
else
    # the dir exists but has no "__ggml_script__" marker file in it: abort
    printf "[-] Directory %s already exists\n" "$llama_cpp_dir"
    printf "[-] Please remove it and try again\n"
    exit 1
fi

# mark that the directory is made by this script
touch "$llama_cpp_dir/__ggml_script__"
|
||||
|
||||
# trace every command from here on when --verbose was given
if [[ $verbose -eq 1 ]]; then
    set -x
fi

# build
cd "$llama_cpp_dir"

make clean

# make is run with --silent unless verbose output was requested
if [[ $verbose -eq 1 ]]; then
    log=""
else
    log="--silent"
fi

# $log is left unquoted on purpose: when empty it must expand to no argument
# shellcheck disable=SC2086
case "$backend" in
    cuda)
        printf "[+] Building with CUDA backend\n"
        LLAMA_CUBLAS=1 make -j server $log
        ;;
    cpu)
        printf "[+] Building with CPU backend\n"
        make -j server $log
        ;;
    metal)
        printf "[+] Building with Metal backend\n"
        make -j server $log
        ;;
    opencl)
        printf "[+] Building with OpenCL backend\n"
        LLAMA_CLBLAST=1 make -j server $log
        ;;
    *)
        printf "[-] Unknown backend: %s\n" "$backend"
        exit 1
        ;;
esac
|
||||
|
||||
# run the server

printf "[+] Running server\n"

# backend specific server arguments, kept in an array so that every element
# reaches ./server as exactly one argument
args=()
case "$backend" in
    cuda)
        # restrict CUDA to the selected GPU
        export CUDA_VISIBLE_DEVICES=$gpu_id
        args+=(-ngl 999)
        ;;
    cpu)
        args+=(-ngl 0)
        ;;
    metal|opencl)
        args+=(-ngl 999)
        ;;
    *)
        printf "[-] Unknown backend: %s\n" "$backend"
        exit 1
        ;;
esac

if [[ $verbose -eq 1 ]]; then
    args+=(--verbose)
fi

# the weights file is referenced one directory up ("../")
./server -m "../$wfile" --host 0.0.0.0 --port "$port" -c "$n_kv" -np "$n_parallel" "${args[@]}"

exit 0
|
||||
|
|
@ -1,168 +0,0 @@
|
|||
#!/bin/bash
|
||||
#
|
||||
# Synchronize ggml changes to llama.cpp
|
||||
#
|
||||
# Usage:
|
||||
#
|
||||
# $ cd /path/to/llama.cpp
|
||||
# $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2...
|
||||
#
|
||||
|
||||
set -e

# work from the parent directory of the directory containing this script
sd=$(dirname "$0")
cd "$sd/../"

SRC_LLAMA=$(pwd)

# ggml is expected as a sibling directory. Test for it before resolving the
# path: errexit is not active inside a command substitution, so
# "$(cd ../ggml; pwd)" would continue after a failed cd and print the current
# directory instead, and the missing checkout would go unnoticed.
if [ ! -d ../ggml ]; then
    echo "ggml not found at $SRC_LLAMA/../ggml"
    exit 1
fi

SRC_GGML=$(cd ../ggml && pwd)

# commit recorded in scripts/sync-ggml.last; the sync starts after it
lc=$(cat "$SRC_LLAMA/scripts/sync-ggml.last")
echo "Syncing ggml changes since commit $lc"

# optional: -skip hash0,hash1,... lists commits that must not be synced
to_skip=""
if [ "$1" == "-skip" ]; then
    to_skip=$2
fi
|
||||
|
||||
cd "$SRC_GGML"

# show the commits since the last sync, then store their hashes oldest first;
# subjects containing "(llama/<number>)" are filtered out
git log --oneline "$lc..HEAD"
git log --oneline "$lc..HEAD" --reverse \
    | grep -v "(llama/[0-9]*)" \
    | cut -d' ' -f1 > "$SRC_LLAMA/ggml-commits"

# nothing to sync: remove the empty list and stop
if [ ! -s "$SRC_LLAMA/ggml-commits" ]; then
    rm -v "$SRC_LLAMA/ggml-commits"
    echo "No new commits"
    exit 0
fi

# remove a patch file that is already present before appending to it
if [ -f "$SRC_LLAMA/ggml-src.patch" ]; then
    rm -v "$SRC_LLAMA/ggml-src.patch"
fi
|
||||
|
||||
# append one patch per commit to ggml-src.patch, restricted to the synced files
while read -r c; do
    # a commit is skipped when its hash occurs anywhere in the -skip list
    if [ -n "$to_skip" ] && [[ $to_skip == *"$c"* ]]; then
        echo "Skipping $c"
        continue
    fi

    # the pathspecs are intentionally unquoted, as in the original
    git format-patch -k "$c~1..$c" --stdout -- \
        include/ggml/ggml*.h \
        src/ggml*.h \
        src/ggml*.c \
        src/ggml*.cpp \
        src/ggml*.m \
        src/ggml*.metal \
        src/ggml*.cu \
        tests/test-opt.cpp \
        tests/test-grad0.cpp \
        tests/test-quantize-fns.cpp \
        tests/test-quantize-perf.cpp \
        tests/test-backend-ops.cpp \
        >> "$SRC_LLAMA/ggml-src.patch"
done < "$SRC_LLAMA/ggml-commits"

rm -v "$SRC_LLAMA/ggml-commits"

# delete files if empty
# (the patch file does not exist at all when every commit was skipped, and
# removing a missing file would abort the script under "set -e")
if [ -f "$SRC_LLAMA/ggml-src.patch" ] && [ ! -s "$SRC_LLAMA/ggml-src.patch" ]; then
    rm -v "$SRC_LLAMA/ggml-src.patch"
fi
|
||||
|
||||
cd "$SRC_LLAMA"

if [ -f "$SRC_LLAMA/ggml-src.patch" ]; then
    # All rewrites run in a single sed pass. The expressions are applied to
    # every line in the order given, which matches the original sequence of
    # separate passes.
    #
    # 1. replace PR numbers
    #
    #    Subject: some text (#1234)  ->  Subject: some text (ggml/1234)
    #
    #    (also done for any other line that ends in " (#1234)")
    #
    # 2. replace filenames:
    #
    #    src/ggml*.{c,h,cpp,cu,m}  -> same file name without the src/ prefix
    #    include/ggml/ggml*.h      -> same file name without include/ggml/
    #    tests/test-*.cpp          -> unchanged (these rules replace a path
    #                                 with itself)
    sed \
        -e 's/^Subject: \(.*\) (#\([0-9]*\))/Subject: \1 (ggml\/\2)/' \
        -e 's/^\(.*\) (#\([0-9]*\))$/\1 (ggml\/\2)/' \
        -e 's/src\/ggml\.c/ggml.c/g' \
        -e 's/src\/ggml-alloc\.c/ggml-alloc.c/g' \
        -e 's/src\/ggml-backend-impl\.h/ggml-backend-impl.h/g' \
        -e 's/src\/ggml-backend\.c/ggml-backend.c/g' \
        -e 's/src\/ggml-cuda\.cu/ggml-cuda.cu/g' \
        -e 's/src\/ggml-cuda\.h/ggml-cuda.h/g' \
        -e 's/src\/ggml-impl\.h/ggml-impl.h/g' \
        -e 's/src\/ggml-kompute\.cpp/ggml-kompute.cpp/g' \
        -e 's/src\/ggml-kompute\.h/ggml-kompute.h/g' \
        -e 's/src\/ggml-metal\.h/ggml-metal.h/g' \
        -e 's/src\/ggml-metal\.m/ggml-metal.m/g' \
        -e 's/src\/ggml-mpi\.h/ggml-mpi.h/g' \
        -e 's/src\/ggml-mpi\.c/ggml-mpi.c/g' \
        -e 's/src\/ggml-opencl\.cpp/ggml-opencl.cpp/g' \
        -e 's/src\/ggml-opencl\.h/ggml-opencl.h/g' \
        -e 's/src\/ggml-quants\.c/ggml-quants.c/g' \
        -e 's/src\/ggml-quants\.h/ggml-quants.h/g' \
        -e 's/src\/ggml-sycl\.cpp/ggml-sycl.cpp/g' \
        -e 's/src\/ggml-sycl\.h/ggml-sycl.h/g' \
        -e 's/src\/ggml-vulkan\.cpp/ggml-vulkan.cpp/g' \
        -e 's/src\/ggml-vulkan\.h/ggml-vulkan.h/g' \
        -e 's/include\/ggml\/ggml\.h/ggml.h/g' \
        -e 's/include\/ggml\/ggml-alloc\.h/ggml-alloc.h/g' \
        -e 's/include\/ggml\/ggml-backend\.h/ggml-backend.h/g' \
        -e 's/tests\/test-opt\.cpp/tests\/test-opt.cpp/g' \
        -e 's/tests\/test-grad0\.cpp/tests\/test-grad0.cpp/g' \
        -e 's/tests\/test-quantize-fns\.cpp/tests\/test-quantize-fns.cpp/g' \
        -e 's/tests\/test-quantize-perf\.cpp/tests\/test-quantize-perf.cpp/g' \
        -e 's/tests\/test-backend-ops\.cpp/tests\/test-backend-ops.cpp/g' \
        ggml-src.patch > ggml-src.patch.tmp
    mv ggml-src.patch.tmp ggml-src.patch

    # apply the rewritten patches as commits in this repository
    git am ggml-src.patch

    rm -v "$SRC_LLAMA/ggml-src.patch"
fi
|
||||
|
||||
# update last commit: store the hash of ggml's current HEAD as the starting
# point for the next sync
cd "$SRC_GGML"
git log -1 --format=%H > "$SRC_LLAMA/scripts/sync-ggml.last"

echo "Done"

exit 0
|
||||
|
|
@ -1 +0,0 @@
|
|||
5070f078a67c18c11736e78316ab715ca9afde16
|
||||
Loading…
Add table
Add a link
Reference in a new issue