remove junk

2026-05-10 04:00:53 +00:00 · 2024-02-17 17:12:59 +08:00 · 2024-02-17 17:12:59 +08:00 · 1e460bb936
commit 1e460bb936
parent 18c8d4b31c
74 changed files with 0 additions and 6041 deletions
--- a/scripts/check-requirements.sh
+++ b/scripts/check-requirements.sh
@ -1,174 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-
-#
-# check-requirements.sh checks all requirements files for each top-level
-# convert*.py script.
-#
-# WARNING: This is quite IO intensive, because a fresh venv is set up for every
-# python script. As of 2023-12-22, this writes ~2.7GB of data. An adequately
-# sized tmpfs /tmp or ramdisk is recommended if running this frequently.
-#
-# usage:    check-requirements.sh [<working_dir>]
-#           check-requirements.sh nocleanup [<working_dir>]
-#
-# where:
-#           - <working_dir> is a directory that can be used as the base for
-#               setting up the venvs. Defaults to `/tmp`.
-#           - 'nocleanup' as the first argument will disable automatic cleanup
-#               of the files created by this script.
-#
-# requires:
-#           - bash >= 3.2.57
-#           - shellcheck
-#
-# For each script, it creates a fresh venv, `pip install`s the requirements, and
-# finally imports the python script to check for `ImportError`.
-#
-
-# Print a "<LEVEL>: <message>" line to stderr.
-#   $1 - level label (e.g. DEBUG, INFO, FATAL)
-#   $2 - message text; any further arguments are ignored
-log() {
-    local level=$1 msg=$2
-    printf >&2 '%s: %s\n' "$level" "$msg"
-}
-
-# Log at DEBUG level. Arguments are forwarded to log(), which only uses the
-# first one as the message.
-debug() {
-    log DEBUG "$@"
-}
-
-# Log at INFO level. Arguments are forwarded to log(), which only uses the
-# first one as the message.
-info() {
-    log INFO "$@"
-}
-
-# Log at FATAL level and terminate the script with exit status 1.
-fatal() {
-    log FATAL "$@"
-    exit 1
-}
-
-# Delete the working directory, printing progress dots while doing so.
-# Does nothing unless $workdir is set and names a writable directory.
-cleanup() {
-    if [[ -n ${workdir+x} && -d $workdir && -w $workdir ]]; then
-        info "Removing $workdir"
-        local count=0
-        # `rm -v` prints one line per removed entry. Instead of echoing all of
-        # them, consume the output and print a single '.' to stdout each time
-        # the counter has passed 750 lines, then reset the counter.
-        rm -rfv -- "$workdir" | while read -r; do
-            if (( count++ > 750 )); then
-                printf .
-                count=0
-            fi
-        done
-        printf '\n'
-        info "Removed $workdir"
-    fi
-}
-
-# Cleanup is enabled by default. A leading 'nocleanup' argument disables it and
-# is shifted away, so that $1 (if present) is the working directory.
-do_cleanup=1
-if [[ ${1-} == nocleanup ]]; then
-    do_cleanup=0; shift
-fi
-
-if (( do_cleanup )); then
-    # Turn INT/TERM into a regular exit so the EXIT trap below also runs when
-    # the script is interrupted.
-    trap exit INT TERM
-    trap cleanup EXIT
-fi
-
-this=$(realpath -- "$0"); readonly this
-cd "$(dirname "$this")/.." # PWD should stay in llama.cpp project directory
-
-# Lint this script itself; with `set -e` a shellcheck failure aborts the run.
-shellcheck "$this"
-
-readonly reqs_dir=requirements
-
-# Base directory for the venvs: the first remaining argument, or /tmp.
-if [[ ${1+x} ]]; then
-    tmp_dir=$(realpath -- "$1")
-    if [[ ! ( -d $tmp_dir && -w $tmp_dir ) ]]; then
-        fatal "$tmp_dir is not a writable directory"
-    fi
-else
-    tmp_dir=/tmp
-fi
-
-# All venvs are created below this freshly created directory; cleanup() removes it.
-workdir=$(mktemp -d "$tmp_dir/check-requirements.XXXX"); readonly workdir
-info "Working directory: $workdir"
-
-# Install a requirements file with pip, using whichever `pip` is first on PATH
-# (callers activate a venv beforehand).
-#   $1 - path to the requirements file
-# Under `set -e` a failing `pip install` aborts before "OK" is logged.
-check_requirements() {
-    local reqs=$1
-
-    info "$reqs: beginning check"
-    pip --disable-pip-version-check install -qr "$reqs"
-    info "$reqs: OK"
-}
-
-# Check one convert script: create a fresh venv, install the script's own
-# requirements file into it, then import the script to surface import errors.
-#   $1 - path to the python script
-# Reads the globals reqs_dir, workdir and do_cleanup.
-check_convert_script() {
-    local py=$1             # e.g. ./convert-hf-to-gguf.py
-    local pyname=${py##*/}  # e.g. convert-hf-to-gguf.py
-    pyname=${pyname%.py}    # e.g. convert-hf-to-gguf
-
-    info "$py: beginning check"
-
-    # Every script must have a matching requirements/requirements-<name>.txt.
-    local reqs="$reqs_dir/requirements-$pyname.txt"
-    if [[ ! -r $reqs ]]; then
-        fatal "$py missing requirements. Expected: $reqs"
-    fi
-
-    local venv="$workdir/$pyname-venv"
-    python3 -m venv "$venv"
-
-    # Subshell: the venv activation must not leak into the rest of the script.
-    (
-        # shellcheck source=/dev/null
-        source "$venv/bin/activate"
-
-        check_requirements "$reqs"
-
-        # Load the script as a module by file path (its name contains dashes,
-        # so a plain `import` statement cannot be used).
-        python - "$py" "$pyname" <<'EOF'
-import sys
-from importlib.machinery import SourceFileLoader
-py, pyname = sys.argv[1:]
-SourceFileLoader(pyname, py).load_module()
-EOF
-    )
-
-    # Remove the venv right away to limit peak disk usage.
-    if (( do_cleanup )); then
-        rm -rf -- "$venv"
-    fi
-
-    info "$py: imports OK"
-}
-
-# Marker that, when present on a requirements line, exempts that line from the
-# '==' pinning check below.
-readonly ignore_eq_eq='check_requirements: ignore "=="'
-
-for req in "$reqs_dir"/*; do
-    # Check that all sub-requirements are added to top-level requirements.txt
-    if ! grep -qF "$req" requirements.txt; then
-        fatal "$req needs to be added to requirements.txt"
-    fi
-
-    # Make sure exact release versions aren't being pinned in the requirements
-    # Filters out the ignore string
-    if grep -vF "$ignore_eq_eq" "$req" | grep -q '=='; then
-        tab=$'\t'
-        cat >&2 <<EOF
-FATAL: Avoid pinning exact package versions. Use '~=' instead.
-You can suppress this error by appending the following to the line:
-$tab# $ignore_eq_eq
-EOF
-        exit 1
-    fi
-done
-
-# First check the combined top-level requirements.txt in a venv of its own.
-all_venv="$workdir/all-venv"
-python3 -m venv "$all_venv"
-
-(
-    # shellcheck source=/dev/null
-    source "$all_venv/bin/activate"
-    check_requirements requirements.txt
-)
-
-if (( do_cleanup )); then
-    rm -rf -- "$all_venv"
-fi
-
-# Then check each convert script against its own requirements file.
-check_convert_script convert.py
-for py in convert-*.py; do
-    check_convert_script "$py"
-done
-
-info 'Done! No issues found.'
--- a/scripts/ci-run.sh
+++ b/scripts/ci-run.sh
@ -1,50 +0,0 @@
-#!/bin/bash
-set -euo pipefail
-this=$(realpath "$0"); readonly this
-cd "$(dirname "$this")"
-# Lint this script itself; with `set -e` a shellcheck failure aborts the run.
-shellcheck "$this"
-
-# Exactly one or two arguments are accepted; anything else prints the usage.
-if (( $# != 1 && $# != 2  )); then
-    cat >&2 <<'EOF'
-usage:
-    ci-run.sh <tmp_dir> [<cache_dir>]
-
-This script wraps ci/run.sh:
-* If <tmp_dir> is a ramdisk, you can reduce writes to your SSD. If <tmp_dir> is not a ramdisk, keep in mind that total writes will increase by the size of <cache_dir>.
-    (openllama_3b_v2: quantized models are about 30GB)
-* Persistent model and data files are synced to and from <cache_dir>,
-    excluding generated .gguf files.
-    (openllama_3b_v2: persistent files are about 6.6GB)
-* <cache_dir> defaults to  ~/.cache/llama.cpp
-EOF
-    exit 1
-fi
-
-cd .. # => llama.cpp repo root
-
-# Create both directories if needed and resolve them to absolute paths, since
-# the script changes directory again later on.
-tmp="$1"
-mkdir -p "$tmp"
-tmp=$(realpath "$tmp")
-echo >&2 "Using tmp=$tmp"
-
-cache="${2-$HOME/.cache/llama.cpp}"
-mkdir -p "$cache"
-cache=$(realpath "$cache")
-echo >&2 "Using cache=$cache"
-
-# Mirror one directory into another with rsync (archive mode, deleting
-# extraneous files on the receiving side).
-#   $1   - source directory (created if missing)
-#   $2   - destination directory (created if missing)
-#   $3.. - extra options appended to the rsync command line
-_sync() {
-    local from="$1"; shift
-    local to="$1"; shift
-
-    echo >&2 "Syncing from $from to $to"
-    mkdir -p "$from" "$to"
-    rsync -a "$from" "$to" --delete-during "$@"
-}
-
-_sync "$(realpath .)/" "$tmp/llama.cpp"
-_sync "$cache/ci-mnt/models/" "$tmp/llama.cpp/ci-mnt/models/"
-
-cd "$tmp/llama.cpp"
-bash ci/run.sh ci-out ci-mnt
-
-_sync 'ci-mnt/models/' "$cache/ci-mnt/models/" --exclude='*.gguf' -P
--- a/scripts/compare-commits.sh
+++ b/scripts/compare-commits.sh
@ -1,37 +0,0 @@
-#!/bin/bash
-
-if [ $# -lt 2 ]; then
-    echo "usage: ./scripts/compare-commits.sh <commit1> <commit2> [additional llama-bench arguments]"
-    exit 1
-fi
-
-set -e
-set -x
-
-bench_args="${@:3}"
-
-rm -f llama-bench.sqlite
-
-backend="cpu"
-
-if [[ "$OSTYPE" == "darwin"* ]]; then
-    backend="metal"
-elif command -v nvcc &> /dev/null; then
-    backend="cuda"
-fi
-
-make_opts=""
-
-if [[ "$backend" == "cuda" ]]; then
-    make_opts="LLAMA_CUBLAS=1"
-fi
-
-git checkout $1
-make clean && make -j32 $make_opts llama-bench
-./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
-
-git checkout $2
-make clean && make -j32 $make_opts llama-bench
-./llama-bench -o sql $bench_args | tee /dev/tty | sqlite3 llama-bench.sqlite
-
-./scripts/compare-llama-bench.py -b $1 -c $2
--- a/scripts/compare-llama-bench.py
+++ b/scripts/compare-llama-bench.py
@ -1,374 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import heapq
-import sys
-import os
-from glob import glob
-import sqlite3
-
-try:
-    import git
-    from tabulate import tabulate
-except ImportError as e:
-    print("ERROR: the following Python libraries are required: GitPython, tabulate.")
-    raise e
-
-# Properties by which to differentiate results per commit:
-KEY_PROPERTIES = [
-    "cpu_info", "gpu_info", "n_gpu_layers", "main_gpu", "cuda", "opencl", "metal", "gpu_blas",
-    "blas", "model_filename", "model_type", "model_size", "model_n_params", "n_batch", "n_threads",
-    "type_k", "type_v", "no_kv_offload", "mul_mat_q", "tensor_split", "n_prompt", "n_gen"
-]
-
-# Properties that are boolean and are converted to Yes/No for the table:
-BOOL_PROPERTIES = ["cuda", "opencl", "metal", "gpu_blas", "blas"]
-
-# Header names for the table:
-PRETTY_NAMES = {
-    "cuda": "CUDA", "opencl": "OpenCL", "metal": "Metal", "gpu_blas": "GPU BLAS", "blas": "BLAS",
-    "cpu_info": "CPU", "gpu_info": "GPU", "model_filename": "File", "model_type": "Model",
-    "model_size": "Model Size [GiB]", "model_n_params": "Num. of Parameters",
-    "n_batch": "Batch size", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
-    "n_gpu_layers": "GPU layers", "main_gpu": "Main GPU", "no_kv_offload": "NKVO",
-    "mul_mat_q": "MMQ", "tensor_split": "Tensor split"
-}
-
-DEFAULT_SHOW = ["model_type"]  # Always show these properties by default.
-DEFAULT_HIDE = ["model_filename"]  # Always hide these properties by default.
-GPU_NAME_STRIP = ["NVIDIA GeForce ", "Tesla ", "AMD Radeon "]  # Strip prefixes for smaller tables.
-MODEL_SUFFIX_REPLACE = {" - Small": "_S", " - Medium": "_M", " - Large": "_L"}
-
-DESCRIPTION = """Creates tables from llama-bench data written to an SQLite database. Example usage (Linux):
-
-$ git checkout master
-$ make clean && make llama-bench
-$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
-$ git checkout some_branch
-$ make clean && make llama-bench
-$ ./llama-bench -o sql | sqlite3 llama-bench.sqlite
-$ ./scripts/compare-llama-bench.py
-
-Performance numbers from multiple runs per commit are averaged WITHOUT being weighted by the --repetitions parameter of llama-bench.
-"""
-
-parser = argparse.ArgumentParser(
-    description=DESCRIPTION, formatter_class=argparse.RawDescriptionHelpFormatter)
-help_b = (
-    "The baseline commit to compare performance to. "
-    "Accepts either a branch name, tag name, or commit hash. "
-    "Defaults to latest master commit with data."
-)
-parser.add_argument("-b", "--baseline", help=help_b)
-help_c = (
-    "The commit whose performance is to be compared to the baseline. "
-    "Accepts either a branch name, tag name, or commit hash. "
-    "Defaults to the non-master commit for which llama-bench was run most recently."
-)
-parser.add_argument("-c", "--compare", help=help_c)
-help_i = (
-    "Input SQLite file for comparing commits. "
-    "Defaults to 'llama-bench.sqlite' in the current working directory. "
-    "If no such file is found and there is exactly one .sqlite file in the current directory, "
-    "that file is instead used as input."
-)
-parser.add_argument("-i", "--input", help=help_i)
-help_o = (
-    "Output format for the table. "
-    "Defaults to 'pipe' (GitHub compatible). "
-    "Also supports e.g. 'latex' or 'mediawiki'. "
-    "See tabulate documentation for full list."
-)
-parser.add_argument("-o", "--output", help=help_o, default="pipe")
-help_s = (
-    "Columns to add to the table. "
-    "Accepts a comma-separated list of values. "
-    f"Legal values: {', '.join(KEY_PROPERTIES[:-2])}. "
-    "Defaults to model name (model_type) and CPU and/or GPU name (cpu_info, gpu_info) "
-    "plus any column where not all data points are the same. "
-    "If the columns are manually specified, then the results for each unique combination of the "
-    "specified values are averaged WITHOUT weighing by the --repetitions parameter of llama-bench."
-)
-parser.add_argument("-s", "--show", help=help_s)
-
-known_args, unknown_args = parser.parse_known_args()
-
-if unknown_args:
-    print(f"ERROR: Received unknown args: {unknown_args}.")
-    print()
-    parser.print_help()
-    sys.exit(1)
-
-# Resolve the SQLite input file: an explicit --input wins, then
-# ./llama-bench.sqlite, then the only *.sqlite file in the current directory.
-input_file = known_args.input
-if input_file is None and os.path.exists("./llama-bench.sqlite"):
-    input_file = "llama-bench.sqlite"
-if input_file is None:
-    sqlite_files = glob("*.sqlite")
-    if len(sqlite_files) == 1:
-        input_file = sqlite_files[0]
-
-if input_file is None:
-    print("ERROR: Cannot find a suitable input file, please provide one.")
-    print()
-    parser.print_help()
-    sys.exit(1)
-
-connection = sqlite3.connect(input_file)
-cursor = connection.cursor()
-# List of 1-tuples, one per distinct build_commit value in the "test" table.
-builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall()
-
-# The git repository is optional; without one, commits cannot be resolved by
-# name and no defaults can be inferred.
-try:
-    repo = git.Repo(".", search_parent_directories=True)
-except git.exc.InvalidGitRepositoryError:
-    repo = None
-
-
-def find_parent_in_data(commit):
-    """Helper function to find the most recent parent measured in number of commits for which there is data."""
-    heap = [(0, commit)]
-    seen_hexsha8 = set()
-    while heap:
-        depth, current_commit = heapq.heappop(heap)
-        current_hexsha8 = commit.hexsha[:8]
-        if (current_hexsha8,) in builds:
-            return current_hexsha8
-        for parent in commit.parents:
-            parent_hexsha8 = parent.hexsha[:8]
-            if parent_hexsha8 not in seen_hexsha8:
-                seen_hexsha8.add(parent_hexsha8)
-                heapq.heappush(heap, (depth + 1, parent))
-    return None
-
-
-def get_all_parent_hexsha8s(commit):
-    """Helper function to recursively get hexsha8 values for all parents of a commit."""
-    unvisited = [commit]
-    visited   = []
-
-    while unvisited:
-        current_commit = unvisited.pop(0)
-        visited.append(current_commit.hexsha[:8])
-        for parent in current_commit.parents:
-            if parent.hexsha[:8] not in visited:
-                unvisited.append(parent)
-
-    return visited
-
-
-def get_commit_name(hexsha8):
-    """Helper function to find a human-readable name for a commit if possible.
-
-    Branch heads are checked before tags. The input is returned unchanged when
-    there is no repository or when no head or tag points at that commit.
-    """
-    if repo is None:
-        return hexsha8
-    for h in repo.heads:
-        if h.commit.hexsha[:8] == hexsha8:
-            return h.name
-    for t in repo.tags:
-        if t.commit.hexsha[:8] == hexsha8:
-            return t.name
-    return hexsha8
-
-
-def get_commit_hexsha8(name):
-    """Helper function to search for a commit given a human-readable name.
-
-    Only branch heads and tags are searched, heads first. Returns the
-    8-character hexsha of the matching commit, or None when there is no
-    repository or no head or tag has that name.
-    """
-    if repo is None:
-        return None
-    for h in repo.heads:
-        if h.name == name:
-            return h.commit.hexsha[:8]
-    for t in repo.tags:
-        if t.name == name:
-            return t.commit.hexsha[:8]
-    return None
-
-
-hexsha8_baseline = name_baseline = None
-
-# If the user specified a baseline, try to find a commit for it:
-if known_args.baseline is not None:
-    # First treat the value as a build_commit present in the database ...
-    if (known_args.baseline,) in builds:
-        hexsha8_baseline = known_args.baseline
-    # ... otherwise as a branch or tag name.
-    if hexsha8_baseline is None:
-        hexsha8_baseline = get_commit_hexsha8(known_args.baseline)
-        name_baseline = known_args.baseline
-    if hexsha8_baseline is None:
-        print(f"ERROR: cannot find data for baseline={known_args.baseline}.")
-        sys.exit(1)
-# Otherwise, search for the most recent parent of master for which there is data:
-elif repo is not None:
-    hexsha8_baseline = find_parent_in_data(repo.heads.master.commit)
-
-    if hexsha8_baseline is None:
-        print("ERROR: No baseline was provided and did not find data for any master branch commits.")
-        print()
-        parser.print_help()
-        sys.exit(1)
-else:
-    print(
-        "ERROR: No baseline was provided and the current working directory "
-        "is not part of a git repository from which a baseline could be inferred."
-    )
-    print()
-    parser.print_help()
-    sys.exit(1)
-
-
-# Overrides any name assigned above with the one derived from the hexsha.
-name_baseline = get_commit_name(hexsha8_baseline)
-
-hexsha8_compare = name_compare = None
-
-# If the user has specified a compare value, try to find a corresponding commit:
-if known_args.compare is not None:
-    if (known_args.compare,) in builds:
-        hexsha8_compare = known_args.compare
-    if hexsha8_compare is None:
-        hexsha8_compare = get_commit_hexsha8(known_args.compare)
-        name_compare = known_args.compare
-    if hexsha8_compare is None:
-        print(f"ERROR: cannot find data for baseline={known_args.compare}.")
-        sys.exit(1)
-# Otherwise, search for the commit for llama-bench was most recently run
-# and that is not a parent of master:
-elif repo is not None:
-    hexsha8s_master = get_all_parent_hexsha8s(repo.heads.master.commit)
-    builds_timestamp = cursor.execute(
-        "SELECT build_commit, test_time FROM test ORDER BY test_time;").fetchall()
-    for (hexsha8, _) in reversed(builds_timestamp):
-        if hexsha8 not in hexsha8s_master:
-            hexsha8_compare = hexsha8
-            break
-
-    if hexsha8_compare is None:
-        print("ERROR: No compare target was provided and did not find data for any non-master commits.")
-        print()
-        parser.print_help()
-        sys.exit(1)
-else:
-    print(
-        "ERROR: No compare target was provided and the current working directory "
-        "is not part of a git repository from which a compare target could be inferred."
-    )
-    print()
-    parser.print_help()
-    sys.exit(1)
-
-name_compare = get_commit_name(hexsha8_compare)
-
-
-def get_rows(properties):
-    """
-    Helper function that gets table rows for some list of properties.
-    Rows are created by combining those where all provided properties are equal.
-    The resulting rows are then grouped by the provided properties and the t/s values are averaged.
-    The returned rows are unique in terms of property combinations.
-
-    Each returned row holds the requested properties followed by n_prompt, n_gen,
-    the averaged baseline t/s and the averaged compare t/s, in that order.
-    """
-    select_string = ", ".join(
-        [f"tb.{p}" for p in properties] + ["tb.n_prompt", "tb.n_gen", "AVG(tb.avg_ts)", "AVG(tc.avg_ts)"])
-    # Self-join of the test table: "tb" rows belong to the baseline commit, "tc" rows
-    # to the compare commit, and they are paired when all KEY_PROPERTIES agree.
-    equal_string = " AND ".join(
-        [f"tb.{p} = tc.{p}" for p in KEY_PROPERTIES] + [
-            f"tb.build_commit = '{hexsha8_baseline}'", f"tc.build_commit = '{hexsha8_compare}'"]
-    )
-    group_order_string = ", ".join([f"tb.{p}" for p in properties] + ["tb.n_gen", "tb.n_prompt"])
-    query = (f"SELECT {select_string} FROM test tb JOIN test tc ON {equal_string} "
-             f"GROUP BY {group_order_string} ORDER BY {group_order_string};")
-    return cursor.execute(query).fetchall()
-
-
-# If the user provided columns to group the results by, use them:
-if known_args.show is not None:
-    show = known_args.show.split(",")
-    unknown_cols = []
-    for prop in show:
-        if prop not in KEY_PROPERTIES[:-2]:  # Last two values are n_prompt, n_gen.
-            unknown_cols.append(prop)
-    if unknown_cols:
-        print(f"ERROR: Unknown values for --show: {', '.join(unknown_cols)}")
-        print()
-        parser.print_usage()
-        sys.exit(1)
-    rows_show = get_rows(show)
-# Otherwise, select those columns where the values are not all the same:
-else:
-    rows_full = get_rows(KEY_PROPERTIES)
-    properties_different = []
-    for i, kp_i in enumerate(KEY_PROPERTIES):
-        if kp_i in DEFAULT_SHOW or kp_i == "n_prompt" or kp_i == "n_gen":
-            continue
-        for row_full in rows_full:
-            if row_full[i] != rows_full[0][i]:
-                properties_different.append(kp_i)
-                break
-
-    show = []
-    # Show CPU and/or GPU by default even if the hardware for all results is the same:
-    if "gpu_blas" not in properties_different and "n_gpu_layers" not in properties_different:
-        gpu_blas = bool(rows_full[0][KEY_PROPERTIES.index("gpu_blas")])
-        ngl = int(rows_full[0][KEY_PROPERTIES.index("n_gpu_layers")])
-
-        if not gpu_blas or ngl != 99 and "cpu_info" not in properties_different:
-            show.append("cpu_info")
-        if gpu_blas and "gpu_info" not in properties_different:
-            show.append("gpu_info")
-
-    show += properties_different
-
-    index_default = 0
-    for prop in ["cpu_info", "gpu_info", "n_gpu_layers", "main_gpu"]:
-        if prop in show:
-            index_default += 1
-    show = show[:index_default] + DEFAULT_SHOW + show[index_default:]
-    for prop in DEFAULT_HIDE:
-        try:
-            show.remove(prop)
-        except ValueError:
-            pass
-    rows_show = get_rows(show)
-
-# Build the output table. Each row from get_rows() ends with
-# n_prompt, n_gen, baseline t/s, compare t/s.
-table = []
-for row in rows_show:
-    n_prompt = int(row[-4])
-    n_gen    = int(row[-3])
-    # Exactly one of the two is expected to be non-zero; it determines the test name.
-    assert n_prompt == 0 or n_gen == 0
-    test_name = f"tg{n_gen}" if n_prompt == 0 else f"pp{n_prompt}"
-    #           Regular columns    test name    avg t/s values              Speedup
-    #            VVVVVVVVVVVVV     VVVVVVVVV    VVVVVVVVVVVVVV              VVVVVVV
-    table.append(list(row[:-4]) + [test_name] + list(row[-2:]) + [float(row[-1]) / float(row[-2])])
-
-# Some a-posteriori fixes to make the table contents prettier:
-for bool_property in BOOL_PROPERTIES:
-    if bool_property in show:
-        ip = show.index(bool_property)
-        for row_table in table:
-            row_table[ip] = "Yes" if int(row_table[ip]) == 1 else "No"
-
-if "model_type" in show:
-    ip = show.index("model_type")
-    for (old, new) in MODEL_SUFFIX_REPLACE.items():
-        for row_table in table:
-            row_table[ip] = row_table[ip].replace(old, new)
-
-# Convert the model size from bytes to GiB, matching the column header.
-if "model_size" in show:
-    ip = show.index("model_size")
-    for row_table in table:
-        row_table[ip] = float(row_table[ip]) / 1024 ** 3
-
-if "gpu_info" in show:
-    ip = show.index("gpu_info")
-    for row_table in table:
-        for gns in GPU_NAME_STRIP:
-            row_table[ip] = row_table[ip].replace(gns, "")
-
-        # Collapse "A/A/A" into "3x A" when all "/"-separated GPU names are identical.
-        gpu_names = row_table[ip].split("/")
-        num_gpus = len(gpu_names)
-        all_names_the_same = len(set(gpu_names)) == 1
-        if len(gpu_names) >= 2 and all_names_the_same:
-            row_table[ip] = f"{num_gpus}x {gpu_names[0]}"
-
-headers  = [PRETTY_NAMES[p] for p in show]
-headers += ["Test", f"t/s {name_baseline}", f"t/s {name_compare}", "Speedup"]
-
-print(tabulate(
-    table,
-    headers=headers,
-    floatfmt=".2f",
-    tablefmt=known_args.output
-))
--- a/scripts/gen-build-info-cpp.cmake
+++ b/scripts/gen-build-info-cpp.cmake
@ -1,24 +0,0 @@
-# Generate common/build-info.cpp from its template, rewriting the file only
-# when the commit, compiler or build target recorded in it has changed.
-include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
-
-set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
-set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")
-
-# Only write the build info if it changed
-if(EXISTS "${OUTPUT_FILE}")
-    file(READ "${OUTPUT_FILE}" CONTENTS)
-    # CONTENTS must be quoted. Unquoted, the value is split into separate
-    # arguments at every ';' and the pieces are concatenated without it, so the
-    # patterns below (which require the trailing ';') could never match and
-    # the file would be regenerated on every run.
-    string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ "${CONTENTS}")
-    set(OLD_COMMIT ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ "${CONTENTS}")
-    set(OLD_COMPILER ${CMAKE_MATCH_1})
-    string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ "${CONTENTS}")
-    set(OLD_TARGET ${CMAKE_MATCH_1})
-    if (
-        NOT OLD_COMMIT   STREQUAL BUILD_COMMIT   OR
-        NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR
-        NOT OLD_TARGET   STREQUAL BUILD_TARGET
-    )
-        configure_file("${TEMPLATE_FILE}" "${OUTPUT_FILE}")
-    endif()
-else()
-    configure_file("${TEMPLATE_FILE}" "${OUTPUT_FILE}")
-endif()
--- a/scripts/get-flags.mk
+++ b/scripts/get-flags.mk
@ -1,38 +0,0 @@
-# Detect the compiler family and version of $(GF_CC).
-# GF_CC is not defined in this file - presumably the including makefile sets it.
-# Sets GF_CC_IS_GCC, or GF_CC_IS_CLANG together with GF_CC_IS_LLVM_CLANG or
-# GF_CC_IS_APPLE_CLANG, and GF_CC_VER as three zero-padded two-digit fields
-# (major, minor, patch), e.g. 7.1.0 -> 070100.
-ifeq '' '$(findstring clang,$(shell $(GF_CC) --version))'
-	GF_CC_IS_GCC = 1
-	GF_CC_VER := $(shell { $(GF_CC) -dumpfullversion 2>/dev/null || $(GF_CC) -dumpversion; } | awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
-else
-	GF_CC_IS_CLANG = 1
-	ifeq '' '$(findstring Apple,$(shell $(GF_CC) --version))'
-		GF_CC_IS_LLVM_CLANG = 1
-	else
-		GF_CC_IS_APPLE_CLANG = 1
-	endif
-	GF_CC_VER := \
-		$(shell $(GF_CC) --version | sed -n 's/^.* version \([0-9.]*\).*$$/\1/p' \
-		| awk -F. '{ printf("%02d%02d%02d", $$1, $$2, $$3) }')
-endif
-
-# Choose warning flags (GF_CFLAGS / GF_CXXFLAGS) for the detected compiler.
-# Version-dependent flags are added only when GF_CC_VER is at least the
-# six-digit version given in the corresponding `expr` comparison.
-ifeq ($(GF_CC_IS_CLANG), 1)
-	# clang options
-	GF_CFLAGS   = -Wunreachable-code-break -Wunreachable-code-return
-	GF_CXXFLAGS = -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi
-
-	ifneq '' '$(and $(GF_CC_IS_LLVM_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 030800)))'
-		GF_CFLAGS += -Wdouble-promotion
-	endif
-	ifneq '' '$(and $(GF_CC_IS_APPLE_CLANG),$(filter 1,$(shell expr $(GF_CC_VER) \>= 070300)))'
-		GF_CFLAGS += -Wdouble-promotion
-	endif
-else
-	# gcc options
-	GF_CFLAGS   = -Wdouble-promotion
-	GF_CXXFLAGS = -Wno-array-bounds
-
-	ifeq ($(shell expr $(GF_CC_VER) \>= 070100), 1)
-		GF_CXXFLAGS += -Wno-format-truncation
-	endif
-	ifeq ($(shell expr $(GF_CC_VER) \>= 080100), 1)
-		GF_CXXFLAGS += -Wextra-semi
-	endif
-endif
--- a/scripts/get-hellaswag.sh
+++ b/scripts/get-hellaswag.sh
@ -1,10 +0,0 @@
-#!/bin/bash
-
-wget https://raw.githubusercontent.com/klosax/hellaswag_text_data/main/hellaswag_val_full.txt
-
-echo "Usage:"
-echo ""
-echo "  ./perplexity -m model.gguf -f hellaswag_val_full.txt --hellaswag [--hellaswag-tasks N] [other params]"
-echo ""
-
-exit 0
--- a/scripts/get-pg.sh
+++ b/scripts/get-pg.sh
@ -1,70 +0,0 @@
-#!/bin/bash
-
-# Print the usage text, including a table of the approximate token count of
-# pg.txt for selected values of n, then exit with status 1.
-function usage {
-    echo "usage: $0 <n>"
-    echo "note: n is the number of essays to download"
-    echo "for specific n, the resulting pg.txt file will have the following number of tokens:"
-    echo "n   | tokens"
-    echo "--- | ---"
-    echo "1   | 6230"
-    echo "2   | 23619"
-    echo "5   | 25859"
-    echo "10  | 36888"
-    echo "15  | 50188"
-    echo "20  | 59094"
-    echo "25  | 88764"
-    echo "30  | 103121"
-    echo "32  | 108338"
-    echo "35  | 113403"
-    echo "40  | 127699"
-    echo "45  | 135896"
-    exit 1
-}
-
-# Abort the script with status 1 unless the given command is available as an
-# executable file.
-#   $1 - command name
-function has_cmd {
-    # "$1" is quoted so that the name is passed to `command -v` as one word.
-    if ! [ -x "$(command -v "$1")" ]; then
-        echo "error: $1 is not available" >&2
-        exit 1
-    fi
-}
-
-# check for: curl, html2text, tail, sed, fmt
-has_cmd curl
-has_cmd html2text
-has_cmd tail
-has_cmd sed
-has_cmd fmt
-
-if [ $# -ne 1 ]; then
-    usage
-fi
-
-n=$1
-
-# get urls
-urls="$(curl http://www.aaronsw.com/2002/feeds/pgessays.rss | grep html | sed -e "s/.*http/http/" | sed -e "s/html.*/html/" | head -n "$n")"
-
-printf "urls:\n%s\n" "$urls"
-
-if [ -f pg.txt ]; then
-    rm pg.txt
-fi
-
-c=1
-# $urls is intentionally unquoted: it holds one URL per line and is split into
-# words here
-for url in $urls; do
-    echo "processing $url"
-
-    cc=$(printf "%03d" "$c")
-
-    # Overwrite rather than append: a per-essay file left behind by an earlier
-    # run must not be extended, otherwise the essay is duplicated in pg.txt.
-    curl -L "$url" | html2text | tail -n +4 | sed -E "s/^[[:space:]]+//g" | fmt -w 80 > "pg-$cc-one.txt"
-    cat "pg-$cc-one.txt" >> pg.txt
-
-    cp -v pg.txt "pg-$cc-all.txt"
-    c=$((c+1))
-
-    # don't flood the server
-    sleep 1
-done
-
-echo "done. data in pg.txt"
-
-exit 0
--- a/scripts/get-winogrande.sh
+++ b/scripts/get-winogrande.sh
@ -1,10 +0,0 @@
-#!/bin/bash
-
-wget https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw/main/winogrande-debiased-eval.csv
-
-echo "Usage:"
-echo ""
-echo "  ./perplexity -m model.gguf -f winogrande-debiased-eval.csv --winogrande [--winogrande-tasks N] [other params]"
-echo ""
-
-exit 0
--- a/scripts/hf.sh
+++ b/scripts/hf.sh
@ -1,107 +0,0 @@
-#!/bin/bash
-#
-# Shortcut for downloading HF models
-#
-# Usage:
-#   ./main -m $(./scripts/hf.sh https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
-#   ./main -m $(./scripts/hf.sh --url https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/blob/main/mixtral-8x7b-v0.1.Q4_K_M.gguf)
-#   ./main -m $(./scripts/hf.sh --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf)
-#
-
-# all logs go to stderr
-function log {
-    echo "$@" 1>&2
-}
-
-function usage {
-    log "Usage: $0 [[--url] <url>] [--repo <repo>] [--file <file>] [-h|--help]"
-    exit 1
-}
-
-# check for curl or wget
-# Return 0 if the given command is available as an executable file, else 1.
-#   $1 - command name
-function has_cmd {
-    # "$1" is quoted so that the name is passed to `command -v` as one word.
-    if ! [ -x "$(command -v "$1")" ]; then
-        return 1
-    fi
-}
-
-# pick the downloader: wget is preferred, curl is the fallback
-if has_cmd wget; then
-    downloader="wget"
-elif has_cmd curl; then
-    downloader="curl"
-else
-    log "[E] curl or wget not found"
-    exit 1
-fi
-
-url=""
-repo=""
-file=""
-
-# parse args
-while [[ $# -gt 0 ]]; do
-    case "$1" in
-        --url|--repo|--file)
-            # Each of these options needs a value. Without this check a
-            # trailing option makes `shift 2` fail without shifting, and the
-            # loop never terminates.
-            if [[ $# -lt 2 ]]; then
-                log "[E] missing value for $1"
-                usage
-            fi
-            case "$1" in
-                --url)
-                    url="$2"
-                    ;;
-                --repo)
-                    repo="$2"
-                    ;;
-                --file)
-                    file="$2"
-                    ;;
-            esac
-            shift 2
-            ;;
-        -h|--help)
-            usage
-            ;;
-        *)
-            url="$1"
-            shift
-            ;;
-    esac
-done
-
-# --repo together with --file takes precedence over an explicit URL
-if [ -n "$repo" ] && [ -n "$file" ]; then
-    url="https://huggingface.co/$repo/resolve/main/$file"
-fi
-
-if [ -z "$url" ]; then
-    log "[E] missing --url"
-    usage
-fi
-
-# check if the URL is a HuggingFace model, and if so, try to download it
-# (the trailing slash in the pattern rejects hosts that merely start with
-# "huggingface.co", such as huggingface.co.example.org)
-if [[ "$url" != "https://huggingface.co/"* ]]; then
-    log "[E] invalid URL, must start with https://huggingface.co"
-    exit 1
-fi
-
-# replace "blob/main" with "resolve/main"
-url=${url/blob\/main/resolve\/main}
-
-basename=$(basename "$url")
-
-log "[+] attempting to download $basename"
-
-# Build the command as an array so that the file name and URL are passed as
-# single arguments regardless of the characters they contain.
-case "$downloader" in
-    wget)
-        cmd=(wget -q --show-progress -c -O "$basename" "$url")
-        ;;
-    curl)
-        cmd=(curl -C - -f -o "$basename" -L "$url")
-        ;;
-esac
-
-log "[+] ${cmd[*]}"
-if "${cmd[@]}"; then
-    # the file name on stdout is the script's result, see the usage examples
-    printf '%s\n' "$basename"
-    exit 0
-fi
-
-log "[-] failed to download"
-
-exit 1
--- a/scripts/install-oneapi.bat
+++ b/scripts/install-oneapi.bat
@ -1,19 +0,0 @@
-::  MIT license
-::  Copyright (C) 2024 Intel Corporation
-::  SPDX-License-Identifier: MIT
-
-
-::  Arguments:
-::    %1 - URL of the installer web image to download
-::    %2 - optional value passed to the bootstrapper as --components=
-set URL=%1
-set COMPONENTS=%2
-
-::  Download the web image to %TEMP%, extract it into .\webimage_extracted and
-::  delete the downloaded file again.
-curl.exe --output %TEMP%\webimage.exe --url %URL% --retry 5 --retry-delay 5
-start /b /wait %TEMP%\webimage.exe -s -x -f webimage_extracted --log extract.log
-del %TEMP%\webimage.exe
-::  Run the extracted bootstrapper, with --components only when %2 was given.
-if "%COMPONENTS%"=="" (
-  webimage_extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
-) else (
-  webimage_extracted\bootstrapper.exe -s --action install --components=%COMPONENTS% --eula=accept -p=NEED_VS2017_INTEGRATION=0 -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=.
-)
-::  Save the bootstrapper's exit code before the cleanup below overwrites
-::  ERRORLEVEL, then return it as the script's exit code.
-set installer_exit_code=%ERRORLEVEL%
-rd /s/q "webimage_extracted"
-exit /b %installer_exit_code%
--- a/scripts/run-with-preset.py
+++ b/scripts/run-with-preset.py
@ -1,140 +0,0 @@
-#!/usr/bin/env python3
-
-import argparse
-import os
-import subprocess
-import sys
-
-import yaml
-
-# Long option names (without the leading "--") that are looked up in the merged
-# preset when the selected binary name ends with "main" or "perplexity".
-# NOTE(review): "np-penalize-nl" looks like a typo for "no-penalize-nl" - confirm against the binary's CLI.
-CLI_ARGS_MAIN_PERPLEXITY = [
-    "batch-size", "cfg-negative-prompt", "cfg-scale", "chunks", "color", "ctx-size", "escape",
-    "export", "file", "frequency-penalty", "grammar", "grammar-file", "hellaswag",
-    "hellaswag-tasks", "ignore-eos", "in-prefix", "in-prefix-bos", "in-suffix", "instruct",
-    "interactive", "interactive-first", "keep", "logdir", "logit-bias", "lora", "lora-base",
-    "low-vram", "main-gpu", "memory-f32", "mirostat", "mirostat-ent", "mirostat-lr", "mlock",
-    "model", "multiline-input", "n-gpu-layers", "n-predict", "no-mmap", "no-mul-mat-q",
-    "np-penalize-nl", "numa", "ppl-output-type", "ppl-stride", "presence-penalty", "prompt",
-    "prompt-cache", "prompt-cache-all", "prompt-cache-ro", "random-prompt", "repeat-last-n",
-    "repeat-penalty", "reverse-prompt", "rope-freq-base", "rope-freq-scale", "rope-scale", "seed",
-    "simple-io", "tensor-split", "threads", "temp", "tfs", "top-k", "top-p", "typical",
-    "verbose-prompt"
-]
-
-# Option names looked up when the selected binary name ends with "llama-bench".
-CLI_ARGS_LLAMA_BENCH = [
-    "batch-size", "memory-f32", "low-vram", "model", "mul-mat-q", "n-gen", "n-gpu-layers",
-    "n-prompt", "output", "repetitions", "tensor-split", "threads", "verbose"
-]
-
-# Option names looked up when the selected binary name ends with "server".
-CLI_ARGS_SERVER = [
-    "alias", "batch-size", "ctx-size", "embedding", "host", "memory-f32", "lora", "lora-base",
-    "low-vram", "main-gpu", "mlock", "model", "n-gpu-layers", "n-probs", "no-mmap", "no-mul-mat-q",
-    "numa", "path", "port", "rope-freq-base", "timeout", "rope-freq-scale", "tensor-split",
-    "threads", "verbose"
-]
-
-# Help text shown by argparse; the RawTextHelpFormatter used below keeps the line breaks as written.
-description = """Run llama.cpp binaries with presets from YAML file(s).
-To specify which binary should be run, specify the "binary" property (main, perplexity, llama-bench, and server are supported).
-To get a preset file template, run a llama.cpp binary with the "--logdir" CLI argument.
-
-Formatting considerations:
- The YAML property names are the same as the CLI argument names of the corresponding binary.
- Properties must use the long name of their corresponding llama.cpp CLI arguments.
- Like the llama.cpp binaries the property names do not differentiate between hyphens and underscores.
- Flags must be defined as "<PROPERTY_NAME>: true" to be effective.
- To define the logit_bias property, the expected format is "<TOKEN_ID>: <BIAS>" in the "logit_bias" namespace.
- To define multiple "reverse_prompt" properties simultaneously the expected format is a list of strings.
- To define a tensor split, pass a list of floats.
-"""
-usage = "run-with-preset.py [-h] [yaml_files ...] [--<ARG_NAME> <ARG_VALUE> ...]"
-epilog = ("  --<ARG_NAME> specify additional CLI args to be passed to the binary (override all preset files). "
-          "Unknown args will be ignored.")
-
-parser = argparse.ArgumentParser(
-    description=description, usage=usage, epilog=epilog, formatter_class=argparse.RawTextHelpFormatter)
-parser.add_argument("-bin", "--binary", help="The binary to run.")
-parser.add_argument("yaml_files", nargs="*",
-                    help="Arbitrary number of YAML files from which to read preset values. "
-                    "If two files specify the same values the later one will be used.")
-
-# Arguments argparse does not recognise end up in unknown_args instead of raising an error.
-known_args, unknown_args = parser.parse_known_args()
-
-# Nothing to do without preset files or pass-through arguments: show the help text.
-if not known_args.yaml_files and not unknown_args:
-    parser.print_help()
-    sys.exit(0)
-
-# Merge the preset files in the order given so that later files override earlier ones.
-props = dict()
-
-for yaml_file in known_args.yaml_files:
-    with open(yaml_file, "r") as f:
-        # An empty YAML document loads as None; treat it as "no properties"
-        # instead of letting dict.update() raise a TypeError.
-        props.update(yaml.load(f, yaml.SafeLoader) or {})
-
-# Property names may use underscores or hyphens; normalize to the hyphenated CLI spelling.
-props = {prop.replace("_", "-"): val for prop, val in props.items()}
-
-# The binary comes from the presets (default "main") unless --binary overrides it.
-binary = props.pop("binary", "main")
-if known_args.binary:
-    binary = known_args.binary
-
-# Prefer an executable of that name in the current directory.
-if os.path.exists(f"./{binary}"):
-    binary = f"./{binary}"
-
-# Pick the list of supported options from the (case-insensitive) end of the binary name.
-binary_lower = binary.lower()
-if binary_lower.endswith(("main", "perplexity")):
-    cli_args = CLI_ARGS_MAIN_PERPLEXITY
-elif binary_lower.endswith("llama-bench"):
-    cli_args = CLI_ARGS_LLAMA_BENCH
-elif binary_lower.endswith("server"):
-    cli_args = CLI_ARGS_SERVER
-else:
-    print(f"Unknown binary: {binary}")
-    sys.exit(1)
-
-command_list = [binary]
-
-for cli_arg in cli_args:
-    value = props.pop(cli_arg, None)
-
-    # Skip properties that are missing, falsy (False, 0, empty) or set to -1.
-    if not value or value == -1:
-        continue
-
-    # One "--logit-bias <token><signed bias>" pair per mapping entry.
-    if cli_arg == "logit-bias":
-        for token, bias in value.items():
-            command_list.append("--logit-bias")
-            command_list.append(f"{token}{bias:+}")
-        continue
-
-    # A list of reverse prompts becomes one "--reverse-prompt" option per entry.
-    if cli_arg == "reverse-prompt" and not isinstance(value, str):
-        for rp in value:
-            command_list.append("--reverse-prompt")
-            command_list.append(str(rp))
-        continue
-
-    command_list.append(f"--{cli_arg}")
-
-    # The tensor split list is passed as a single comma separated value.
-    if cli_arg == "tensor-split":
-        command_list.append(",".join([str(v) for v in value]))
-        continue
-
-    value = str(value)
-
-    # A value of True marks a flag: only the option name is emitted.
-    if value != "True":
-        command_list.append(str(value))
-
-# Whatever is left in props was not consumed by the selected binary's option list.
-num_unused = len(props)
-if num_unused > 10:
-    print(f"The preset file contained a total of {num_unused} unused properties.")
-elif num_unused > 0:
-    print("The preset file contained the following unused properties:")
-    for prop, value in props.items():
-        print(f"  {prop}: {value}")
-
-# Pass-through arguments go last on the command line.
-command_list += unknown_args
-
-sp = subprocess.Popen(command_list)
-
-# Ctrl-C is swallowed here so that this wrapper keeps waiting until the child has exited.
-while sp.returncode is None:
-    try:
-        sp.wait()
-    except KeyboardInterrupt:
-        pass
-
-sys.exit(sp.returncode)
--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@ -1,423 +0,0 @@
-#!/bin/bash
-#
-# Helper script for deploying llama.cpp server with a single Bash command
-#
-# - Works on Linux and macOS
-# - Supports: CPU, CUDA, Metal, OpenCL
-# - Can run all GGUF models from HuggingFace
-# - Can serve requests in parallel
-# - Always builds latest llama.cpp from GitHub
-#
-# Limitations
-#
-# - Chat templates are poorly supported (base models recommended)
-# - Might be unstable!
-#
-# Usage:
-#   ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [--non-interactive]
-#
-#   --port:            port number, default is 8888
-#   --repo:            path to a repo containing GGUF model files
-#   --wtype:           weights type (f16, q8_0, q4_0, q4_1), default is user-input
-#   --backend:         cpu, cuda, metal, opencl, depends on the OS
-#   --gpu-id:          gpu id, default is 0
-#   --n-parallel:      number of parallel requests, default is 8
-#   --n-kv:            KV cache size, default is 4096
-#   --verbose:         verbose output
-#   --non-interactive: run without asking a permission to run
-#
-# Example:
-#
-#   bash -c "$(curl -s https://ggml.ai/server-llm.sh)"
-#
-
-set -e
-
-# required utils: curl, git, make
-for tool in curl git make; do
-    if ! command -v "$tool" &> /dev/null; then
-        printf "[-] %s not found\n" "$tool"
-        exit 1
-    fi
-done
-
-# defaults for everything the command line can override
-is_interactive=1
-port=8888
-repo=""
-wtype=""
-
-# backend default: metal on macOS, cuda when nvcc is available, cpu otherwise
-if [[ "$OSTYPE" == "darwin"* ]]; then
-    backend="metal"
-elif command -v nvcc &> /dev/null; then
-    backend="cuda"
-else
-    backend="cpu"
-fi
-
-gpu_id=0
-n_parallel=8
-n_kv=4096
-verbose=0
-
-# Print the command line help (options, their defaults and an example) to stdout.
-function print_usage {
-    printf "Usage:\n"
-    printf "  ./server-llm.sh [--port] [--repo] [--wtype] [--backend] [--gpu-id] [--n-parallel] [--n-kv] [--verbose] [--non-interactive]\n\n"
-    printf "  --port:             port number, default is 8888\n"
-    printf "  --repo:             path to a repo containing GGUF model files\n"
-    printf "  --wtype:            weights type (f16, q8_0, q4_0, q4_1), default is user-input\n"
-    printf "  --backend:          cpu, cuda, metal, opencl, depends on the OS\n"
-    printf "  --gpu-id:           gpu id, default is 0\n"
-    printf "  --n-parallel:       number of parallel requests, default is 8\n"
-    printf "  --n-kv:             KV cache size, default is 4096\n"
-    printf "  --verbose:          verbose output\n"
-    # the blank line belongs after the last option, not in front of it
-    printf "  --non-interactive:  run without asking a permission to run\n\n"
-    printf "Example:\n\n"
-    # single quotes: the command substitution is shown literally, not executed
-    printf '  bash -c "$(curl -s https://ggml.ai/server-llm.sh)"\n\n'
-}
-
-# Walk over the command line; flags consume one word, valued options consume two.
-while [[ $# -gt 0 ]]; do
-    key="$1"
-    case $key in
-        --non-interactive)
-            is_interactive=0
-            shift
-            ;;
-        --verbose)
-            verbose=1
-            shift
-            ;;
-        --help)
-            print_usage
-            exit 0
-            ;;
-        --port|--repo|--wtype|--backend|--gpu-id|--n-parallel|--n-kv)
-            # report a missing value instead of dying silently on the shift below
-            if [[ $# -lt 2 ]]; then
-                printf "[-] Missing value for %s\n" "$key"
-                print_usage
-                exit 1
-            fi
-            case $key in
-                --port)       port="$2" ;;
-                --repo)       repo="$2" ;;
-                --wtype)      wtype="$2" ;;
-                --backend)    backend="$2" ;;
-                --gpu-id)     gpu_id="$2" ;;
-                --n-parallel) n_parallel="$2" ;;
-                --n-kv)       n_kv="$2" ;;
-            esac
-            shift 2
-            ;;
-        *)
-            echo "Unknown argument: $key"
-            print_usage
-            exit 1
-            ;;
-    esac
-done
-
-# available weights types
-wtypes=("F16" "Q8_0" "Q4_0" "Q4_1" "Q5_0" "Q5_1" "Q6_K" "Q5_K_M" "Q5_K_S" "Q4_K_M" "Q4_K_S" "Q3_K_L" "Q3_K_M" "Q3_K_S" "Q2_K")
-
-# one (initially empty) model file slot per weights type, same index as in wtypes
-wfiles=()
-for wt in "${wtypes[@]}"; do
-    wfiles+=("")
-done
-
-# map wtype input to index
-if [[ -n "$wtype" ]]; then
-    # the table is upper case, so normalize the user input (documented as f16, q8_0, ...)
-    # rather than the table entries; tr keeps this working on bash 3.2
-    uwtype=$(printf '%s' "$wtype" | tr '[:lower:]' '[:upper:]')
-
-    iw=-1
-    for is in "${!wtypes[@]}"; do
-        if [[ "${wtypes[$is]}" == "$uwtype" ]]; then
-            iw=$is
-            break
-        fi
-    done
-
-    if [[ $iw -eq -1 ]]; then
-        printf "[-] Invalid weight type: %s\n" "$wtype"
-        exit 1
-    fi
-
-    # from here on wtype holds the index into wtypes / wfiles
-    wtype="$iw"
-fi
-
-# sample repos offered in the selection menu
-repos=(
-    "https://huggingface.co/TheBloke/Llama-2-7B-GGUF"
-    "https://huggingface.co/TheBloke/Llama-2-13B-GGUF"
-    "https://huggingface.co/TheBloke/Llama-2-70B-GGUF"
-    "https://huggingface.co/TheBloke/CodeLlama-7B-GGUF"
-    "https://huggingface.co/TheBloke/CodeLlama-13B-GGUF"
-    "https://huggingface.co/TheBloke/CodeLlama-34B-GGUF"
-    "https://huggingface.co/TheBloke/Mistral-7B-v0.1-GGUF"
-    "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF"
-    "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"
-    "https://huggingface.co/TheBloke/CausalLM-7B-GGUF"
-)
-
-# in interactive mode show the disclaimer and wait for Enter before doing anything
-if [[ $is_interactive -eq 1 ]]; then
-    # one argument per output line; empty arguments produce the blank lines
-    printf '%s\n' \
-        "" \
-        "[I] This is a helper script for deploying llama.cpp's server on this machine." \
-        "" \
-        "    Based on the options that follow, the script might download a model file" \
-        "    from the internet, which can be a few GBs in size. The script will also" \
-        "    build the latest llama.cpp source code from GitHub, which can be unstable." \
-        "" \
-        "    Upon success, an HTTP server will be started and it will serve the selected" \
-        "    model using llama.cpp for demonstration purposes." \
-        "" \
-        "    Please note:" \
-        "" \
-        "    - All new data will be stored in the current folder" \
-        "    - The server will be listening on all network interfaces" \
-        "    - The server will run with default settings which are not always optimal" \
-        "    - Do not judge the quality of a model based on the results from this script" \
-        "    - Do not use this script to benchmark llama.cpp" \
-        "    - Do not use this script in production" \
-        "    - This script is only for demonstration purposes" \
-        "" \
-        "    If you don't know what you are doing, please press Ctrl-C to abort now" \
-        "" \
-        "    Press Enter to continue ..." \
-        ""
-
-    read
-fi
-
-# without --repo, let the user pick a sample repo by number or type an URL
-if [[ -z "$repo" ]]; then
-    printf "[+] No repo provided from the command line\n"
-    printf "    Please select a number from the list below or enter an URL:\n\n"
-
-    for idx in "${!repos[@]}"; do
-        printf "    %2d) %s\n" "$idx" "${repos[$idx]}"
-    done
-
-    # keep prompting until the answer is a valid sample index or looks like an URL
-    until [[ -n "$repo" ]]; do
-        printf "\n    Or choose one from: https://huggingface.co/models?sort=trending&search=gguf\n\n"
-        read -p "[+] Select repo: " answer
-
-        if [[ "$answer" =~ ^[0-9]+$ ]]; then
-            # a plain number is an index into the sample list
-            if [[ "$answer" -ge 0 && "$answer" -lt ${#repos[@]} ]]; then
-                repo="${repos[$answer]}"
-            else
-                printf "[-] Invalid repo index: %s\n" "$answer"
-            fi
-        elif [[ "$answer" =~ ^https?:// ]]; then
-            repo="$answer"
-        else
-            printf "[-] Invalid repo URL: %s\n" "$answer"
-        fi
-    done
-fi
-
-# drop a trailing /tree/main so the repo root is what remains
-repo=$(echo "$repo" | sed -E 's/\/tree\/main$//g')
-
-printf "[+] Checking for GGUF model files in %s\n" "$repo"
-
-# scrape the file names ending in .gguf out of the repo's file listing page
-# TODO: better logic
-model_tree="${repo%/}/tree/main"
-model_files=$(curl -s "$model_tree" \
-    | grep -i "\\.gguf</span>" \
-    | sed -E 's/.*<span class="truncate group-hover:underline">(.*)<\/span><\/a>/\1/g')
-
-# list the files whose name contains a known weights type
-printf "[+] Model files:\n\n"
-for file in $model_files; do
-    # compare in upper case, like the entries of wtypes
-    ufile=$(echo "$file" | tr '[:lower:]' '[:upper:]')
-
-    # the first weights type contained in the file name decides the slot
-    iw=-1
-    for idx in "${!wtypes[@]}"; do
-        if [[ "$ufile" == *"${wtypes[$idx]}"* ]]; then
-            iw=$idx
-            break
-        fi
-    done
-
-    # files without a recognizable weights type are not listed
-    if [[ $iw -eq -1 ]]; then
-        continue
-    fi
-
-    wfiles[$iw]="$file"
-
-    # "*" marks files that already exist in the current directory
-    if [[ -f "$file" ]]; then
-        have="*"
-    else
-        have=" "
-    fi
-
-    printf "    %2d) %s %s\n" $iw "$have" "$file"
-done
-
-# Print the model file recorded for weights-type index $1.
-# Prints nothing when $1 is empty, not a plain decimal number or out of range,
-# so free text is never evaluated as an arithmetic array subscript.
-function wfile_for_index {
-    local idx="$1"
-    # 10# forces base 10 so inputs such as "08" are not parsed as octal
-    if [[ "$idx" =~ ^[0-9]+$ ]] && (( 10#$idx < ${#wfiles[@]} )); then
-        printf '%s' "${wfiles[$((10#$idx))]}"
-    fi
-}
-
-wfile=$(wfile_for_index "$wtype")
-
-# ask for weights type until provided and available
-while [[ -z "$wfile" ]]; do
-    printf "\n"
-    read -r -p "[+] Select weight type: " wtype
-    wfile=$(wfile_for_index "$wtype")
-
-    if [[ -z "$wfile" ]]; then
-        printf "[-] Invalid weight type: %s\n" "$wtype"
-        wtype=""
-    fi
-done
-
-printf "[+] Selected weight type: %s (%s)\n" "$wtype" "$wfile"
-
-url="${repo%/}/resolve/main/$wfile"
-
-# marker file created next to the model after a successful download
-chk="$wfile.chk"
-
-# check if we should download the file
-# - if $wfile does not exist
-# - if $wfile exists but $chk does not exist
-# - if $wfile exists and $chk exists but $wfile is newer than $chk
-# TODO: better logic using git lfs info
-
-do_download=0
-
-if [[ ! -f "$wfile" ]]; then
-    do_download=1
-elif [[ ! -f "$chk" ]]; then
-    do_download=1
-elif [[ "$wfile" -nt "$chk" ]]; then
-    do_download=1
-fi
-
-if [[ $do_download -eq 1 ]]; then
-    printf "[+] Downloading weights from %s\n" "$url"
-
-    # --fail makes curl return non-zero on HTTP errors instead of saving the
-    # error page as the model; testing curl directly keeps set -e from ending
-    # the script before the failure can be reported
-    if curl --fail -o "$wfile" -# -L "$url"; then
-        # create a check file if successful
-        printf "[+] Creating check file %s\n" "$chk"
-        touch "$chk"
-    else
-        printf "[-] Failed to download %s\n" "$url"
-        exit 1
-    fi
-else
-    printf "[+] Using cached weights %s\n" "$wfile"
-fi
-
-# fetch (or refresh) the llama.cpp sources
-
-printf "[+] Downloading latest llama.cpp\n"
-
-# one checkout per port
-llama_cpp_dir="__llama_cpp_port_${port}__"
-
-if [[ ! -d "$llama_cpp_dir" ]]; then
-    printf "[+] Cloning llama.cpp\n"
-
-    git clone https://github.com/ggerganov/llama.cpp "$llama_cpp_dir"
-elif [[ -f "$llama_cpp_dir/__ggml_script__" ]]; then
-    # the marker file shows that this script created the directory: reuse it
-    printf "[+] Directory %s already exists\n" "$llama_cpp_dir"
-    printf "[+] Using cached llama.cpp\n"
-
-    # the subshell keeps the current directory of the script unchanged
-    (
-        cd "$llama_cpp_dir"
-        git reset --hard
-        git fetch
-        git checkout origin/master
-    )
-else
-    # a directory without the marker file was not made by this script, abort
-    printf "[-] Directory %s already exists\n" "$llama_cpp_dir"
-    printf "[-] Please remove it and try again\n"
-    exit 1
-fi
-
-# mark that the directory is made by this script
-touch "$llama_cpp_dir/__ggml_script__"
-
-# trace the remaining commands in verbose mode
-if [[ $verbose -eq 1 ]]; then
-    set -x
-fi
-
-# build the server inside the checkout
-cd "$llama_cpp_dir"
-
-make clean
-
-# make stays quiet unless verbose output was requested
-make_flags="--silent"
-if [[ $verbose -eq 1 ]]; then
-    make_flags=""
-fi
-
-# $make_flags is left unquoted on purpose: an empty value must add no argument
-case "$backend" in
-    cuda)
-        printf "[+] Building with CUDA backend\n"
-        LLAMA_CUBLAS=1 make -j server $make_flags
-        ;;
-    cpu)
-        printf "[+] Building with CPU backend\n"
-        make -j server $make_flags
-        ;;
-    metal)
-        printf "[+] Building with Metal backend\n"
-        make -j server $make_flags
-        ;;
-    opencl)
-        printf "[+] Building with OpenCL backend\n"
-        LLAMA_CLBLAST=1 make -j server $make_flags
-        ;;
-    *)
-        printf "[-] Unknown backend: %s\n" "$backend"
-        exit 1
-        ;;
-esac
-
-# run the server
-
-printf "[+] Running server\n"
-
-# backend specific server arguments
-case "$backend" in
-    cuda)
-        export CUDA_VISIBLE_DEVICES=$gpu_id
-        args="-ngl 999"
-        ;;
-    cpu)
-        args="-ngl 0"
-        ;;
-    metal|opencl)
-        args="-ngl 999"
-        ;;
-    *)
-        printf "[-] Unknown backend: %s\n" "$backend"
-        exit 1
-        ;;
-esac
-
-if [[ $verbose -eq 1 ]]; then
-    args="$args --verbose"
-fi
-
-# the model lives one level up, next to the checkout; $args is split into words on purpose
-./server -m "../$wfile" --host 0.0.0.0 --port "$port" -c $n_kv -np "$n_parallel" $args
-
-exit 0
--- a/scripts/sync-ggml-am.sh
+++ b/scripts/sync-ggml-am.sh
@ -1,168 +0,0 @@
-#!/bin/bash
-#
-# Synchronize ggml changes to llama.cpp
-#
-# Usage:
-#
-#   $ cd /path/to/llama.cpp
-#   $ ./scripts/sync-ggml-am.sh -skip hash0,hash1,hash2...
-#
-# Expects a ggml checkout next to the llama.cpp checkout (../ggml) and the
-# last synchronized ggml commit in scripts/sync-ggml.last.
-#
-
-set -e
-
-# work from the repository root (the parent of this script's directory)
-sd=$(dirname "$0")
-cd "$sd/../"
-
-SRC_LLAMA=$(pwd)
-
-# Check before resolving the path: a failing cd inside $(...) does not stop
-# the substitution, so pwd would silently report the llama.cpp directory.
-if [ ! -d ../ggml ]; then
-    echo "ggml not found at $SRC_LLAMA/../ggml"
-    exit 1
-fi
-
-SRC_GGML=$(cd ../ggml && pwd)
-
-lc=$(cat "$SRC_LLAMA/scripts/sync-ggml.last")
-echo "Syncing ggml changes since commit $lc"
-
-# optional: "-skip <hashes>" names commits that are left out further below
-to_skip=""
-if [ "$1" == "-skip" ]; then
-    to_skip=$2
-fi
-
-cd "$SRC_GGML"
-
-# show the pending commits, then record (oldest first) the hashes of those
-# whose one-line summary does not contain a "(llama/<number>)" tag
-git log --oneline "$lc"..HEAD
-git log --oneline "$lc"..HEAD --reverse | grep -v "(llama/[0-9]*)" | cut -d' ' -f1 > "$SRC_LLAMA/ggml-commits"
-
-if [ ! -s "$SRC_LLAMA/ggml-commits" ]; then
-    rm -v "$SRC_LLAMA/ggml-commits"
-    echo "No new commits"
-    exit 0
-fi
-
-# start from a clean patch file, the loop below appends to it
-if [ -f "$SRC_LLAMA/ggml-src.patch" ]; then
-    rm -v "$SRC_LLAMA/ggml-src.patch"
-fi
-
-while read -r c; do
-    # -skip takes a list of hashes; a commit is skipped when its hash occurs in it
-    if [ -n "$to_skip" ]; then
-        if [[ $to_skip == *"$c"* ]]; then
-            echo "Skipping $c"
-            continue
-        fi
-    fi
-
-    # export only the changes to the files that are mirrored in llama.cpp
-    git format-patch -k "$c~1..$c" --stdout -- \
-        include/ggml/ggml*.h \
-        src/ggml*.h \
-        src/ggml*.c \
-        src/ggml*.cpp \
-        src/ggml*.m \
-        src/ggml*.metal \
-        src/ggml*.cu \
-        tests/test-opt.cpp \
-        tests/test-grad0.cpp \
-        tests/test-quantize-fns.cpp \
-        tests/test-quantize-perf.cpp \
-        tests/test-backend-ops.cpp \
-        >> "$SRC_LLAMA/ggml-src.patch"
-done < "$SRC_LLAMA/ggml-commits"
-
-rm -v "$SRC_LLAMA/ggml-commits"
-
-# delete files if empty
-if [ ! -s "$SRC_LLAMA/ggml-src.patch" ]; then
-    rm -v "$SRC_LLAMA/ggml-src.patch"
-fi
-
-cd "$SRC_LLAMA"
-
-if [ -f "$SRC_LLAMA/ggml-src.patch" ]; then
-    # replace PR numbers
-    #
-    # Subject: some text (#1234)
-    # Subject: some text (ggml/1234)
-    sed -e 's|^Subject: \(.*\) (#\([0-9]*\))|Subject: \1 (ggml/\2)|' ggml-src.patch > ggml-src.patch.tmp
-    mv ggml-src.patch.tmp ggml-src.patch
-
-    # same rewrite for any other line that ends in "(#<number>)"
-    sed -e 's|^\(.*\) (#\([0-9]*\))$|\1 (ggml/\2)|' ggml-src.patch > ggml-src.patch.tmp
-    mv ggml-src.patch.tmp ggml-src.patch
-
-    # replace filenames:
-    #
-    # src/ggml.c                  -> ggml.c
-    # src/ggml-alloc.c            -> ggml-alloc.c
-    # src/ggml-backend-impl.h     -> ggml-backend-impl.h
-    # src/ggml-backend.c          -> ggml-backend.c
-    # src/ggml-cuda.cu            -> ggml-cuda.cu
-    # src/ggml-cuda.h             -> ggml-cuda.h
-    # src/ggml-impl.h             -> ggml-impl.h
-    # src/ggml-kompute.cpp        -> ggml-kompute.cpp
-    # src/ggml-kompute.h          -> ggml-kompute.h
-    # src/ggml-metal.h            -> ggml-metal.h
-    # src/ggml-metal.m            -> ggml-metal.m
-    # src/ggml-mpi.h              -> ggml-mpi.h
-    # src/ggml-mpi.c              -> ggml-mpi.c
-    # src/ggml-opencl.cpp         -> ggml-opencl.cpp
-    # src/ggml-opencl.h           -> ggml-opencl.h
-    # src/ggml-quants.c           -> ggml-quants.c
-    # src/ggml-quants.h           -> ggml-quants.h
-    # src/ggml-sycl.cpp           -> ggml-sycl.cpp
-    # src/ggml-sycl.h             -> ggml-sycl.h
-    # src/ggml-vulkan.cpp         -> ggml-vulkan.cpp
-    # src/ggml-vulkan.h           -> ggml-vulkan.h
-    # include/ggml/ggml.h         -> ggml.h
-    # include/ggml/ggml-alloc.h   -> ggml-alloc.h
-    # include/ggml/ggml-backend.h -> ggml-backend.h
-    #
-    # tests/test-opt.cpp           -> tests/test-opt.cpp
-    # tests/test-grad0.cpp         -> tests/test-grad0.cpp
-    # tests/test-quantize-fns.cpp  -> tests/test-quantize-fns.cpp
-    # tests/test-quantize-perf.cpp -> tests/test-quantize-perf.cpp
-    # tests/test-backend-ops.cpp   -> tests/test-backend-ops.cpp
-
-    # "|" as the sed delimiter avoids escaping every slash of the paths
-    sed \
-        -e 's|src/ggml\.c|ggml.c|g' \
-        -e 's|src/ggml-alloc\.c|ggml-alloc.c|g' \
-        -e 's|src/ggml-backend-impl\.h|ggml-backend-impl.h|g' \
-        -e 's|src/ggml-backend\.c|ggml-backend.c|g' \
-        -e 's|src/ggml-cuda\.cu|ggml-cuda.cu|g' \
-        -e 's|src/ggml-cuda\.h|ggml-cuda.h|g' \
-        -e 's|src/ggml-impl\.h|ggml-impl.h|g' \
-        -e 's|src/ggml-kompute\.cpp|ggml-kompute.cpp|g' \
-        -e 's|src/ggml-kompute\.h|ggml-kompute.h|g' \
-        -e 's|src/ggml-metal\.h|ggml-metal.h|g' \
-        -e 's|src/ggml-metal\.m|ggml-metal.m|g' \
-        -e 's|src/ggml-mpi\.h|ggml-mpi.h|g' \
-        -e 's|src/ggml-mpi\.c|ggml-mpi.c|g' \
-        -e 's|src/ggml-opencl\.cpp|ggml-opencl.cpp|g' \
-        -e 's|src/ggml-opencl\.h|ggml-opencl.h|g' \
-        -e 's|src/ggml-quants\.c|ggml-quants.c|g' \
-        -e 's|src/ggml-quants\.h|ggml-quants.h|g' \
-        -e 's|src/ggml-sycl\.cpp|ggml-sycl.cpp|g' \
-        -e 's|src/ggml-sycl\.h|ggml-sycl.h|g' \
-        -e 's|src/ggml-vulkan\.cpp|ggml-vulkan.cpp|g' \
-        -e 's|src/ggml-vulkan\.h|ggml-vulkan.h|g' \
-        -e 's|include/ggml/ggml\.h|ggml.h|g' \
-        -e 's|include/ggml/ggml-alloc\.h|ggml-alloc.h|g' \
-        -e 's|include/ggml/ggml-backend\.h|ggml-backend.h|g' \
-        -e 's|tests/test-opt\.cpp|tests/test-opt.cpp|g' \
-        -e 's|tests/test-grad0\.cpp|tests/test-grad0.cpp|g' \
-        -e 's|tests/test-quantize-fns\.cpp|tests/test-quantize-fns.cpp|g' \
-        -e 's|tests/test-quantize-perf\.cpp|tests/test-quantize-perf.cpp|g' \
-        -e 's|tests/test-backend-ops\.cpp|tests/test-backend-ops.cpp|g' \
-        ggml-src.patch > ggml-src.patch.tmp
-    mv ggml-src.patch.tmp ggml-src.patch
-
-    git am ggml-src.patch
-
-    rm -v "$SRC_LLAMA/ggml-src.patch"
-fi
-
-# update last commit
-cd "$SRC_GGML"
-git log -1 --format=%H > "$SRC_LLAMA/scripts/sync-ggml.last"
-
-echo "Done"
-
-exit 0
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@ -1 +0,0 @@
-5070f078a67c18c11736e78316ab715ca9afde16