From c2cd24fbfdfeacc2fc6ad03878379de104264114 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 15 Feb 2025 20:29:56 +0200 Subject: [PATCH 1/4] readme : add notice about new package registry (#11890) * readme : add notice about new package registry * cont : fix whitespace --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 1764cad81..f70c8ae1e 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,13 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ +> [!IMPORTANT] +> New `llama.cpp` package location: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp/pkgs/container/llama.cpp) +> +> Update your container URLs to: `ghcr.io/ggml-org/llama.cpp` +> +> More info: https://github.com/ggml-org/llama.cpp/discussions/11801 + ## Recent API changes - [Changelog for `libllama` API](https://github.com/ggml-org/llama.cpp/issues/9289) From 22885105a6b034abaf1c1471ad90fb2ae96146bb Mon Sep 17 00:00:00 2001 From: Adrian Kretz Date: Sat, 15 Feb 2025 19:39:20 +0100 Subject: [PATCH 2/4] metal : optimize dequant q6_K kernel (#11892) --- ggml/src/ggml-metal/ggml-metal.metal | 33 ++++++++++++++++++---------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index da415184b..83e7ac9f4 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -373,24 +373,33 @@ void dequantize_q5_K(device const block_q5_K *xb, short il, thread type4x4 & reg template void dequantize_q6_K(device const block_q6_K *xb, short il, thread type4x4 & reg) { const half d_all = xb->d; - device const uint8_t * ql = (device const uint8_t *)xb->ql; - device const uint8_t * qh = (device const uint8_t *)xb->qh; + device const uint16_t * ql = (device const uint16_t *)xb->ql; + device const uint16_t * qh = (device const uint16_t *)xb->qh; device const int8_t * scales = (device const int8_t 
*)xb->scales; - ql = ql + 64*(il/8) + 32*((il/2)&1) + 16*(il&1); - qh = qh + 32*(il/8) + 16*(il&1); + ql = ql + 32*(il/8) + 16*((il/2)&1) + 8*(il&1); + qh = qh + 16*(il/8) + 8*(il&1); float sc = scales[(il%2) + 2 * ((il/2))]; il = (il/2) & 3; - const uint16_t kmask1 = il>1 ? (il>2 ? 192 : 48) : (il>0 ? 12 : 3); - const uint16_t kmask2 = il>1 ? 0xF0 : 0x0F; - const float coef = il>1 ? 1.f/16.f : 1.f; + const uint32_t kmask1 = il>1 ? (il>2 ? 0xC0C0C0C0 : 0x30303030) : (il>0 ? 0x0C0C0C0C : 0x03030303); + const uint32_t kmask2 = il>1 ? 0xF0F0F0F0 : 0x0F0F0F0F; const float ml = d_all * sc * 32.f; - const float dl = d_all * sc * coef; - for (int i = 0; i < 16; ++i) { - const half q = il&1 ? ((ql[i] & kmask2) | ((qh[i] & kmask1) << 2)) - : ((ql[i] & kmask2) | ((qh[i] & kmask1) << 4)); - reg[i/4][i%4] = dl * q - ml; + const float dl0 = d_all * sc; + const float dl1 = dl0 / 256.f; + const float dl2 = dl0 / (256.f * 256.f); + const float dl3 = dl0 / (256.f * 256.f * 256.f); + const uint8_t shr_h = il>2 ? 2 : 0; + const uint8_t shl_h = il>1 ? 0 : (il>0 ? 2 : 4); + const uint8_t shr_l = il>1 ? 
4 : 0; + for (int i = 0; i < 4; ++i) { + const uint32_t low = (ql[2*i] | (uint32_t)(ql[2*i+1] << 16)) & kmask2; + const uint32_t high = (qh[2*i] | (uint32_t)(qh[2*i+1] << 16)) & kmask1; + const uint32_t q = ((high << shl_h) >> shr_h) | (low >> shr_l); + reg[i][0] = dl0 * ((half)(q & 0xFF)) - ml; + reg[i][1] = dl1 * ((float)(q & 0xFF00)) - ml; + reg[i][2] = dl2 * ((float)(q & 0xFF0000)) - ml; + reg[i][3] = dl3 * ((float)(q & 0xFF000000)) - ml; } } From fc10c38ded84670955d4c44770f8acfb64617e15 Mon Sep 17 00:00:00 2001 From: 708-145 <40387547+708-145@users.noreply.github.com> Date: Sat, 15 Feb 2025 20:03:30 +0100 Subject: [PATCH 3/4] examples: fix typo in imatrix/README.md (#11884) * simple typo fixed * Update examples/imatrix/README.md --------- Co-authored-by: Tobias Bergmann Co-authored-by: Georgi Gerganov --- examples/imatrix/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/imatrix/README.md b/examples/imatrix/README.md index bdf248cd3..9aa2b2034 100644 --- a/examples/imatrix/README.md +++ b/examples/imatrix/README.md @@ -1,6 +1,6 @@ # llama.cpp/examples/imatrix -Compute an importance matrix for a model and given text dataset. Can be used during quantization to enchance the quality of the quantized models. +Compute an importance matrix for a model and given text dataset. Can be used during quantization to enhance the quality of the quantized models. 
More information is available here: https://github.com/ggml-org/llama.cpp/pull/4861 ## Usage From 6dde1782483d6b0a1d59f5a5fbcb3119b9d34c27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Sat, 15 Feb 2025 20:23:22 +0100 Subject: [PATCH 4/4] scripts: fix compare-llama-bench commit hash logic (#11891) --- scripts/compare-llama-bench.py | 47 ++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 17 deletions(-) diff --git a/scripts/compare-llama-bench.py b/scripts/compare-llama-bench.py index 239c458d8..6205fe88d 100755 --- a/scripts/compare-llama-bench.py +++ b/scripts/compare-llama-bench.py @@ -124,9 +124,22 @@ if input_file is None: connection = sqlite3.connect(input_file) cursor = connection.cursor() -builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall() -commit_short_len = len(builds[0][0]) +build_len_min: int = cursor.execute("SELECT MIN(LENGTH(build_commit)) from test;").fetchone()[0] +build_len_max: int = cursor.execute("SELECT MAX(LENGTH(build_commit)) from test;").fetchone()[0] + +if build_len_min != build_len_max: + logger.warning(f"{input_file} contains commit hashes of differing lengths. It's possible that the wrong commits will be compared. 
" + "Try purging the database of old commits.") + cursor.execute(f"UPDATE test SET build_commit = SUBSTRING(build_commit, 1, {build_len_min});") + +build_len: int = build_len_min + +builds = cursor.execute("SELECT DISTINCT build_commit FROM test;").fetchall() +builds = list(map(lambda b: b[0], builds)) # list[tuple[str]] -> list[str] + +if not builds: + raise RuntimeError(f"{input_file} does not contain any builds.") try: repo = git.Repo(".", search_parent_directories=True) @@ -140,11 +153,11 @@ def find_parent_in_data(commit: git.Commit): seen_hexsha8 = set() while heap: depth, current_commit = heapq.heappop(heap) - current_hexsha8 = commit.hexsha[:commit_short_len] - if (current_hexsha8,) in builds: + current_hexsha8 = commit.hexsha[:build_len] + if current_hexsha8 in builds: return current_hexsha8 for parent in commit.parents: - parent_hexsha8 = parent.hexsha[:commit_short_len] + parent_hexsha8 = parent.hexsha[:build_len] if parent_hexsha8 not in seen_hexsha8: seen_hexsha8.add(parent_hexsha8) heapq.heappush(heap, (depth + 1, parent)) @@ -158,40 +171,40 @@ def get_all_parent_hexsha8s(commit: git.Commit): while unvisited: current_commit = unvisited.pop(0) - visited.append(current_commit.hexsha[:commit_short_len]) + visited.append(current_commit.hexsha[:build_len]) for parent in current_commit.parents: - if parent.hexsha[:commit_short_len] not in visited: + if parent.hexsha[:build_len] not in visited: unvisited.append(parent) return visited -def get_commit_name(hexsha8): +def get_commit_name(hexsha8: str): """Helper function to find a human-readable name for a commit if possible.""" if repo is None: return hexsha8 for h in repo.heads: - if h.commit.hexsha[:commit_short_len] == hexsha8: + if h.commit.hexsha[:build_len] == hexsha8: return h.name for t in repo.tags: - if t.commit.hexsha[:commit_short_len] == hexsha8: + if t.commit.hexsha[:build_len] == hexsha8: return t.name return hexsha8 -def get_commit_hexsha8(name): +def get_commit_hexsha8(name: str): 
"""Helper function to search for a commit given a human-readable name.""" if repo is None: return None for h in repo.heads: if h.name == name: - return h.commit.hexsha[:commit_short_len] + return h.commit.hexsha[:build_len] for t in repo.tags: if t.name == name: - return t.commit.hexsha[:commit_short_len] + return t.commit.hexsha[:build_len] for c in repo.iter_commits("--all"): - if c.hexsha[:commit_short_len] == name[:commit_short_len]: - return c.hexsha[:commit_short_len] + if c.hexsha[:build_len] == name[:build_len]: + return c.hexsha[:build_len] return None @@ -199,7 +212,7 @@ hexsha8_baseline = name_baseline = None # If the user specified a baseline, try to find a commit for it: if known_args.baseline is not None: - if (known_args.baseline,) in builds: + if known_args.baseline in builds: hexsha8_baseline = known_args.baseline if hexsha8_baseline is None: hexsha8_baseline = get_commit_hexsha8(known_args.baseline) @@ -228,7 +241,7 @@ hexsha8_compare = name_compare = None # If the user has specified a compare value, try to find a corresponding commit: if known_args.compare is not None: - if (known_args.compare,) in builds: + if known_args.compare in builds: hexsha8_compare = known_args.compare if hexsha8_compare is None: hexsha8_compare = get_commit_hexsha8(known_args.compare)