From 5d246a792ddf839e87fb777681dd2a9ac7f7eb5e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= <sigbjorn.skjaeret@scala.com>
Date: Sun, 24 May 2026 09:51:31 +0200
Subject: [PATCH] convert : minor fixes for numpy 2.x (#23571)

---
 examples/convert_legacy_llama.py | 3 ++-
 gguf-py/gguf/quants.py           | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py
index c4ec5c524..5c9305b12 100755
--- a/examples/convert_legacy_llama.py
+++ b/examples/convert_legacy_llama.py
@@ -1308,7 +1308,8 @@ def do_dump_model(model_plus: ModelPlus) -> None:
 
 def main(args_in: list[str] | None = None) -> None:
     output_choices = ["f32", "f16"]
-    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+    dummy_val = np.uint32(1)
+    if dummy_val == dummy_val.view(dummy_val.dtype.newbyteorder("<")):
         # We currently only support Q8_0 output on little endian systems.
         output_choices.append("q8_0")
     parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file")
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index 1d9d9ab7d..80966b6ef 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -28,6 +28,7 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati
 # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time
 def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray:
     rows = arr.reshape((-1, arr.shape[-1]))
+    assert len(rows.shape)
     osize = 1
     for dim in oshape:
         osize *= dim