From 5d246a792ddf839e87fb777681dd2a9ac7f7eb5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sun, 24 May 2026 09:51:31 +0200 Subject: [PATCH] convert : minor fixes for numpy 2.x (#23571) --- examples/convert_legacy_llama.py | 3 ++- gguf-py/gguf/quants.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/convert_legacy_llama.py b/examples/convert_legacy_llama.py index c4ec5c524..5c9305b12 100755 --- a/examples/convert_legacy_llama.py +++ b/examples/convert_legacy_llama.py @@ -1308,7 +1308,8 @@ def do_dump_model(model_plus: ModelPlus) -> None: def main(args_in: list[str] | None = None) -> None: output_choices = ["f32", "f16"] - if np.uint32(1) == np.uint32(1).newbyteorder("<"): + dummy_val = np.uint32(1) + if dummy_val == dummy_val.view(dummy_val.dtype.newbyteorder("<")): # We currently only support Q8_0 output on little endian systems. output_choices.append("q8_0") parser = argparse.ArgumentParser(description="Convert a LLaMA model to a GGML compatible file") diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py index 1d9d9ab7d..80966b6ef 100644 --- a/gguf-py/gguf/quants.py +++ b/gguf-py/gguf/quants.py @@ -28,6 +28,7 @@ def quant_shape_from_byte_shape(shape: Sequence[int], quant_type: GGMLQuantizati # This is faster than np.vectorize and np.apply_along_axis because it works on more than one row at a time def _apply_over_grouped_rows(func: Callable[[np.ndarray], np.ndarray], arr: np.ndarray, otype: DTypeLike, oshape: tuple[int, ...]) -> np.ndarray: rows = arr.reshape((-1, arr.shape[-1])) + assert len(rows.shape) osize = 1 for dim in oshape: osize *= dim