Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	docs/build-s390x.md
#	docs/ops.md
#	docs/ops/zDNN.csv
#	ggml/include/ggml-zdnn.h
#	ggml/src/ggml-sycl/binbcast.cpp
#	ggml/src/ggml-sycl/concat.cpp
#	ggml/src/ggml-sycl/conv.cpp
#	ggml/src/ggml-sycl/convert.cpp
#	ggml/src/ggml-sycl/cpy.cpp
#	ggml/src/ggml-sycl/dmmv.cpp
#	ggml/src/ggml-sycl/dpct/helper.hpp
#	ggml/src/ggml-sycl/element_wise.cpp
#	ggml/src/ggml-sycl/getrows.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/gla.cpp
#	ggml/src/ggml-sycl/im2col.cpp
#	ggml/src/ggml-sycl/mmq.cpp
#	ggml/src/ggml-sycl/mmvq.cpp
#	ggml/src/ggml-sycl/norm.cpp
#	ggml/src/ggml-sycl/rope.cpp
#	ggml/src/ggml-sycl/set_rows.cpp
#	ggml/src/ggml-sycl/softmax.cpp
#	ggml/src/ggml-sycl/tsembd.cpp
#	ggml/src/ggml-sycl/wkv.cpp
#	ggml/src/ggml-zdnn/ggml-zdnn-impl.h
#	ggml/src/ggml-zdnn/ggml-zdnn.cpp
#	tools/llama-bench/llama-bench.cpp
2026-05-08 18:30:50 +00:00 · 2025-09-13 12:25:30 +08:00 · 2025-09-13 12:25:30 +08:00 · 1dbd2fc259
commit 1dbd2fc259
parent 1b02ee7e7a 40be51152d
14 changed files with 406 additions and 68 deletions
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -923,7 +923,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
            new_type = tensor->type;
            new_data = tensor->data;
            new_size = ggml_nbytes(tensor);
-            LLAMA_LOG_INFO("size = %8.3f MB\n", ggml_nbytes(tensor)/1024.0/1024.0);
+            LLAMA_LOG_INFO("size = %8.3f MiB\n", ggml_nbytes(tensor)/1024.0/1024.0);
        } else {
            const int64_t nelements = ggml_nelements(tensor);

@@ -1040,8 +1040,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
    }
    close_ofstream();

-    LLAMA_LOG_INFO("%s: model size  = %8.2f MB\n", __func__, total_size_org/1024.0/1024.0);
-    LLAMA_LOG_INFO("%s: quant size  = %8.2f MB\n", __func__, total_size_new/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: model size  = %8.2f MiB\n", __func__, total_size_org/1024.0/1024.0);
+    LLAMA_LOG_INFO("%s: quant size  = %8.2f MiB\n", __func__, total_size_new/1024.0/1024.0);

    if (qs.n_fallback > 0) {
        LLAMA_LOG_WARN("%s: WARNING: %d of %d tensor(s) required fallback quantization\n",