Merge branch 'upstream' into concedo_experimental

# Conflicts: # .github/pull_request_template.md # .gitignore # docs/backend/SYCL.md # docs/ops.md # docs/ops/WebGPU.csv # examples/sycl/test.sh # examples/sycl/win-test.bat # ggml/src/ggml-sycl/common.hpp # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-sycl/sycl_hw.cpp # ggml/src/ggml-sycl/sycl_hw.hpp # ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp # ggml/src/ggml-webgpu/ggml-webgpu.cpp
2026-05-19 16:31:59 +00:00 · 2026-04-25 19:06:32 +08:00 · 2026-04-25 19:06:32 +08:00 · b31877e8ec
commit b31877e8ec
parent c04832bb2b 9d34231bb8
1 changed file with 1 addition and 1 deletion
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -1285,7 +1285,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
 llama_model_quantize_params llama_model_quantize_default_params() {
    llama_model_quantize_params result = {
        /*.nthread                     =*/ 0,
-        /*.ftype                       =*/ LLAMA_FTYPE_MOSTLY_Q5_1,
+        /*.ftype                       =*/ LLAMA_FTYPE_MOSTLY_Q8_0,
        /*.output_tensor_type          =*/ GGML_TYPE_COUNT,
        /*.token_embedding_type        =*/ GGML_TYPE_COUNT,
        /*.allow_requantize            =*/ false,