Merge branch 'upstream' into concedo_experimental

# Conflicts:
# .github/workflows/ai-issues.yml
# CONTRIBUTING.md
# docs/autoparser.md
# docs/ops.md
# docs/ops/Metal.csv
# ggml/src/ggml-cann/aclnn_ops.cpp
# ggml/src/ggml-cann/ggml-cann.cpp
# ggml/src/ggml-cpu/CMakeLists.txt
# ggml/src/ggml-hexagon/ggml-hexagon.cpp
# ggml/src/ggml-hexagon/htp/CMakeLists.txt
# ggml/src/ggml-hexagon/htp/hex-dma.h
# ggml/src/ggml-hexagon/htp/hex-utils.h
# ggml/src/ggml-hexagon/htp/htp-ctx.h
# ggml/src/ggml-hexagon/htp/htp-msg.h
# ggml/src/ggml-hexagon/htp/htp_iface.idl
# ggml/src/ggml-hexagon/htp/hvx-base.h
# ggml/src/ggml-hexagon/htp/main.c
# ggml/src/ggml-hip/CMakeLists.txt
# models/templates/Apriel-1.6-15b-Thinker-fixed.jinja
# models/templates/deepseek-ai-DeepSeek-R1-Distill-Qwen-32B.jinja
# models/templates/deepseek-ai-DeepSeek-V3.1.jinja
# models/templates/llama-cpp-deepseek-r1.jinja
# models/templates/meetkai-functionary-medium-v3.1.jinja
# scripts/fetch_server_test_models.py
# scripts/snapdragon/adb/run-cli.sh
# scripts/snapdragon/adb/run-completion.sh
# scripts/snapdragon/adb/run-mtmd.sh
# scripts/snapdragon/adb/run-tool.sh
# tests/test-chat-auto-parser.cpp
# tests/test-chat-peg-parser.cpp
# tests/test-chat.cpp
# tools/cli/cli.cpp
# tools/server/README.md
2026-05-20 17:54:31 +00:00 · 2026-03-21 12:06:01 +08:00 · 2026-03-21 12:06:01 +08:00 · 6054bacadd
commit 6054bacadd
parent 98f099aecc b1c70e2e54
33 changed files with 834 additions and 491 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1948,6 +1948,7 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) {
            LLAMA_LOG_ERROR("%s: failed to allocate output buffer of size %.2f MiB\n", __func__, new_size / (1024.0 * 1024.0));
            return 0;
        }
+        ggml_backend_buffer_clear(buf_output.get(), 0);
    }

    float * output_base = (float *) ggml_backend_buffer_get_base(buf_output.get());