Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/vulkan.Dockerfile
#	.github/workflows/build.yml
#	.github/workflows/server.yml
#	common/common.cpp
#	examples/batched/README.md
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-cpu/arch-fallback.h
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	scripts/sync-ggml.last
#	src/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tools/server/CMakeLists.txt
2026-05-07 17:22:04 +00:00 · 2025-11-25 16:38:07 +08:00 · 2025-11-25 16:38:07 +08:00 · 724763fdec
commit 724763fdec
parent df30473716 877566d512
55 changed files with 5248 additions and 3753 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -1250,7 +1250,7 @@ int llama_context::decode(const llama_batch & batch_inp) {

        // make the outputs have the same order they had in the user-provided batch
        // note: this is mostly relevant for recurrent models atm
-        if (!sorted_output) {
+        if (!sorted_output && n_outputs > 1) {
            GGML_ASSERT((size_t) n_outputs == out_ids.size());

            // TODO: is there something more efficient which also minimizes swaps?