Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/musa.Dockerfile
#	.github/workflows/build.yml
#	.github/workflows/close-issue.yml
#	ci/README.md
#	docs/build.md
#	docs/docker.md
#	ggml/CMakeLists.txt
#	ggml/cmake/ggml-config.cmake.in
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cann/aclnn_ops.h
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-cuda/fattn-wmma-f16.cu
#	ggml/src/ggml-musa/CMakeLists.txt
#	ggml/src/ggml-rpc/ggml-rpc.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-sycl/vecdotq.hpp
#	scripts/sync-ggml.last
#	tests/test-backend-ops.cpp
#	tools/imatrix/README.md
#	tools/imatrix/imatrix.cpp
2025-09-11 09:34:37 +00:00 · 2025-07-25 19:53:13 +08:00 · 2025-07-25 19:53:13 +08:00 · 0fcfbdb93c
commit 0fcfbdb93c
parent 0d72c794fa 64bf1c3744
33 changed files with 501 additions and 348 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -959,6 +959,7 @@ extern "C" {
    // in the order they have appeared in the batch.
    // Rows: number of tokens for which llama_batch.logits[i] != 0
    // Cols: n_vocab
+    // TODO: deprecate in favor of llama_get_logits_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
    LLAMA_API float * llama_get_logits(struct llama_context * ctx);

    // Logits for the ith token. For positive indices, Equivalent to:
@@ -973,6 +974,7 @@ extern "C" {
    // in the order they have appeared in the batch.
    // shape: [n_outputs*n_embd]
    // Otherwise, returns NULL.
+    // TODO: deprecate in favor of llama_get_embeddings_ith() (ref: https://github.com/ggml-org/llama.cpp/pull/14853#issuecomment-3113143522)
    LLAMA_API float * llama_get_embeddings(struct llama_context * ctx);

    // Get the embeddings for the ith token. For positive indices, Equivalent to: