Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.devops/intel.Dockerfile
#	CMakeLists.txt
#	README.md
#	common/CMakeLists.txt
#	docs/multimodal.md
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cpu/CMakeLists.txt
#	ggml/src/ggml-metal/CMakeLists.txt
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/ggml-sycl/common.hpp
#	ggml/src/ggml-sycl/cpy.cpp
#	ggml/src/ggml-sycl/gemm.hpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	src/llama-context.cpp
commit 5f9e96e82d
Author: Concedo
Date:   2025-06-14 09:05:45 +08:00

18 changed files with 505 additions and 247 deletions

@@ -359,10 +359,10 @@ llama_pos llama_kv_cache_recurrent::seq_pos_max(llama_seq_id seq_id) const {
     return result;
 }
 
-llama_memory_state_ptr llama_kv_cache_recurrent::init_batch(const llama_batch & batch, uint32_t n_ubatch, bool embd_pooled, bool logits_all) {
+llama_memory_state_ptr llama_kv_cache_recurrent::init_batch(const llama_batch & batch, uint32_t n_ubatch, bool embd_pooled) {
     GGML_UNUSED(embd_pooled);
 
-    auto sbatch = llama_sbatch(batch, hparams.n_embd, false, logits_all);
+    auto sbatch = llama_sbatch(batch, hparams.n_embd, false);
 
     std::vector<llama_ubatch> ubatches;
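
For readers tracking the upstream API change, below is a minimal sketch of a call site before and after this hunk. Only the two init_batch signatures come from the diff itself; the prepare_batch wrapper and the way the cache object is obtained are illustrative assumptions, not part of this merge.

// Sketch of adapting a call site to the signature change above.
// Assumed: `cache` is a llama_kv_cache_recurrent instance and this code
// lives inside llama.cpp, where the relevant types are already visible.
llama_memory_state_ptr prepare_batch(llama_kv_cache_recurrent & cache,
                                     const llama_batch & batch,
                                     uint32_t n_ubatch,
                                     bool embd_pooled) {
    // Before the merge, callers forwarded a blanket flag:
    //   return cache.init_batch(batch, n_ubatch, embd_pooled, /*logits_all=*/false);
    // After the merge the logits_all parameter is gone from both init_batch
    // and the llama_sbatch constructor, as the diff shows:
    return cache.init_batch(batch, n_ubatch, embd_pooled);
}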