Merge commit 'd82b7a7c1d' into concedo_experimental

# Conflicts:
#	ci/run.sh
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cuda/common.cuh
#	tests/CMakeLists.txt
2026-05-08 01:41:37 +00:00 · 2025-11-30 15:43:11 +08:00 · 2025-11-30 15:43:11 +08:00 · bf5efcf86d
commit bf5efcf86d
parent 65a3b75dac d82b7a7c1d
17 changed files with 1046 additions and 999 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -301,7 +301,7 @@ llama_context::llama_context(

        cross.v_embd.clear();

-        const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max;
+        const uint32_t n_seqs = cparams.n_seq_max;
        const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);

        // avoid reserving graphs with zero outputs - assume one output per sequence
@@ -545,7 +545,7 @@ bool llama_context::memory_update(bool optimize) {
            throw std::runtime_error("failed to initialize memory context");
        }

-        const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max;
+        const uint32_t n_seqs = cparams.n_seq_max;
        const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);

        auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get());