Merge commit 'd82b7a7c1d' into concedo_experimental

# Conflicts:
#	ci/run.sh
#	ggml/CMakeLists.txt
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cuda/common.cuh
#	tests/CMakeLists.txt
This commit is contained in:
Concedo 2025-11-30 15:43:11 +08:00
commit bf5efcf86d
17 changed files with 1046 additions and 999 deletions

View file

@ -301,7 +301,7 @@ llama_context::llama_context(
cross.v_embd.clear();
const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max;
const uint32_t n_seqs = cparams.n_seq_max;
const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
// avoid reserving graphs with zero outputs - assume one output per sequence
@ -545,7 +545,7 @@ bool llama_context::memory_update(bool optimize) {
throw std::runtime_error("failed to initialize memory context");
}
const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max;
const uint32_t n_seqs = cparams.n_seq_max;
const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get());