Merge branch 'upstream' into concedo_experimental

# Conflicts:
# .github/workflows/build.yml
# docs/ops.md
# docs/ops/SYCL.csv
# ggml/src/ggml-sycl/element_wise.cpp
# ggml/src/ggml-sycl/ggml-sycl.cpp
# ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
# ggml/src/ggml-webgpu/ggml-webgpu.cpp
# pyproject.toml
# requirements/requirements-convert_legacy_llama.txt
# src/CMakeLists.txt
# src/llama-vocab.cpp
# tests/test-backend-ops.cpp
2026-05-12 05:52:26 +00:00 · 2026-02-07 15:54:02 +08:00 · 2026-02-07 15:54:02 +08:00 · a0a78dacc4
commit a0a78dacc4
parent 9cf2119cb8 34ba7b5a2f
34 changed files with 2371 additions and 180 deletions
--- a/src/llama-context.cpp
+++ b/src/llama-context.cpp
@@ -2023,7 +2023,7 @@ void llama_context::output_reorder() {
 //

 uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
-    if (model.arch == LLM_ARCH_QWEN3NEXT) {
+    if (model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_KIMI_LINEAR) {
        return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
    }
    uint32_t res = std::max<uint32_t>(1024u, 8u*model.n_tensors());