Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	docs/ops.md
#	docs/ops/SYCL.csv
#	ggml/src/ggml-sycl/element_wise.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	pyproject.toml
#	requirements/requirements-convert_legacy_llama.txt
#	src/CMakeLists.txt
#	src/llama-vocab.cpp
#	tests/test-backend-ops.cpp
This commit is contained in:
Concedo 2026-02-07 15:54:02 +08:00
commit a0a78dacc4
34 changed files with 2371 additions and 180 deletions

View file

@@ -2023,7 +2023,7 @@ void llama_context::output_reorder() {
//
uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
-if (model.arch == LLM_ARCH_QWEN3NEXT) {
+if (model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_KIMI_LINEAR) {
return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
}
uint32_t res = std::max<uint32_t>(1024u, 8u*model.n_tensors());