mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-12 05:52:26 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts: # .github/workflows/build.yml # docs/ops.md # docs/ops/SYCL.csv # ggml/src/ggml-sycl/element_wise.cpp # ggml/src/ggml-sycl/ggml-sycl.cpp # ggml/src/ggml-webgpu/ggml-webgpu-shader-lib.hpp # ggml/src/ggml-webgpu/ggml-webgpu.cpp # pyproject.toml # requirements/requirements-convert_legacy_llama.txt # src/CMakeLists.txt # src/llama-vocab.cpp # tests/test-backend-ops.cpp
This commit is contained in:
commit
a0a78dacc4
34 changed files with 2371 additions and 180 deletions
|
|
@ -2023,7 +2023,7 @@ void llama_context::output_reorder() {
|
|||
//
|
||||
|
||||
uint32_t llama_context::graph_max_nodes(uint32_t n_tokens) const {
|
||||
if (model.arch == LLM_ARCH_QWEN3NEXT) {
|
||||
if (model.arch == LLM_ARCH_QWEN3NEXT || model.arch == LLM_ARCH_KIMI_LINEAR) {
|
||||
return std::max<uint32_t>(n_tokens * 40, 32u * model.n_tensors());
|
||||
}
|
||||
uint32_t res = std::max<uint32_t>(1024u, 8u*model.n_tensors());
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue