Merge commit 'ddf9f94389' into concedo_experimental

# Conflicts:
#	examples/model-conversion/scripts/causal/run-converted-model.sh
#	examples/model-conversion/scripts/causal/run-org-model.py
#	src/CMakeLists.txt
#	src/llama-quant.cpp
#	tools/server/README.md
This commit is contained in:
Concedo 2025-11-28 23:27:50 +08:00
commit 0ccb298087
23 changed files with 2813 additions and 86 deletions

View file

@ -1,5 +1,6 @@
#include "llama-context.h"
#include "llama-arch.h"
#include "llama-impl.h"
#include "llama-batch.h"
#include "llama-io.h"
@ -1388,6 +1389,9 @@ void llama_context::output_reorder() {
//
// Computes the graph node limit as a multiple of the model's tensor count,
// clamped from below by a fixed minimum.
//
//   LLM_ARCH_QWEN3NEXT : max(8192, 32 * n_tensors)
//   everything else    : max(1024,  8 * n_tensors)
//
// NOTE(review): presumably Qwen3-Next builds more graph nodes per tensor than
// other architectures, hence the larger constants - confirm against the graph
// builder for that architecture.
uint32_t llama_context::graph_max_nodes() const {
    const bool is_qwen3next = (model.arch == LLM_ARCH_QWEN3NEXT);

    // both constants stay `unsigned int` so the multiplication below has the
    // same operand types as writing the literals inline
    const auto min_nodes        = is_qwen3next ? 8192u : 1024u;
    const auto nodes_per_tensor = is_qwen3next ?   32u :    8u;

    return std::max<uint32_t>(min_nodes, nodes_per_tensor*model.n_tensors());
}