Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	docs/backend/CANN.md
#	docs/multimodal/minicpmo2.6.md
#	docs/multimodal/minicpmv2.5.md
#	docs/multimodal/minicpmv2.6.md
#	examples/speculative-simple/speculative-simple.cpp
#	ggml/cmake/ggml-config.cmake.in
#	ggml/src/ggml-cann/aclnn_ops.cpp
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-cpu/repack.cpp
#	ggml/src/ggml-opencl/CMakeLists.txt
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-opencl/kernels/add.cl
#	ggml/src/ggml-opencl/kernels/mul.cl
#	scripts/compare-commits.sh
#	scripts/compare-llama-bench.py
#	scripts/sync-ggml.last
#	tools/server/README.md
This commit is contained in:
Concedo 2025-08-02 10:25:10 +08:00
commit f430916a71
57 changed files with 6028 additions and 731 deletions

View file

@@ -532,6 +532,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
};
break;
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
case LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE:
regex_exprs = {
"\\p{N}{1,3}",
"[一-龥぀-ゟ゠-ヿ]+",
@@ -2200,6 +2201,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "hunyuan") {
pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN;
clean_spaces = false;
} else if (
tokenizer_pre == "hunyuan-dense") {
pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE;
clean_spaces = false;
} else if (
tokenizer_pre == "kimi-k2") {
pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2;