Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	CMakePresets.json
#	README.md
#	common/CMakeLists.txt
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-opencl/CMakeLists.txt
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	scripts/sync-ggml.last
#	tests/test-backend-ops.cpp
#	tools/run/CMakeLists.txt
This commit is contained in:
Concedo 2025-07-13 23:39:41 +08:00
commit 8cebec5128
41 changed files with 28682 additions and 366 deletions

View file

@ -1760,7 +1760,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "llama-bpe"||
tokenizer_pre == "falcon3" ||
tokenizer_pre == "falcon-h1" ||
tokenizer_pre == "pixtral") {
tokenizer_pre == "pixtral" ||
tokenizer_pre == "midm-2.0" ||
tokenizer_pre == "lfm2") {
pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3;
ignore_merges = true;
add_bos = true;
@ -2085,6 +2087,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<EOT>"
|| t.first == "_<EOT>"
|| t.first == "<end▁of▁sentence>" // DeepSeek
|| t.first == "<end_of_utterance>" // smoldocling
) {
special_eot_id = t.second;
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@ -2244,6 +2247,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<EOT>"
|| t.first == "_<EOT>"
|| t.first == "<|end_of_text|>"
|| t.first == "<end_of_utterance>" // smoldocling
) {
special_eog_ids.insert(t.second);
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {