Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	README.md
#	docs/backend/SYCL.md
#	ggml/src/ggml-sycl/CMakeLists.txt
#	ggml/src/ggml-vulkan/CMakeLists.txt
#	ggml/src/ggml-vulkan/ggml-vulkan.cpp
#	scripts/sync-ggml.last
#	tests/test-chat-template.cpp
This commit is contained in:
Concedo 2025-04-01 20:16:07 +08:00
commit 9e182b3e78
44 changed files with 2395 additions and 831 deletions

View file

@ -567,6 +567,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
case LLAMA_VOCAB_PRE_TYPE_MPT:
case LLAMA_VOCAB_PRE_TYPE_OLMO:
case LLAMA_VOCAB_PRE_TYPE_JAIS:
case LLAMA_VOCAB_PRE_TYPE_TRILLION:
regex_exprs = {
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
};
@ -631,6 +632,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
"(?=(\\d{3})+(?!\\d))",
};
break;
case LLAMA_VOCAB_PRE_TYPE_BAILINGMOE:
regex_exprs = {
// original regex from tokenizer.json
// "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+"
"'(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+",
};
break;
default:
// default regex for BPE tokenization pre-processing
regex_exprs = {
@ -1849,6 +1857,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
tokenizer_pre == "superbpe") {
pre_type = LLAMA_VOCAB_PRE_TYPE_SUPERBPE;
clean_spaces = false;
} else if (
tokenizer_pre == "trillion") {
pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
clean_spaces = false;
} else if (
tokenizer_pre == "bailingmoe") {
pre_type = LLAMA_VOCAB_PRE_TYPE_BAILINGMOE;
clean_spaces = false;
} else {
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
}
@ -2029,6 +2045,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<end_of_turn>"
|| t.first == "<|endoftext|>"
|| t.first == "<EOT>"
|| t.first == "_<EOT>"
|| t.first == "<end▁of▁sentence>" // DeepSeek
) {
special_eot_id = t.second;
@ -2061,6 +2078,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<fim-prefix>"
|| t.first == "<fim▁begin>" // DeepSeek
|| t.first == "<PRE>"
|| t.first == "▁<PRE>" // CodeLlama
) {
special_fim_pre_id = t.second;
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@ -2078,6 +2096,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<fim-suffix>"
|| t.first == "<fim▁hole>" // DeepSeek
|| t.first == "<SUF>"
|| t.first == "▁<SUF>" // CodeLlama
) {
special_fim_suf_id = t.second;
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@ -2095,6 +2114,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<fim-middle>"
|| t.first == "<fim▁end>" // DeepSeek
|| t.first == "<MID>"
|| t.first == "▁<MID>" // CodeLlama
) {
special_fim_mid_id = t.second;
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {
@ -2179,6 +2199,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|| t.first == "<|endoftext|>"
|| t.first == "<|eom_id|>"
|| t.first == "<EOT>"
|| t.first == "_<EOT>"
) {
special_eog_ids.insert(t.second);
if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) {