Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	README.md
#	ggml/src/ggml-cann/ggml-cann.cpp
#	ggml/src/ggml-opencl/ggml-opencl.cpp
#	ggml/src/ggml-sycl/ggml-sycl.cpp
#	tests/test-backend-ops.cpp
#	tests/test-chat-template.cpp
commit 6eea7b88d2
Concedo, 2025-08-06 10:51:29 +08:00
80 changed files with 2737 additions and 185 deletions


@@ -2553,6 +2553,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
                     || t.first == "<|eot_id|>"
                     || t.first == "<|im_end|>"
                     || t.first == "<|end|>"
+                    || t.first == "<|return|>" // o200k_harmony
+                    || t.first == "<|call|>"   // o200k_harmony
                     || t.first == "<end_of_turn>"
                     || t.first == "<|endoftext|>"
                     || t.first == "<|eom_id|>"
@@ -2576,6 +2578,13 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             }
         }
 
+        // @ngxson : quick hack for gpt-oss, always render these tokens
+        for (const auto & t : token_to_id) {
+            if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
+                id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
+            }
+        }
+
         // sanity checks
         if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
             special_eog_ids.insert(special_eos_id);
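Note on the hunk above: the attribute flip matters because the detokenizer hides CONTROL tokens from user-visible output unless the caller explicitly asks for special tokens, while USER_DEFINED tokens are always rendered as raw text. gpt-oss emits its channel markers inline, so a client that needs to split the analysis channel from the final answer has to see them. A minimal standalone sketch of that rendering rule (a simplified model, not the actual llama.cpp detokenizer; `token_data`, `render`, and the attr enum here are illustrative):

```cpp
#include <cstdio>
#include <string>
#include <vector>

enum token_attr { ATTR_NORMAL, ATTR_CONTROL, ATTR_USER_DEFINED };

struct token_data {
    std::string text;
    token_attr  attr;
};

// simplified rule modeled on the detokenizer's behavior: CONTROL tokens are
// dropped unless the caller requested special tokens; everything else is
// emitted verbatim
static std::string render(const std::vector<token_data> & toks, bool render_special) {
    std::string out;
    for (const auto & t : toks) {
        if (t.attr == ATTR_CONTROL && !render_special) {
            continue; // hidden from user-visible output
        }
        out += t.text;
    }
    return out;
}

int main() {
    std::vector<token_data> toks = {
        { "<|channel|>", ATTR_USER_DEFINED }, // rendered even with render_special=false
        { "analysis",    ATTR_NORMAL       },
        { "<|message|>", ATTR_USER_DEFINED },
        { "thinking",    ATTR_NORMAL       },
    };
    // prints "<|channel|>analysis<|message|>thinking"
    std::printf("%s\n", render(toks, /*render_special=*/false).c_str());
    return 0;
}
```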
@@ -2591,6 +2600,36 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
             special_eog_ids.insert(special_eom_id);
             LLAMA_LOG_WARN("%s: special_eom_id is not in special_eog_ids - the tokenizer config may be incorrect\n", __func__);
         }
+
+        // TODO: workaround for o200k_harmony tokenizer: the "<|end|>" token should not be EOG
+        // we don't have a good way to detect this, so for now, if we have "<|return|>" and "<|call|>" tokens,
+        // we remove the "<|end|>" token from the EOG list
+        {
+            bool has_return = false;
+            bool has_call   = false;
+            bool has_end    = false;
+
+            llama_token end_id = LLAMA_TOKEN_NULL;
+
+            LLAMA_LOG_INFO("%s: printing all EOG tokens:\n", __func__);
+            for (auto tid : special_eog_ids) {
+                LLAMA_LOG_INFO("%s:   - %d ('%s')\n", __func__, tid, id_to_token[tid].text.c_str());
+
+                if (id_to_token[tid].text == "<|return|>") {
+                    has_return = true;
+                } else if (id_to_token[tid].text == "<|call|>") {
+                    has_call = true;
+                } else if (id_to_token[tid].text == "<|end|>") {
+                    has_end = true;
+                    end_id = tid;
+                }
+            }
+
+            if (has_return && has_call && has_end) {
+                special_eog_ids.erase(end_id);
+                LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
+            }
+        }
     }
 
     // build special tokens cache
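For context on the workaround in the last hunk: in the Harmony format, `<|end|>` only closes a single channel message (e.g. the analysis channel), while `<|return|>` ends the whole response and `<|call|>` hands off to a tool. Keeping `<|end|>` in the EOG set would therefore stop generation before the final channel is produced. A standalone sketch of the resulting stopping behavior (token ids are made up for illustration, not the real o200k_harmony ids):

```cpp
#include <cstdio>
#include <set>
#include <vector>

int main() {
    // hypothetical token ids for illustration only
    const int END = 7, RETURN = 2, CALL = 12;

    std::set<int> special_eog_ids = { RETURN, CALL, END };

    // the workaround from the diff: when <|return|> and <|call|> are both EOG,
    // <|end|> merely closes a channel message and must not stop generation
    if (special_eog_ids.count(RETURN) && special_eog_ids.count(CALL)) {
        special_eog_ids.erase(END);
    }

    // a Harmony-style stream: analysis channel, <|end|>, final channel, <|return|>
    std::vector<int> stream = { 100, 101, END, 200, 201, RETURN };

    for (int tok : stream) {
        if (special_eog_ids.count(tok)) {
            std::printf("stop at %d\n", tok); // fires at <|return|>, not <|end|>
            break;
        }
        std::printf("emit %d\n", tok);
    }
    return 0;
}
```

Without the `erase`, the loop would stop at the `<|end|>` after the analysis tokens and the final channel would never be emitted.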