model : add hunyuan moe (#14425)

* model : add hunyuan moe

* tokenizer ok

* fix tensor name

* cgraph init

* chat template

* wip

* almost working

* skip embed, fix bos

* cleanup

* yarn scaling

* cleanup

* correct rope type

* failed token fix

* ntk alpha freq_base

* tokenization working

* cleanup and pr changes

* vocab_size sanity check

* ntk alpha generic

* Update convert_hf_to_gguf.py

* Apply suggestions from code review

* fix regression

* fix style

---------

Co-authored-by: kooshi <1934337+kooshi@users.noreply.github.com>
This commit is contained in:
Xuan-Son Nguyen 2025-07-08 10:24:06 +02:00 committed by GitHub
parent 53903ae6fa
commit 8f22dc0a53
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 449 additions and 0 deletions

View file

@ -64,6 +64,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "bailing", LLM_CHAT_TEMPLATE_BAILING },
{ "llama4", LLM_CHAT_TEMPLATE_LLAMA4 },
{ "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM },
{ "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
};
llm_chat_template llm_chat_template_from_str(const std::string & name) {
@ -185,6 +186,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
return LLM_CHAT_TEMPLATE_LLAMA4;
} else if (tmpl_contains("<|endofuserprompt|>")) {
return LLM_CHAT_TEMPLATE_DOTS1;
} else if (tmpl_contains("<|startoftext|>") && tmpl_contains("<|extra_4|>")) {
return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
}
return LLM_CHAT_TEMPLATE_UNKNOWN;
}
@ -665,6 +668,21 @@ int32_t llm_chat_apply_template(
if (add_ass) {
ss << "<|response|>";
}
} else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) {
// tencent/Hunyuan-A13B-Instruct
for (auto message : chat) {
std::string role(message->role);
if (role == "system") {
ss << "<|startoftext|>" << message->content << "<|extra_4|>";
} else if (role == "assistant") {
ss << "<|startoftext|>" << message->content << "<|eos|>";
} else {
ss << "<|startoftext|>" << message->content << "<|extra_0|>";
}
}
if (add_ass) {
ss << "<|startoftext|>";
}
} else {
// template not supported
return -1;