Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build-riscv.yml
#	.github/workflows/build.yml
#	ggml/src/ggml-hexagon/htp/argsort-ops.c
#	ggml/src/ggml-sycl/fattn-tile.hpp
#	tools/mtmd/CMakeLists.txt
This commit is contained in:
Concedo 2026-04-06 20:56:02 +08:00
commit a395af65db
16 changed files with 296 additions and 19 deletions

View file

@ -73,6 +73,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
{ "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE },
{ "gpt-oss", LLM_CHAT_TEMPLATE_OPENAI_MOE },
{ "hunyuan-dense", LLM_CHAT_TEMPLATE_HUNYUAN_DENSE },
{ "hunyuan-ocr", LLM_CHAT_TEMPLATE_HUNYUAN_OCR },
{ "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 },
{ "seed_oss", LLM_CHAT_TEMPLATE_SEED_OSS },
{ "grok-2", LLM_CHAT_TEMPLATE_GROK_2 },
@ -216,6 +217,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
} else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
return LLM_CHAT_TEMPLATE_OPENAI_MOE;
} else if (tmpl_contains("<hy_Assistant>") && tmpl_contains("<hy_begin▁of▁sentence>")) {
return LLM_CHAT_TEMPLATE_HUNYUAN_OCR;
} else if (tmpl_contains("<hy_Assistant>") && tmpl_contains("<hy_place▁holder▁no▁3>")) {
return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
} else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
@ -822,6 +825,22 @@ int32_t llm_chat_apply_template(
ss << "<hy_User>" << chat[i]->content << "<hy_Assistant>";
}
}
} else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_OCR) {
// tencent/HunyuanOCR
ss << "<hy_begin▁of▁sentence>";
for (size_t i = 0; i < chat.size(); i++) {
std::string role(chat[i]->role);
if (i == 0 && role == "system") {
ss << chat[i]->content << "<hy_place▁holder▁no▁3>";
continue;
}
if (role == "user") {
ss << chat[i]->content << "<hy_User>";
} else if (role == "assistant") {
ss << chat[i]->content << "<hy_Assistant>";
}
}
} else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
// moonshotai/Kimi-K2-Instruct
for (auto message : chat) {

View file

@ -53,6 +53,7 @@ enum llm_chat_template {
LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
LLM_CHAT_TEMPLATE_OPENAI_MOE,
LLM_CHAT_TEMPLATE_HUNYUAN_DENSE,
LLM_CHAT_TEMPLATE_HUNYUAN_OCR,
LLM_CHAT_TEMPLATE_KIMI_K2,
LLM_CHAT_TEMPLATE_SEED_OSS,
LLM_CHAT_TEMPLATE_GROK_2,

View file

@ -128,7 +128,7 @@ static std::string gguf_data_to_str(enum gguf_type type, const void * data, int
case GGUF_TYPE_INT64: return std::to_string(((const int64_t *)data)[i]);
case GGUF_TYPE_FLOAT32: return std::to_string(((const float *)data)[i]);
case GGUF_TYPE_FLOAT64: return std::to_string(((const double *)data)[i]);
case GGUF_TYPE_BOOL: return ((const bool *)data)[i] ? "true" : "false";
case GGUF_TYPE_BOOL: return ((const int8_t *)data)[i] != 0 ? "true" : "false";
default: return format("unknown type %d", type);
}
}

View file

@ -374,8 +374,9 @@ namespace GGUFMeta {
}
} else {
if (arr_info.gt == GGUF_TYPE_BOOL) {
std::transform((const bool *)arr_info.data, (const bool *)arr_info.data + arr_info.length, result.begin(), [](bool x) {
return static_cast<T>(x);
const int8_t * values = (const int8_t *) arr_info.data;
std::transform(values, values + arr_info.length, result.begin(), [](int8_t x) {
return static_cast<T>(x != 0);
});
} else {
std::copy((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length, result.begin());

View file

@ -2564,6 +2564,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
if (ml.get_key(LLM_KV_TOKENIZER_ADD_SEP, temp, false)) {
add_sep = temp;
}
// workaround for Gemma 4
// ref: https://github.com/ggml-org/llama.cpp/pull/21500
if (pre_type == LLAMA_VOCAB_PRE_TYPE_GEMMA4 && !add_bos) {
add_bos = true;
LLAMA_LOG_WARN("%s: override '%s' to 'true' for Gemma4\n", __func__, kv(LLM_KV_TOKENIZER_ADD_BOS).c_str());
}
}
// auto-detect special tokens by text