Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	.github/workflows/build-riscv.yml
#	.github/workflows/build.yml
#	ggml/src/ggml-hexagon/htp/argsort-ops.c
#	ggml/src/ggml-sycl/fattn-tile.hpp
#	tools/mtmd/CMakeLists.txt
2026-05-18 06:19:19 +00:00 · 2026-04-06 20:56:02 +08:00 · 2026-04-06 20:56:02 +08:00 · a395af65db
commit a395af65db
parent 82cc19e055 400ac8e194
16 changed files with 296 additions and 19 deletions
--- a/src/llama-chat.cpp
+++ b/src/llama-chat.cpp
@@ -73,6 +73,7 @@ static const std::map<std::string, llm_chat_template> LLM_CHAT_TEMPLATES = {
    { "hunyuan-moe",       LLM_CHAT_TEMPLATE_HUNYUAN_MOE       },
    { "gpt-oss",           LLM_CHAT_TEMPLATE_OPENAI_MOE        },
    { "hunyuan-dense",     LLM_CHAT_TEMPLATE_HUNYUAN_DENSE     },
+    { "hunyuan-ocr",       LLM_CHAT_TEMPLATE_HUNYUAN_OCR       },
    { "kimi-k2",           LLM_CHAT_TEMPLATE_KIMI_K2           },
    { "seed_oss",          LLM_CHAT_TEMPLATE_SEED_OSS          },
    { "grok-2",            LLM_CHAT_TEMPLATE_GROK_2            },
@@ -216,6 +217,8 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_MOE;
    } else if (tmpl_contains("<|start|>") && tmpl_contains("<|channel|>")) {
        return LLM_CHAT_TEMPLATE_OPENAI_MOE;
+    } else if (tmpl_contains("<｜hy_Assistant｜>") && tmpl_contains("<｜hy_begin▁of▁sentence｜>")) {
+        return LLM_CHAT_TEMPLATE_HUNYUAN_OCR;
    } else if (tmpl_contains("<｜hy_Assistant｜>") && tmpl_contains("<｜hy_place▁holder▁no▁3｜>")) {
        return LLM_CHAT_TEMPLATE_HUNYUAN_DENSE;
    } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) {
@@ -822,6 +825,22 @@ int32_t llm_chat_apply_template(
                ss << "<｜hy_User｜>" << chat[i]->content << "<｜hy_Assistant｜>";
            }
        }
+    } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_OCR) {
+        // tencent/HunyuanOCR
+        ss << "<｜hy_begin▁of▁sentence｜>";
+        for (size_t i = 0; i < chat.size(); i++) {
+            std::string role(chat[i]->role);
+            if (i == 0 && role == "system") {
+                ss << chat[i]->content << "<｜hy_place▁holder▁no▁3｜>";
+                continue;
+            }
+
+            if (role == "user") {
+                ss << chat[i]->content << "<｜hy_User｜>";
+            } else if (role == "assistant") {
+                ss << chat[i]->content << "<｜hy_Assistant｜>";
+            }
+        }
    } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) {
        // moonshotai/Kimi-K2-Instruct
        for (auto message : chat) {
--- a/src/llama-chat.h
+++ b/src/llama-chat.h
@@ -53,6 +53,7 @@ enum llm_chat_template {
    LLM_CHAT_TEMPLATE_HUNYUAN_MOE,
    LLM_CHAT_TEMPLATE_OPENAI_MOE,
    LLM_CHAT_TEMPLATE_HUNYUAN_DENSE,
+    LLM_CHAT_TEMPLATE_HUNYUAN_OCR,
    LLM_CHAT_TEMPLATE_KIMI_K2,
    LLM_CHAT_TEMPLATE_SEED_OSS,
    LLM_CHAT_TEMPLATE_GROK_2,
--- a/src/llama-impl.cpp
+++ b/src/llama-impl.cpp
@@ -128,7 +128,7 @@ static std::string gguf_data_to_str(enum gguf_type type, const void * data, int
        case GGUF_TYPE_INT64:   return std::to_string(((const int64_t  *)data)[i]);
        case GGUF_TYPE_FLOAT32: return std::to_string(((const float    *)data)[i]);
        case GGUF_TYPE_FLOAT64: return std::to_string(((const double   *)data)[i]);
-        case GGUF_TYPE_BOOL:    return ((const bool *)data)[i] ? "true" : "false";
+        case GGUF_TYPE_BOOL:    return ((const int8_t *)data)[i] != 0 ? "true" : "false";
        default:                return format("unknown type %d", type);
    }
 }
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -374,8 +374,9 @@ namespace GGUFMeta {
            }
        } else {
            if (arr_info.gt == GGUF_TYPE_BOOL) {
-                std::transform((const bool *)arr_info.data, (const bool *)arr_info.data + arr_info.length, result.begin(), [](bool x) {
-                    return static_cast<T>(x);
+                const int8_t * values = (const int8_t *) arr_info.data;
+                std::transform(values, values + arr_info.length, result.begin(), [](int8_t x) {
+                    return static_cast<T>(x != 0);
                });
            } else {
                std::copy((const T*)arr_info.data, (const T *)arr_info.data + arr_info.length, result.begin());
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2564,6 +2564,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
            if (ml.get_key(LLM_KV_TOKENIZER_ADD_SEP, temp, false)) {
                add_sep = temp;
            }
+
+            // workaround for Gemma 4
+            // ref: https://github.com/ggml-org/llama.cpp/pull/21500
+            if (pre_type == LLAMA_VOCAB_PRE_TYPE_GEMMA4 && !add_bos) {
+                add_bos = true;
+
+                LLAMA_LOG_WARN("%s: override '%s' to 'true' for Gemma4\n", __func__, kv(LLM_KV_TOKENIZER_ADD_BOS).c_str());
+            }
        }

        // auto-detect special tokens by text