Merge branch 'upstream' into concedo_experimental

# Conflicts: # README.md # examples/server/README.md # examples/speculative/speculative.cpp # flake.lock # ggml/src/CMakeLists.txt # scripts/sync-ggml.last # tests/test-backend-ops.cpp
2025-09-12 09:59:41 +00:00 · 2024-11-14 21:40:52 +08:00 · 2024-11-14 21:40:52 +08:00 · df080b074d
commit df080b074d
parent bfa118ee45 2a82891a85
41 changed files with 132106 additions and 130341 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -656,11 +656,16 @@ struct server_context {
    }

    bool validate_model_chat_template() const {
-        llama_chat_message chat[] = {{"user", "test"}};
-
-        const int res = llama_chat_apply_template(model, nullptr, chat, 1, true, nullptr, 0);
-
-        return res > 0;
+        // Returns true when the model carries a "tokenizer.chat_template" metadata value and that
+        // template can be applied to a minimal one-message chat; false when the key is missing or
+        // applying the template does not report a positive result.
+        std::vector<char> model_template(2048, 0); // first guess; grown below when the value is longer
+        const std::string template_key = "tokenizer.chat_template";
+        int32_t res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
+        // NOTE(review): relies on llama_model_meta_val_str reporting the full value length even when
+        // the buffer was too small (snprintf-style) - confirm against llama.h. If it does, a result
+        // >= the buffer size means the template was truncated, so re-read it into a big enough buffer.
+        if (res >= 0 && static_cast<size_t>(res) >= model_template.size()) {
+            model_template.assign(static_cast<size_t>(res) + 1, 0); // +1 for the terminating NUL
+            res = llama_model_meta_val_str(model, template_key.c_str(), model_template.data(), model_template.size());
+        }
+        if (res < 0) {
+            return false; // key not present in the model metadata
+        }
+        llama_chat_message chat[] = {{"user", "test"}};
+        // The buffer is zero-filled and NUL-terminated, so it can be passed as a C string directly;
+        // with nullptr/0 as the output buffer only the return value of the call is inspected.
+        const int32_t chat_res = llama_chat_apply_template(model, model_template.data(), chat, 1, true, nullptr, 0);
+        return chat_res > 0;
    }

    void init() {