Merge branch 'upstream' into concedo_experimental

# Conflicts: # .github/workflows/build.yml # examples/server/README.md
2026-05-07 09:02:04 +00:00 · 2024-11-06 23:13:00 +08:00 · 2024-11-06 23:13:00 +08:00 · 628dcd640e
commit 628dcd640e
parent 3c36bbdcd7 b11f9ba9b8
10 changed files with 1111 additions and 741 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9182,7 +9182,7 @@ static bool llm_load_tensors(

    // print memory requirements per buffer type
    for (auto & buf : model.bufs) {
-        LLAMA_LOG_INFO("%s: %10s model buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf.get()), ggml_backend_buffer_get_size(buf.get()) / 1024.0 / 1024.0);
+        LLAMA_LOG_INFO("%s: %12s model buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf.get()), ggml_backend_buffer_get_size(buf.get()) / 1024.0 / 1024.0);
    }

    // populate tensors_by_name
@@ -21893,8 +21893,11 @@ static int32_t llama_chat_apply_template_internal(
        // IBM Granite template
        for (const auto & message : chat) {
            std::string role(message->role);
-            ss << "<|start_of_role|>" << role << "<|end_of_role|>"
-               << message->content << "<|end_of_text|>\n";
+            ss << "<|start_of_role|>" << role << "<|end_of_role|>";
+            if (role == "assistant_tool_call") {
+                ss << "<|tool_call|>";
+            }
+            ss << message->content << "<|end_of_text|>\n";
        }
        if (add_ass) {
            ss << "<|start_of_role|>assistant<|end_of_role|>\n";