Merge branch 'upstream' into concedo_experimental

# Conflicts:
# .github/workflows/build-android.yml
# .github/workflows/build.yml
# .github/workflows/release.yml
# CMakeLists.txt
# CODEOWNERS
# common/CMakeLists.txt
# common/common.h
# docs/ops.md
# docs/ops/Metal.csv
# examples/batched/CMakeLists.txt
# examples/convert-llama2c-to-ggml/CMakeLists.txt
# examples/debug/CMakeLists.txt
# examples/diffusion/CMakeLists.txt
# examples/embedding/CMakeLists.txt
# examples/eval-callback/CMakeLists.txt
# examples/gen-docs/CMakeLists.txt
# examples/idle/CMakeLists.txt
# examples/lookahead/CMakeLists.txt
# examples/lookup/CMakeLists.txt
# examples/parallel/CMakeLists.txt
# examples/passkey/CMakeLists.txt
# examples/retrieval/CMakeLists.txt
# examples/save-load-state/CMakeLists.txt
# examples/speculative-simple/CMakeLists.txt
# examples/speculative/CMakeLists.txt
# examples/sycl/CMakeLists.txt
# examples/training/CMakeLists.txt
# ggml/src/ggml-hexagon/htp/hmx-matmul-ops.c
# ggml/src/ggml-hexagon/htp/htp-ops.h
# ggml/src/ggml-hexagon/htp/main.c
# ggml/src/ggml-opencl/CMakeLists.txt
# ggml/src/ggml-opencl/ggml-opencl.cpp
# ggml/src/ggml-opencl/kernels/cvt.cl
# pocs/vdot/CMakeLists.txt
# src/CMakeLists.txt
# tests/CMakeLists.txt
# tests/test-quantize-stats.cpp
# tools/batched-bench/CMakeLists.txt
# tools/cli/CMakeLists.txt
# tools/cli/cli.cpp
# tools/completion/CMakeLists.txt
# tools/cvector-generator/CMakeLists.txt
# tools/cvector-generator/cvector-generator.cpp
# tools/export-lora/CMakeLists.txt
# tools/gguf-split/CMakeLists.txt
# tools/gguf-split/gguf-split.cpp
# tools/imatrix/CMakeLists.txt
# tools/llama-bench/CMakeLists.txt
# tools/llama-bench/llama-bench.cpp
# tools/mtmd/CMakeLists.txt
# tools/perplexity/CMakeLists.txt
# tools/quantize/CMakeLists.txt
# tools/quantize/quantize.cpp
# tools/results/CMakeLists.txt
# tools/server/CMakeLists.txt
# tools/tokenize/CMakeLists.txt
# tools/tts/CMakeLists.txt
2026-05-18 23:49:46 +00:00 · 2026-04-17 22:37:37 +08:00 · 2026-04-17 22:37:37 +08:00 · 79882d669a
commit 79882d669a
parent 768527b031 a279d0f0f4
146 changed files with 1507 additions and 2103 deletions
--- a/tools/server/server-common.cpp
+++ b/tools/server/server-common.cpp
@@ -84,12 +84,16 @@ std::string gen_tool_call_id() {
    return random_string();
 }

-static std::string media_marker = "";
 const char * get_media_marker() {
-    if (media_marker.empty()) {
-        media_marker = "<__media_" + random_string() + "__>";
-    }
-    return media_marker.c_str();
+    static const std::string marker = []() {
+        // allow user to pin a reproducible marker via env var
+        const char * env = getenv("LLAMA_MEDIA_MARKER");
+        if (env && env[0] != '\0') {
+            return std::string(env);
+        }
+        return std::string("<__media_") + random_string() + "__>";
+    }();
+    return marker.c_str();
 }

 //
--- a/tools/server/server-context.cpp
+++ b/tools/server/server-context.cpp
@@ -4,6 +4,7 @@
 #include "server-task.h"
 #include "server-queue.h"

+#include "build-info.h"
 #include "common.h"
 #include "llama.h"
 #include "log.h"
@@ -3010,7 +3011,7 @@ server_context_meta server_context::get_meta() const {
    auto eos_token_str = eos_id != LLAMA_TOKEN_NULL ? common_token_to_piece(impl->ctx, eos_id, true) : "";

    return server_context_meta {
-        /* build_info             */ build_info,
+        /* build_info             */ std::string(llama_build_info()),
        /* model_name             */ impl->model_name,
        /* model_aliases          */ impl->model_aliases,
        /* model_tags             */ impl->model_tags,
--- a/tools/server/server-models.cpp
+++ b/tools/server/server-models.cpp
@@ -1,6 +1,7 @@
 #include "server-common.h"
 #include "server-models.h"

+#include "build-info.h"
 #include "preset.h"
 #include "download.h"

@@ -936,7 +937,7 @@ void server_models_routes::init_routes() {
                    {"n_ctx",  0},
                }},
                {"webui_settings", webui_settings},
-                {"build_info",     build_info},
+                {"build_info",     std::string(llama_build_info())},
            });
            return res;
        }
--- a/tools/server/server-task.cpp
+++ b/tools/server/server-task.cpp
@@ -1,5 +1,6 @@
 #include "server-task.h"

+#include "build-info.h"
 #include "chat.h"
 #include "common.h"
 #include "json-schema-to-grammar.h"
@@ -791,7 +792,7 @@ json server_task_result_cmpl_final::to_json_oaicompat() {
        })},
        {"created",            t},
        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
+        {"system_fingerprint", std::string(llama_build_info())},
        {"object",             "text_completion"},
        {"usage",              usage_json_oaicompat()},
        {"id", oaicompat_cmpl_id}
@@ -839,7 +840,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat() {
        {"choices",            json::array({choice})},
        {"created",            t},
        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
+        {"system_fingerprint", std::string(llama_build_info())},
        {"object",             "chat.completion"},
        {"usage",              usage_json_oaicompat()},
        {"id", oaicompat_cmpl_id}
@@ -876,7 +877,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
            {"created", t},
            {"id", oaicompat_cmpl_id},
            {"model", oaicompat_model},
-            {"system_fingerprint", build_info},
+            {"system_fingerprint", std::string(llama_build_info())},
            {"object", "chat.completion.chunk"},
        });
    }
@@ -892,7 +893,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
        {"created",            t},
        {"id",                 oaicompat_cmpl_id},
        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
+        {"system_fingerprint", std::string(llama_build_info())},
        {"object",             "chat.completion.chunk"},
    });

@@ -904,7 +905,7 @@ json server_task_result_cmpl_final::to_json_oaicompat_chat_stream() {
            {"created",            t},
            {"id",                 oaicompat_cmpl_id},
            {"model",              oaicompat_model},
-            {"system_fingerprint", build_info},
+            {"system_fingerprint", std::string(llama_build_info())},
            {"object",             "chat.completion.chunk"},
            {"usage",              usage_json_oaicompat()},
        });
@@ -1469,7 +1470,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat() {
        })},
        {"created",            t},
        {"model",              oaicompat_model},
-        {"system_fingerprint", build_info},
+        {"system_fingerprint", std::string(llama_build_info())},
        {"object",             "text_completion"},
        {"id",                 oaicompat_cmpl_id}
    };
@@ -1506,7 +1507,7 @@ json server_task_result_cmpl_partial::to_json_oaicompat_chat() {
            {"created", t},
            {"id", oaicompat_cmpl_id},
            {"model", oaicompat_model},
-            {"system_fingerprint", build_info},
+            {"system_fingerprint", std::string(llama_build_info())},
            {"object", "chat.completion.chunk"},
        });
    };
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -5,6 +5,7 @@
 #include "server-tools.h"

 #include "arg.h"
+#include "build-info.h"
 #include "common.h"
 #include "llama.h"
 #include "log.h"
@@ -108,7 +109,7 @@ int main(int argc, char ** argv) {
    llama_backend_init();
    llama_numa_init(params.numa);

-    LOG_INF("build_info: %s\n", build_info.c_str());
+    LOG_INF("build_info: %s\n", llama_build_info());
    LOG_INF("%s\n", common_params_get_system_info(params).c_str());

    server_http_context ctx_http;
--- a/tools/server/tests/unit/test_vision_api.py
+++ b/tools/server/tests/unit/test_vision_api.py
@@ -37,6 +37,7 @@ JSON_PROMPT_STRING_KEY = "prompt_string"
@pytest.fixture(autouse=True)
 def create_server():
    global server
+    os.environ['LLAMA_MEDIA_MARKER'] = '<__media__>'
    server = ServerPreset.tinygemma3()

 def test_models_supports_multimodal_capability():