From 60499061334888bdd0c903226db42d0ec2f11264 Mon Sep 17 00:00:00 2001 From: Winston Ma Date: Sun, 17 May 2026 01:57:35 +0800 Subject: [PATCH 01/28] vulkan: removed duplicate #include in headers (#23144) --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index d29a4bab2..a296d0ab4 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -49,7 +49,6 @@ DispatchLoaderDynamic & ggml_vk_default_dispatcher(); #include #include #include -#include #include #include #include From 64b38b561b987679c4e1c6231f93860d3eec2638 Mon Sep 17 00:00:00 2001 From: Pascal Date: Sat, 16 May 2026 21:21:06 +0200 Subject: [PATCH 02/28] server: skip device enumeration in router mode to avoid creating CUDA primary context (#23137) --- common/common.cpp | 18 +++++++++++------- common/common.h | 2 +- tools/server/server.cpp | 6 ++++-- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 8b6d182f5..9cf11ea9f 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -373,7 +373,7 @@ void common_init() { llama_log_set(common_log_default_callback, NULL); } -void common_params_print_info(const common_params & params) { +void common_params_print_info(const common_params & params, bool print_devices) { #ifdef NDEBUG const char * build_type = ""; #else @@ -382,12 +382,16 @@ void common_params_print_info(const common_params & params) { LOG_TRC("%s: build %d (%s) with %s for %s%s\n", __func__, llama_build_number(), llama_commit(), llama_compiler(), llama_build_target(), build_type); LOG_INF("log_info: verbosity = %d (adjust with the `-lv N` CLI arg)\n", common_log_get_verbosity_thold()); - LOG_INF("device_info:\n"); - for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { - auto * dev = ggml_backend_dev_get(i); - size_t free, total; - ggml_backend_dev_memory(dev, &free, &total); - LOG_INF(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + + // device enumeration creates a primary context on CUDA backends, skip it when the caller does not own any device + if (print_devices) { + LOG_INF("device_info:\n"); + for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { + auto * dev = ggml_backend_dev_get(i); + size_t free, total; + ggml_backend_dev_memory(dev, &free, &total); + LOG_INF(" - %-8s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); + } } LOG_INF("%s\n", common_params_get_system_info(params).c_str()); } diff --git a/common/common.h b/common/common.h index 4cca9d715..514bab119 100644 --- a/common/common.h +++ b/common/common.h @@ -708,7 +708,7 @@ struct common_params { // initializes the logging system and prints info about the build void common_init(); -void common_params_print_info(const common_params & params); +void common_params_print_info(const common_params & params, bool print_devices = true); std::string common_params_get_system_info(const common_params & params); bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]); diff --git a/tools/server/server.cpp b/tools/server/server.cpp index a23255078..c82f11794 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -86,7 +86,10 @@ int main(int argc, char ** argv) { llama_backend_init(); llama_numa_init(params.numa); - common_params_print_info(params); + // router server never loads a model and must not touch the GPU + // skip device enumeration so the CUDA primary context stays uncreated + const bool is_router_server = params.model.path.empty(); + common_params_print_info(params, !is_router_server); // validate batch size for embeddings // embeddings require all tokens to be processed in a single ubatch @@ -126,7 +129,6 @@ int main(int argc, char ** argv) { server_routes routes(params, ctx_server); server_tools tools; - bool is_router_server = params.model.path.empty(); std::optional models_routes{}; if (is_router_server) { // setup server instances manager From b64739ea393b3c9d07cc9907e0a611f707838051 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Sat, 16 May 2026 23:42:16 +0200 Subject: [PATCH 03/28] server: (router) alloc tmp buffer on heap (#23159) --- tools/server/server-models.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index 433d2d8f0..6c6fed52d 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -798,9 +798,10 @@ void server_models::load(const std::string & name) { std::thread log_thread([&]() { // read stdout/stderr and forward to main server log // also handle status report from child process + std::vector vec_buf(128 * 1024); // large buffer for storing info + char * buffer = vec_buf.data(); if (stdout_file) { - char buffer[128 * 1024]; // large buffer for storing info - while (fgets(buffer, sizeof(buffer), stdout_file) != nullptr) { + while (fgets(buffer, vec_buf.size(), stdout_file) != nullptr) { LOG("[%5d] %s", port, buffer); std::string str(buffer); if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_READY)) { From 4f13cb742476d81a6b42a2aa5996e82a478c2481 Mon Sep 17 00:00:00 2001 From: Judd <4046440+foldl@users.noreply.github.com> Date: Sun, 17 May 2026 08:13:44 +0800 Subject: [PATCH 04/28] webui: support video files as input (#22830) --- tools/ui/src/app.d.ts | 2 ++ .../app/badges/BadgesModality.svelte | 10 ++++--- ...hatAttachmentsListItemThumbnailFile.svelte | 14 ++++++++-- .../ChatAttachmentsPreview.svelte | 24 ++++++++++++++++- .../ChatAttachmentsPreviewCurrentItem.svelte | 15 ++++++++--- ...tAttachmentsPreviewCurrentItemVideo.svelte | 26 +++++++++++++++++++ ...hatAttachmentsPreviewThumbnailStrip.svelte | 5 +++- .../ChatFormActionAddDropdown.svelte | 3 +++ .../ChatFormActionAddSheet.svelte | 3 +++ .../ChatFormActionsAdd.svelte | 4 +++ .../ChatFormActionModels.svelte | 6 +++++ .../ChatFormActions/ChatFormActions.svelte | 3 +++ .../app/chat/ChatScreen/ChatScreen.svelte | 17 +++++++++++- tools/ui/src/lib/constants/attachment-menu.ts | 9 +++++++ tools/ui/src/lib/constants/icons.ts | 10 ++++--- .../src/lib/constants/supported-file-types.ts | 13 ++++++++++ tools/ui/src/lib/enums/attachment.ts | 5 +++- tools/ui/src/lib/enums/chat.ts | 3 ++- tools/ui/src/lib/enums/files.ts | 16 ++++++++++++ tools/ui/src/lib/enums/index.ts | 1 + tools/ui/src/lib/enums/model.ts | 3 ++- .../lib/hooks/use-attachment-menu.svelte.ts | 1 + tools/ui/src/lib/services/chat.service.ts | 19 ++++++++++++++ tools/ui/src/lib/stores/models.svelte.ts | 20 +++++++++++--- tools/ui/src/lib/types/api.d.ts | 5 ++++ tools/ui/src/lib/types/common.d.ts | 7 ++++- tools/ui/src/lib/types/database.d.ts | 9 +++++++ tools/ui/src/lib/types/index.ts | 1 + tools/ui/src/lib/types/models.d.ts | 2 ++ tools/ui/src/lib/utils/attachment-type.ts | 21 +++++++++++++++ .../src/lib/utils/convert-files-to-extra.ts | 15 +++++++++++ tools/ui/src/lib/utils/file-type.ts | 13 ++++++++++ tools/ui/src/lib/utils/index.ts | 2 +- .../src/lib/utils/modality-file-validation.ts | 16 ++++++++++-- .../src/lib/utils/process-uploaded-files.ts | 4 +++ .../tests/stories/fixtures/storybook-mocks.ts | 11 +++++--- 36 files changed, 310 insertions(+), 28 deletions(-) create mode 100644 tools/ui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsPreview/ChatAttachmentsPreviewCurrentItem/ChatAttachmentsPreviewCurrentItemVideo.svelte diff --git a/tools/ui/src/app.d.ts b/tools/ui/src/app.d.ts index f5af7323c..ec65952e9 100644 --- a/tools/ui/src/app.d.ts +++ b/tools/ui/src/app.d.ts @@ -39,6 +39,7 @@ import type { DatabaseMessage, DatabaseMessageExtra, DatabaseMessageExtraAudioFile, + DatabaseMessageExtraVideoFile, DatabaseMessageExtraImageFile, DatabaseMessageExtraTextFile, DatabaseMessageExtraPdfFile, @@ -102,6 +103,7 @@ declare global { DatabaseMessage, DatabaseMessageExtra, DatabaseMessageExtraAudioFile, + DatabaseMessageExtraVideoFile, DatabaseMessageExtraImageFile, DatabaseMessageExtraTextFile, DatabaseMessageExtraPdfFile, diff --git a/tools/ui/src/lib/components/app/badges/BadgesModality.svelte b/tools/ui/src/lib/components/app/badges/BadgesModality.svelte index 841f1dd9f..d87184ea9 100644 --- a/tools/ui/src/lib/components/app/badges/BadgesModality.svelte +++ b/tools/ui/src/lib/components/app/badges/BadgesModality.svelte @@ -1,5 +1,5 @@ {#each modalities as modality (modality)} - {#if modality === ModelModality.VISION || modality === ModelModality.AUDIO} + {#if modality === ModelModality.VISION || modality === ModelModality.AUDIO || modality === ModelModality.VIDEO} - Vision + Vision (Image) + {:else if modality === ModelModality.VIDEO} +