diff --git a/common/arg.cpp b/common/arg.cpp index d2578ff65..30b03092b 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1282,13 +1282,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_env("LLAMA_ARG_SWA_FULL")); add_opt(common_arg( - {"--ctx-checkpoints", "--swa-checkpoints"}, "N", + {"-ctxcp", "--ctx-checkpoints", "--swa-checkpoints"}, "N", string_format("max number of context checkpoints to create per slot (default: %d)" "[(more info)](https://github.com/ggml-org/llama.cpp/pull/15293)", params.n_ctx_checkpoints), [](common_params & params, int value) { params.n_ctx_checkpoints = value; } ).set_env("LLAMA_ARG_CTX_CHECKPOINTS").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI})); + add_opt(common_arg( + {"-cpent", "--checkpoint-every-n-tokens"}, "N", + string_format("create a checkpoint every n tokens during prefill (processing), -1 to disable (default: %d)", params.checkpoint_every_nt), + [](common_params & params, int value) { + params.checkpoint_every_nt = value; + } + ).set_env("LLAMA_ARG_CHECKPOINT_EVERY_NT").set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_CLI})); add_opt(common_arg( {"-cram", "--cache-ram"}, "N", string_format("set the maximum cache size in MiB (default: %d, -1 - no limit, 0 - disable)" @@ -2830,6 +2837,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.webui_config_json = read_file(value); } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_CONFIG_FILE")); + add_opt(common_arg( + {"--webui-mcp-proxy"}, + {"--no-webui-mcp-proxy"}, + string_format("experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments (default: %s)", params.webui_mcp_proxy ? 
"enabled" : "disabled"), + [](common_params & params, bool value) { + params.webui_mcp_proxy = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_MCP_PROXY")); add_opt(common_arg( {"--webui"}, {"--no-webui"}, diff --git a/common/common.h b/common/common.h index 74aa2c326..8a6bbe92a 100644 --- a/common/common.h +++ b/common/common.h @@ -513,14 +513,15 @@ struct common_params { std::string cls_sep = "\t"; // separator of classification sequences // server params - int32_t port = 8080; // server listens on this network port - int32_t timeout_read = 600; // http read timeout in seconds - int32_t timeout_write = timeout_read; // http write timeout in seconds - int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool) - int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting - bool cache_prompt = true; // whether to enable prompt caching - int32_t n_ctx_checkpoints = 8; // max number of context checkpoints per slot - int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc. + int32_t port = 8080; // server listens on this network port + int32_t timeout_read = 600; // http read timeout in seconds + int32_t timeout_write = timeout_read; // http write timeout in seconds + int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool) + int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting + bool cache_prompt = true; // whether to enable prompt caching + int32_t n_ctx_checkpoints = 32; // max number of context checkpoints per slot + int32_t checkpoint_every_nt = 8192; // make a checkpoint every n tokens during prefill + int32_t cache_ram_mib = 8192; // -1 = no limit, 0 - disable, 1 = 1 MiB, etc. 
std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT @@ -542,6 +543,7 @@ struct common_params { // webui configs bool webui = true; + bool webui_mcp_proxy = false; std::string webui_config_json; // "advanced" endpoints are disabled by default for better security diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp index 2e9ddf224..5de64b816 100644 --- a/ggml/src/ggml-blas/ggml-blas.cpp +++ b/ggml/src/ggml-blas/ggml-blas.cpp @@ -339,8 +339,8 @@ static const char * ggml_backend_blas_device_get_description(ggml_backend_dev_t } static void ggml_backend_blas_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { - // TODO - *free = 0; + // no memory to report + *free = 0; *total = 0; GGML_UNUSED(dev); diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index 5fd452a03..c89e5076f 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -2497,7 +2497,7 @@ class tinyBLAS_Q0_PPC { for (int r = 0; r < 8; r++) { const block_q4_0 * current_blk = rows_base[r] + blk; vector float v_scale = vec_extract_fp32_from_shorth(vec_splats(current_blk->d)); - vector signed char v_qs = reinterpret_cast(vec_xl(0, current_blk->qs)); + vector signed char v_qs = vec_xl(0, (const vector signed char *)current_blk->qs); vector signed char c1, c2; unpack_q4_to_q8(v_qs, c1, c2); convert_and_scale_q8(c1, v_scale, hp_res[r][0], hp_res[r][1]); @@ -2611,14 +2611,14 @@ class tinyBLAS_Q0_PPC { i = (cols >> 2); if (i > 0) { do { - c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); - c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); - c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); - c4[1] = reinterpret_cast(vec_xl(0, aoffset4->qs)); - c5[1] = reinterpret_cast(vec_xl(0, aoffset5->qs)); - c6[1] = reinterpret_cast(vec_xl(0, aoffset6->qs)); - c7[1] = reinterpret_cast(vec_xl(0, aoffset7->qs)); - c8[1] = reinterpret_cast(vec_xl(0, aoffset8->qs)); + c1[1] = 
vec_xl(0, (const vector signed char *)aoffset1->qs); + c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs); + c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs); + c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs); + c5[1] = vec_xl(0, (const vector signed char *)aoffset5->qs); + c6[1] = vec_xl(0, (const vector signed char *)aoffset6->qs); + c7[1] = vec_xl(0, (const vector signed char *)aoffset7->qs); + c8[1] = vec_xl(0, (const vector signed char *)aoffset8->qs); process_q4_elements(c1, & comparray[0]); process_q4_elements(c2, & comparray[1]); @@ -2657,10 +2657,10 @@ class tinyBLAS_Q0_PPC { i = (cols >> 2); if (i > 0) { do { - c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); - c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); - c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); - c4[1] = reinterpret_cast(vec_xl(0, aoffset4->qs)); + c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs); + c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs); + c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs); + c4[1] = vec_xl(0, (const vector signed char *)aoffset4->qs); process_q4_elements(c1, & comparray[0]); process_q4_elements(c2, & comparray[1]); @@ -2686,9 +2686,9 @@ class tinyBLAS_Q0_PPC { if (i > 0) { do { switch(rows) { - case 3: c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); - case 2: c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); - case 1: c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); + case 3: c3[1] = vec_xl(0, (const vector signed char *)aoffset3->qs); + case 2: c2[1] = vec_xl(0, (const vector signed char *)aoffset2->qs); + case 1: c1[1] = vec_xl(0, (const vector signed char *)aoffset1->qs); break; } process_q4_elements(c1, & comparray[0]); diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index ca1b3059b..2c372f963 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -2129,12 +2129,12 @@ static void ggml_compute_forward_gelu_f32( #ifndef NDEBUG for (int k = 0; k < 
nc; k++) { - const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float x = ((float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*(dst->nb[1])))[k]; GGML_UNUSED(x); assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -2176,13 +2176,13 @@ static void ggml_compute_forward_gelu_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*( dst->nb[1])))[k]; const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -2325,12 +2325,12 @@ static void ggml_compute_forward_gelu_erf_f32( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float x = ((float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*(dst->nb[1])))[k]; GGML_UNUSED(x); assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -2372,13 +2372,13 @@ static void ggml_compute_forward_gelu_erf_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*( dst->nb[1])))[k]; const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -2444,12 +2444,12 @@ static void ggml_compute_forward_gelu_quick_f32( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float x = ((float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*(dst->nb[1])))[k]; GGML_UNUSED(x); assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -2491,13 +2491,13 @@ static void ggml_compute_forward_gelu_quick_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const 
ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*( dst->nb[1])))[k]; const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -2563,12 +2563,12 @@ static void ggml_compute_forward_silu_f32( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const float x = ((float *) ((char *) dst->data + i1*(dst->nb[1])))[k]; + const float x = ((float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*(dst->nb[1])))[k]; GGML_UNUSED(x); assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -2610,13 +2610,13 @@ static void ggml_compute_forward_silu_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { - const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k]; + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*( dst->nb[1])))[k]; const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -2766,7 +2766,7 @@ static void ggml_compute_forward_silu_back_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -2802,7 +2802,7 @@ static void ggml_compute_forward_silu_back_f16( (ggml_fp16_t *) ((char *) src1->data + i1*(src1->nb[1])), (ggml_fp16_t *) ((char *) grad->data + i1*(grad->nb[1]))); - #ifndef NDEBUG +#ifndef NDEBUG for (int k = 0; k < nc; k++) { const float x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; const float v = GGML_CPU_FP16_TO_FP32(x); @@ -2810,7 +2810,7 @@ static void ggml_compute_forward_silu_back_f16( assert(!isnan(v)); assert(!isinf(v)); } - #endif +#endif // NDEBUG } } @@ -2893,7 +2893,7 @@ static void ggml_compute_forward_reglu_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -2953,7 +2953,7 @@ static void ggml_compute_forward_reglu_f16( assert(!isnan(v)); assert(!isinf(v)); } 
-#endif +#endif // NDEBUG } } @@ -3036,7 +3036,7 @@ static void ggml_compute_forward_geglu_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -3096,7 +3096,7 @@ static void ggml_compute_forward_geglu_f16( assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -3179,7 +3179,7 @@ static void ggml_compute_forward_swiglu_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -3239,7 +3239,7 @@ static void ggml_compute_forward_swiglu_f16( assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -3330,7 +3330,7 @@ static void ggml_compute_forward_swiglu_oai_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -3409,7 +3409,7 @@ static void ggml_compute_forward_geglu_erf_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -3469,7 +3469,7 @@ static void ggml_compute_forward_geglu_erf_f16( assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -3552,7 +3552,7 @@ static void ggml_compute_forward_geglu_quick_f32( assert(!isnan(x)); assert(!isinf(x)); } -#endif +#endif // NDEBUG } } @@ -3612,7 +3612,7 @@ static void ggml_compute_forward_geglu_quick_f16( assert(!isnan(v)); assert(!isinf(v)); } -#endif +#endif // NDEBUG } } @@ -5303,7 +5303,7 @@ static void ggml_compute_forward_soft_max_f32( //printf("p[%d] = %f\n", i, p[i]); assert(!isnan(wp[i])); } -#endif +#endif // NDEBUG float max = -INFINITY; ggml_vec_max_f32(ne00, &max, wp); @@ -5328,7 +5328,7 @@ static void ggml_compute_forward_soft_max_f32( assert(!isnan(dp[i])); assert(!isinf(dp[i])); } -#endif +#endif // NDEBUG } } } @@ -5402,7 +5402,7 @@ static void ggml_compute_forward_soft_max_ext_back_f32( assert(!isnan(dy[i])); assert(!isnan(y[i])); } -#endif +#endif // NDEBUG // Jii = yi - yi*yi // Jij = -yi*yj // J = diag(y)-y.T*y @@ -5435,7 +5435,7 @@ static void ggml_compute_forward_soft_max_ext_back_f32( assert(!isnan(dx[i])); assert(!isinf(dx[i])); } -#endif +#endif // 
NDEBUG } } @@ -5803,28 +5803,33 @@ static void ggml_compute_forward_rope_flt( const int32_t * pos = (const int32_t *) src1->data; + int64_t last_i2 = -1; + for (int64_t i3 = 0; i3 < ne3; i3++) { // batch for (int64_t i2 = 0; i2 < ne2; i2++) { // seq-len - - float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith; - if (!mrope_used) { - const int64_t p = pos[i2]; - ggml_rope_cache_init(p, freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale); - } - else { - const int64_t p_t = pos[i2]; - const int64_t p_h = pos[i2 + ne2]; - const int64_t p_w = pos[i2 + ne2 * 2]; - const int64_t p_e = pos[i2 + ne2 * 3]; - ggml_mrope_cache_init( - p_t, p_h, p_w, p_e, sections, is_imrope, is_vision, - freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale); - } - for (int64_t i1 = 0; i1 < ne1; i1++) { // attn-heads - if (ir++ < ir0) continue; + if (ir++ < ir0) continue; // skip rows mapped to other threads if (ir > ir1) break; + float * cache = (float *) params->wdata + (ne0 + CACHE_LINE_SIZE_F32)*ith; + if (last_i2 != i2) { + if (!mrope_used) { + const int64_t p = pos[i2]; + ggml_rope_cache_init(p, freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale); + } + else { + const int64_t p_t = pos[i2]; + const int64_t p_h = pos[i2 + ne2]; + const int64_t p_w = pos[i2 + ne2 * 2]; + const int64_t p_e = pos[i2 + ne2 * 3]; + ggml_mrope_cache_init( + p_t, p_h, p_w, p_e, sections, is_imrope, is_vision, + freq_scale, freq_factors, corr_dims, ne0, ext_factor, attn_factor, cache, sin_sign, theta_scale); + } + + last_i2 = i2; + } + T * src = (T *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); T * dst_data = (T *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1); @@ -10700,7 +10705,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32( assert(!isnan(s0[i])); assert(!isnan(s1[i])); } -#endif +#endif // NDEBUG float max = -INFINITY; 
ggml_vec_max_f32(nc, &max, s0); @@ -10719,7 +10724,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32( assert(!isnan(st[i])); assert(!isinf(st[i])); } -#endif +#endif // NDEBUG } sums[ith] = sum_thread; ggml_barrier(params->threadpool); @@ -10792,7 +10797,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32( assert(!isnan(s0[i])); assert(!isnan(s1[i])); } -#endif +#endif // NDEBUG // soft_max float max = -INFINITY; @@ -10810,7 +10815,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32( assert(!isnan(ds0[i])); assert(!isinf(ds0[i])); } -#endif +#endif // NDEBUG } } diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 9575746ba..c08ff6f63 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -3364,6 +3364,46 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, return true; } + if (ops.size() == 2 && ops.begin()[0] == GGML_OP_SSM_CONV && ops.begin()[1] == GGML_OP_UNARY + && unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_SILU) { + const ggml_tensor * ssm_conv = cgraph->nodes[node_idx]; + const ggml_tensor * silu = cgraph->nodes[node_idx+1]; + + if (ssm_conv->type != GGML_TYPE_F32 || silu->type != GGML_TYPE_F32) { + return false; + } + + return true; + } + + if (ops.size() == 2 && ops.begin()[0] == GGML_OP_UNARY && ops.begin()[1] == GGML_OP_MUL + && unary_ops.size() == 1 && (unary_ops.begin()[0] == GGML_UNARY_OP_SILU || unary_ops.begin()[0] == GGML_UNARY_OP_SIGMOID || unary_ops.begin()[0] == GGML_UNARY_OP_SOFTPLUS)) { + const ggml_tensor * unary = cgraph->nodes[node_idx]; + const ggml_tensor * mul = cgraph->nodes[node_idx+1]; + + if (ggml_get_unary_op(unary) != unary_ops.begin()[0]) { + return false; + } + + if (unary->type != GGML_TYPE_F32 && unary->type != GGML_TYPE_F16) { + return false; + } + + if (unary->type != mul->type) { + return false; + } + + const ggml_tensor * other = (mul->src[0] == unary) ? 
mul->src[1] : mul->src[0]; + if (other->type != unary->type) { + return false; + } + if (!ggml_is_contiguous_1(other) || !ggml_is_contiguous_1(unary->src[0]) || !ggml_are_same_shape(other, unary)) { + return false; + } + + return true; + } + if (ops.size() == 3 && ops.begin()[0] == GGML_OP_SCALE && ops.begin()[1] == GGML_OP_UNARY && ops.begin()[2] == GGML_OP_SCALE && unary_ops.size() == 1 && unary_ops.begin()[0] == GGML_UNARY_OP_TANH) { const ggml_tensor *scale = cgraph->nodes[node_idx]; @@ -3388,6 +3428,69 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, return false; } +// returns whether the write (out) nodes overwrite the read nodes in operation +static bool ggml_cuda_check_fusion_memory_ranges(ggml_cgraph * cgraph, + int node_idx, + int node_count, + int * out_nodes, + int out_count) { + auto nodes_overlap = [&](const ggml_tensor * a, const ggml_tensor * b) { + const int64_t a_start = (int64_t) a->data; + const int64_t a_end = a_start + ggml_nbytes(a); + + const int64_t b_start = (int64_t) b->data; + const int64_t b_end = b_start + ggml_nbytes(b); + + if ((b_start <= a_start && a_start < b_end) || (a_start <= b_start && b_start < a_end)) { + return true; + } + + return false; + }; + + bool is_ok = true; + // for nrows=1, all fusion operations correctly read the src before writing dst or do it elementwise, so we should be ok + if (ggml_nrows(cgraph->nodes[node_idx]) == 1) { + return true; + } + + for (int i = 0; i < out_count; ++i) { + const ggml_tensor * dst = cgraph->nodes[out_nodes[i]]; + + for (int j = node_idx; j < node_idx + node_count; ++j) { + // Loop over all srcs of all nodes in the fusion. If the src overlaps + // the destination and the src is not an intermediate node that's being + // elided, then disable fusion. 
+ + for (int src_idx = 0; src_idx < GGML_MAX_SRC; ++src_idx) { + const ggml_tensor * src = cgraph->nodes[j]->src[src_idx]; + + if (!src || src->op == GGML_OP_NONE) { + continue; + } + + if (nodes_overlap(dst, src)) { + bool found = false; + + for (int k = node_idx; k < j; ++k) { + if (cgraph->nodes[k] == src) { + found = true; + break; + } + } + + if (!found) { + is_ok = false; + break; + } + } + } + } + } + + return is_ok; +} + static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph, const bool use_cuda_graph, const bool cuda_graph_update_required, const void * graph_key) { bool graph_evaluated_or_captured = false; @@ -3584,7 +3687,8 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud out_nodes[1] = i + ops.size() - 1; if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) && - ggml_cuda_should_use_topk_moe(node, logits, weights, ids)) { + ggml_cuda_should_use_topk_moe(node, logits, weights, ids) && + ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2)) { ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args); i += ops.size() - 1; continue; @@ -3599,7 +3703,8 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud int out_nodes[2] = { i + 1, i + 5 }; if (ggml_can_fuse_subgraph(cgraph, i, ops.size(), ops.data(), out_nodes, 2) && - ggml_cuda_should_use_topk_moe(softmax, logits, weights, ids)) { + ggml_cuda_should_use_topk_moe(softmax, logits, weights, ids) && + ggml_cuda_check_fusion_memory_ranges(cgraph, i, ops.size(), out_nodes, 2)) { ggml_cuda_op_topk_moe(*cuda_ctx, logits, weights, ids, clamp, scale, bias, args); i += ops.size() - 1; continue; @@ -3852,6 +3957,20 @@ static void ggml_cuda_graph_evaluate_and_capture(ggml_backend_cuda_context * cud continue; } + if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SSM_CONV, GGML_OP_UNARY }, { GGML_UNARY_OP_SILU })) { + 
ggml_cuda_op_ssm_conv(*cuda_ctx, node, cgraph->nodes[i+1]); + i++; + continue; + } + + if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SILU }) || + ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SIGMOID }) || + ggml_cuda_can_fuse(cgraph, i, { GGML_OP_UNARY, GGML_OP_MUL }, { GGML_UNARY_OP_SOFTPLUS })) { + ggml_cuda_op_unary_mul(*cuda_ctx, node, cgraph->nodes[i+1]); + i++; + continue; + } + if (ggml_cuda_can_fuse(cgraph, i, { GGML_OP_SCALE, GGML_OP_UNARY, GGML_OP_SCALE }, { GGML_UNARY_OP_TANH })) { i += 2; ggml_cuda_op_softcap(*cuda_ctx, cgraph->nodes[i], node); diff --git a/ggml/src/ggml-cuda/ssm-conv.cu b/ggml/src/ggml-cuda/ssm-conv.cu index 6d5ea704c..85e82b5a4 100644 --- a/ggml/src/ggml-cuda/ssm-conv.cu +++ b/ggml/src/ggml-cuda/ssm-conv.cu @@ -1,6 +1,7 @@ #include "ssm-conv.cuh" +#include "unary.cuh" -template +template static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float * __restrict__ src1, const int src0_nb0, const int src0_nb1, const int src0_nb2, const int src1_nb1, float * __restrict__ dst, const int dst_nb0, const int dst_nb1, const int dst_nb2, @@ -41,11 +42,11 @@ static __global__ void ssm_conv_f32(const float * __restrict__ src0, const float for (size_t j = 0; j < d_conv; j++) { sumf += x[(i + j) % d_conv] * w[j]; } - y_block[i * stride_y + tid] = sumf; + y_block[i * stride_y + tid] = apply_silu ? 
ggml_cuda_op_silu_single(sumf) : sumf; } } -template +template static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0, const float * __restrict__ src1, const int src0_nb0, const int src0_nb1, const int src0_nb2, const int src1_nb1, float * __restrict__ dst, const int dst_nb0, @@ -65,36 +66,46 @@ static __global__ void ssm_conv_long_token_f32(const float * __restrict__ src0, const int stride_w = src1_nb1 / sizeof(float); const int stride_y = dst_nb1 / sizeof(float); - float x[d_conv] = { 0.0f }; - float w[d_conv] = { 0.0f }; + const int64_t local_n_t = min(split_n_t, n_t - bidz * split_n_t); + const int n_cols = d_conv - 1 + split_n_t; + extern __shared__ float smem[]; + + constexpr int load_cols = d_conv - 1 + split_n_t; + constexpr int total_elems = split_d_inner * load_cols; + int row = tid / load_cols; + int col = tid % load_cols; +#pragma unroll + for (int idx = tid; idx < total_elems; idx += split_d_inner) { + if (row < (int)split_d_inner) { + smem[row * n_cols + col] = x_block[row * stride_x + col]; + } + + col += split_d_inner; + row += col / load_cols; + col = col % load_cols; + } + __syncthreads(); + + // Load weights into registers (done once, small) + float w[d_conv] = { 0.0f }; #pragma unroll for (size_t j = 0; j < d_conv; j++) { w[j] = w_block[tid * stride_w + j]; } + // Compute from shared memory + for (int64_t i = 0; i < local_n_t; i++) { + float sumf = 0.0f; #pragma unroll - for (int64_t i = 0; i < split_n_t; i++) { - if (bidz * split_n_t + i < n_t) { - float sumf = 0.0f; - - if (i == 0) { - for (size_t j = 0; j < d_conv; j++) { - x[j] = x_block[tid * stride_x + j]; - } - } else { - x[(i - 1) % d_conv] = x_block[tid * stride_x + i + d_conv - 1]; - } - -#pragma unroll - for (size_t j = 0; j < d_conv; j++) { - sumf += x[(i + j) % d_conv] * w[j]; - } - y_block[i * stride_y + tid] = sumf; + for (size_t j = 0; j < d_conv; j++) { + sumf += smem[tid * n_cols + i + j] * w[j]; } + y_block[i * stride_y + tid] = apply_silu ? 
ggml_cuda_op_silu_single(sumf) : sumf; } } +template static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int src0_nb0, const int src0_nb1, const int src0_nb2, const int src1_nb1, float * dst, const int dst_nb0, const int dst_nb1, const int dst_nb2, const int64_t nc, const int64_t nr, const int64_t n_t, @@ -106,12 +117,13 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int constexpr int kNC = decltype(NC)::value; if (n_t <= 32) { const dim3 blocks(n_s, (nr + threads - 1) / threads, 1); - ssm_conv_f32<<>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, + ssm_conv_f32<<>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t); } else { const int64_t split_n_t = 32; dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t); - ssm_conv_long_token_f32<<>>( + const size_t smem_size = threads * (kNC - 1 + split_n_t) * sizeof(float); + ssm_conv_long_token_f32<<>>( src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t); } }; @@ -124,27 +136,36 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int } } -void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { +void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * silu_dst) { const struct ggml_tensor * src0 = dst->src[0]; // conv_x const struct ggml_tensor * src1 = dst->src[1]; // conv1d.weight + const bool fuse_silu = silu_dst != nullptr; + + // When fusing, write to silu_dst (the node downstream references). + const struct ggml_tensor * out = fuse_silu ? 
silu_dst : dst; const int64_t nc = src1->ne[0]; // d_conv const int64_t nr = src0->ne[1]; // d_inner - const int64_t n_t = dst->ne[1]; // tokens per sequence - const int64_t n_s = dst->ne[2]; // number of sequences in the batch + const int64_t n_t = out->ne[1]; // tokens per sequence + const int64_t n_s = out->ne[2]; // number of sequences in the batch - GGML_ASSERT(dst->ne[0] == nr); + GGML_ASSERT(out->ne[0] == nr); GGML_ASSERT(src0->nb[0] == sizeof(float)); GGML_ASSERT(src1->nb[0] == sizeof(float)); GGML_ASSERT(src0->nb[1] == src0->ne[0] * sizeof(float)); const float * src0_d = (const float *) src0->data; const float * src1_d = (const float *) src1->data; - float * dst_d = (float *) dst->data; + float * dst_d = (float *) out->data; cudaStream_t stream = ctx.stream(); GGML_ASSERT(src0->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); - ssm_conv_f32_cuda(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, dst->nb[0], dst->nb[1], - dst->nb[2], nc, nr, n_t, n_s, stream); + GGML_ASSERT(out->type == GGML_TYPE_F32); + if (fuse_silu) { + ssm_conv_f32_cuda(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1], + out->nb[2], nc, nr, n_t, n_s, stream); + } else { + ssm_conv_f32_cuda(src0_d, src1_d, src0->nb[0], src0->nb[1], src0->nb[2], src1->nb[1], dst_d, out->nb[0], out->nb[1], + out->nb[2], nc, nr, n_t, n_s, stream); + } } diff --git a/ggml/src/ggml-cuda/ssm-conv.cuh b/ggml/src/ggml-cuda/ssm-conv.cuh index 8e6c1f00b..f96a1cd24 100644 --- a/ggml/src/ggml-cuda/ssm-conv.cuh +++ b/ggml/src/ggml-cuda/ssm-conv.cuh @@ -1,3 +1,3 @@ #include "common.cuh" -void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_ssm_conv(ggml_backend_cuda_context & ctx, ggml_tensor * dst, ggml_tensor * silu_dst = nullptr); diff --git a/ggml/src/ggml-cuda/topk-moe.cu b/ggml/src/ggml-cuda/topk-moe.cu index 08a88990d..3020e5c74 100644 --- a/ggml/src/ggml-cuda/topk-moe.cu +++ 
b/ggml/src/ggml-cuda/topk-moe.cu @@ -119,6 +119,18 @@ __launch_bounds__(4 * WARP_SIZE, 1) __global__ void topk_moe_cuda(const float * } } + // Sanitize NaN to -FLT_MAX so the iterative argmax produces unique expert IDs. + // NaN comparisons always return false, which would cause the same expert to be + // selected repeatedly. -FLT_MAX compares normally and is still excluded by the + // -INFINITY sentinel used after each selection round. + // More relevant for the cuBLAS path. See https://github.com/ggml-org/llama.cpp/issues/19659 +#pragma unroll + for (int i = 0; i < experts_per_thread; i++) { + if (__isnanf(wt[i])) { + wt[i] = -FLT_MAX; + } + } + // selection_wt is only needed when bias is present (selection uses wt + bias) // when no bias, we use wt directly for both selection and weight values float selection_wt[has_bias ? experts_per_thread : 1]; diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml/src/ggml-cuda/unary.cu index d4866067a..4ad30fa1f 100644 --- a/ggml/src/ggml-cuda/unary.cu +++ b/ggml/src/ggml-cuda/unary.cu @@ -560,3 +560,58 @@ void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) leaky_relu_cuda((const float *)src0_d, (float *)dst_d, ggml_nelements(src0), negative_slope, stream); } } + +/* fused unary + mul */ + +template +static void ggml_cuda_op_unary_mul_impl(ggml_backend_cuda_context & ctx, ggml_tensor * unary_node, ggml_tensor * mul_node) { + // unary_node: UNARY op applied to unary_node->src[0] + // mul_node: MUL(a, b) where one of a/b is unary_node + // Output goes to mul_node->data + + const ggml_tensor * unary_src = unary_node->src[0]; // input to the unary op + const ggml_tensor * other_src = (mul_node->src[0] == unary_node) ? 
mul_node->src[1] : mul_node->src[0]; + + GGML_ASSERT(ggml_is_contiguous_1(unary_src)); + GGML_ASSERT(unary_src->nb[0] == ggml_element_size(unary_src)); + GGML_ASSERT(ggml_is_contiguous_1(other_src)); + GGML_ASSERT(other_src->nb[0] == ggml_element_size(other_src)); + GGML_ASSERT(ggml_are_same_shape(unary_src, other_src)); + + GGML_ASSERT(unary_src->type == GGML_TYPE_F32 || unary_src->type == GGML_TYPE_F16); + GGML_ASSERT(unary_src->type == other_src->type); + GGML_ASSERT(unary_src->type == mul_node->type); + + cudaStream_t stream = ctx.stream(); + + const int64_t k = ggml_nelements(mul_node); + const int64_t nc = unary_src->ne[0]; + const int64_t unary_stride = unary_src->nb[1]; + const int64_t other_stride = other_src->nb[1]; + + if (unary_src->type == GGML_TYPE_F16) { + unary_gated_cuda((const half *) unary_src->data, (const half *) other_src->data, + (half *) mul_node->data, k, nc, + unary_stride / sizeof(half), other_stride / sizeof(half), stream); + } else { + unary_gated_cuda((const float *) unary_src->data, (const float *) other_src->data, + (float *) mul_node->data, k, nc, + unary_stride / sizeof(float), other_stride / sizeof(float), stream); + } +} + +void ggml_cuda_op_unary_mul(ggml_backend_cuda_context & ctx, ggml_tensor * unary_node, ggml_tensor * mul_node) { + switch (ggml_get_unary_op(unary_node)) { + case GGML_UNARY_OP_SILU: + ggml_cuda_op_unary_mul_impl(ctx, unary_node, mul_node); + break; + case GGML_UNARY_OP_SIGMOID: + ggml_cuda_op_unary_mul_impl(ctx, unary_node, mul_node); + break; + case GGML_UNARY_OP_SOFTPLUS: + ggml_cuda_op_unary_mul_impl(ctx, unary_node, mul_node); + break; + default: + GGML_ABORT("Unsupported unary op for fused unary+mul"); + } +} diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml/src/ggml-cuda/unary.cuh index 609046e56..f1dd2183a 100644 --- a/ggml/src/ggml-cuda/unary.cuh +++ b/ggml/src/ggml-cuda/unary.cuh @@ -89,6 +89,8 @@ void ggml_cuda_op_geglu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst void 
ggml_cuda_op_xielu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); +void ggml_cuda_op_unary_mul(ggml_backend_cuda_context & ctx, ggml_tensor * unary_node, ggml_tensor * mul_node); + __device__ __forceinline__ float ggml_cuda_op_silu_single(float x) { return x / (1.0f + expf(-x)); } diff --git a/ggml/src/ggml-hexagon/htp/argsort-ops.c b/ggml/src/ggml-hexagon/htp/argsort-ops.c deleted file mode 100644 index a4cee980b..000000000 --- a/ggml/src/ggml-hexagon/htp/argsort-ops.c +++ /dev/null @@ -1,281 +0,0 @@ -#include -#include -#include -#include -#include - -#define GGML_COMMON_DECL_C -#include "ggml-common.h" -#include "ggml.h" - -#include "hvx-utils.h" -#include "hex-dma.h" - -#include "htp-ctx.h" -#include "htp-msg.h" -#include "htp-ops.h" - -#ifndef MIN -#define MIN(a, b) ((a) < (b) ? (a) : (b)) -#endif - -struct htp_argsort_context { - struct htp_ops_context * octx; - uint32_t nrows_per_thread; -}; - -static inline bool all_greater_f32(HVX_Vector x, HVX_Vector y) -{ - const HVX_Vector one = Q6_V_vsplat_R(1); - const HVX_Vector zero = Q6_V_vzero(); - - HVX_VectorPred pred = Q6_Q_vcmp_gt_VsfVsf(x, y); - HVX_Vector matches = Q6_V_vmux_QVV(pred, one, zero); - HVX_Vector sum = hvx_vec_reduce_sum_i32(matches); - return hvx_vec_get_i32(sum) == 32; -} - -// Sorts values and mirrors swaps to indices. 
-static void quicksort_values_indices_asc(float * values, int32_t * indices, int left, int right) { - if (left >= right) return; - - int pivot_idx = (left + right) / 2; - float pivot = values[pivot_idx]; - int i = left; - int j = right; - - HVX_Vector pivot_vec = hvx_vec_splat_f32(pivot); - while (i <= j) { - // Vectorized scan for i - while (i <= j) { - // Check if we have at least one full vector - if (i + 32 <= j) { - HVX_Vector vals_vec = *(HVX_UVector *)(values + i); - if (all_greater_f32(pivot_vec, vals_vec)) { - // If all elements are < pivot, we can skip this whole block - i += 32; - continue; - } - } - - // Scalar fallback / cleanup - if (values[i] < pivot) { - i++; - } else { - break; - } - } - - // Vectorized scan for j - while (i <= j) { - if (j - 32 >= i) { - // Load 32 elements ending at j. - // Since we want `values[j] > pivot`, let's load from j-31 to j. - HVX_Vector vals_vec = *(HVX_UVector *)(values + j - 31); - if (all_greater_f32(vals_vec, pivot_vec)) { - j -= 32; - continue; - } - } - - if (values[j] > pivot) { - j--; - } else { - break; - } - } - - if (i <= j) { - float tmp_val = values[i]; - values[i] = values[j]; - values[j] = tmp_val; - - int32_t tmp_idx = indices[i]; - indices[i] = indices[j]; - indices[j] = tmp_idx; - i++; - j--; - } - } - - if (left < j) quicksort_values_indices_asc(values, indices, left, j); - if (i < right) quicksort_values_indices_asc(values, indices, i, right); -} - -static void quicksort_values_indices_desc(float * values, int32_t * indices, int left, int right) { - if (left >= right) return; - - int pivot_idx = (left + right) / 2; - float pivot = values[pivot_idx]; - int i = left; - int j = right; - - HVX_Vector pivot_vec = hvx_vec_splat_f32(pivot); - - while (i <= j) { - // Vectorized scan for i (values[i] > pivot) - while (i <= j) { - if (i + 32 <= j) { - HVX_Vector vals_vec = *(HVX_UVector *)(values + i); - if (all_greater_f32(vals_vec, pivot_vec)) { - i += 32; - continue; - } - } - - if (values[i] > pivot) { - 
i++; - } else { - break; - } - } - - // Vectorized scan for j (values[j] < pivot) - while (i <= j) { - if (j - 32 >= i) { - HVX_Vector vals_vec = *(HVX_UVector *)(values + j - 31); - if (all_greater_f32(pivot_vec, vals_vec)) { - j -= 32; - continue; - } - } - - if (values[j] < pivot) { - j--; - } else { - break; - } - } - - if (i <= j) { - float tmp_val = values[i]; - values[i] = values[j]; - values[j] = tmp_val; - - int32_t tmp_idx = indices[i]; - indices[i] = indices[j]; - indices[j] = tmp_idx; - i++; - j--; - } - } - - if (left < j) quicksort_values_indices_desc(values, indices, left, j); - if (i < right) quicksort_values_indices_desc(values, indices, i, right); -} - -static void htp_argsort_f32(unsigned int n, unsigned int i, void * data) { - struct htp_argsort_context * actx = (struct htp_argsort_context *)data; - struct htp_ops_context * octx = actx->octx; - - // Unpack context - const struct htp_tensor * src0 = &octx->src0; - const struct htp_tensor * dst = &octx->dst; - - // Scratchpad memory - uint8_t * spad = octx->src0_spad.data + octx->src0_spad.size_per_thread * i; - - // Dimensions - uint32_t ne00 = src0->ne[0]; - uint32_t ne01 = src0->ne[1]; - uint32_t ne02 = src0->ne[2]; - uint32_t ne03 = src0->ne[3]; - - uint32_t nb01 = src0->nb[1]; - //uint32_t nb02 = src0->nb[2]; - //uint32_t nb03 = src0->nb[3]; - - uint32_t nb1 = dst->nb[1]; - //uint32_t nb2 = dst->nb[2]; - //uint32_t nb3 = dst->nb[3]; - - // Sort order - enum ggml_sort_order order = (enum ggml_sort_order) octx->op_params[0]; - - // Rows to process - uint32_t total_rows = ne01 * ne02 * ne03; - uint32_t rows_per_thread = actx->nrows_per_thread; - uint32_t start_row = rows_per_thread * i; - uint32_t end_row = MIN(start_row + rows_per_thread, total_rows); - - // Scratchpad layout: - // We need space for one row of float data (values) and one row of int32 indices. - // values: ne00 * sizeof(float) - // indices: ne00 * sizeof(int32_t) - // Padded to 128 bytes. 
- - size_t values_size = hex_round_up(ne00 * sizeof(float), 128); - float * values_buf = (float *) spad; - int32_t * indices_buf = (int32_t *) (spad + values_size); - - for (uint32_t r = start_row; r < end_row; r++) { - uint32_t src_offset = r * nb01; - uint32_t dst_offset = r * nb1; - - uint8_t * src_ptr = (uint8_t *) src0->data + src_offset; - uint8_t * dst_ptr = (uint8_t *) dst->data + dst_offset; - - hex_l2fetch(src_ptr, ne00 * sizeof(float), ne00 * sizeof(float), 1); - hvx_copy_f32_au((uint8_t*)values_buf, src_ptr, ne00); - - // Initialize indices - for (uint32_t j = 0; j < ne00; j++) { - indices_buf[j] = j; - } - - // Sort values and mirror swaps to indices - if (order == GGML_SORT_ORDER_ASC) { - quicksort_values_indices_asc(values_buf, indices_buf, 0, ne00 - 1); - } else { - quicksort_values_indices_desc(values_buf, indices_buf, 0, ne00 - 1); - } - - // Copy indices back to DDR - hvx_copy_f32_ua(dst_ptr, (const uint8_t *) indices_buf, ne00); - } -} - -int op_argsort(struct htp_ops_context * octx) { - // Check supported types - if (octx->src0.type != HTP_TYPE_F32) { - return HTP_STATUS_NO_SUPPORT; - } - - // Allocate scratchpad - // We need 1 row of float + 1 row of int32 per thread. - uint32_t ne00 = octx->src0.ne[0]; - size_t values_size = hex_round_up(ne00 * sizeof(float), 128); - size_t indices_size = hex_round_up(ne00 * sizeof(int32_t), 128); - size_t spad_per_thread = values_size + indices_size; - - // Make sure we round up to 256 for alignment requirements - spad_per_thread = hex_round_up(spad_per_thread, 256); - - size_t total_spad_size = spad_per_thread * octx->n_threads; - - if (octx->ctx->vtcm_size < total_spad_size) { - FARF(ERROR, "argsort: VTCM size too small. 
Needed %zu, have %zu", total_spad_size, octx->ctx->vtcm_size); - return HTP_STATUS_VTCM_TOO_SMALL; - } - - octx->src0_spad.data = octx->ctx->vtcm_base; - octx->src0_spad.size = total_spad_size; - octx->src0_spad.size_per_thread = spad_per_thread; - - FARF(HIGH, "argsort: %ux%ux%ux%u -> %ux%ux%ux%u (0x%x, 0x%x)", - octx->src0.ne[0], octx->src0.ne[1], octx->src0.ne[2], octx->src0.ne[3], - octx->dst.ne[0], octx->dst.ne[1], octx->dst.ne[2], octx->dst.ne[3], - octx->src0.data, octx->dst.data); - - uint32_t total_rows = octx->src0.ne[1] * octx->src0.ne[2] * octx->src0.ne[3]; - uint32_t n_jobs = MIN(total_rows, octx->n_threads); - - struct htp_argsort_context actx; - actx.octx = octx; - actx.nrows_per_thread = (total_rows + n_jobs - 1) / n_jobs; - - // Run jobs - worker_pool_run_func(octx->ctx->worker_pool, htp_argsort_f32, &actx, n_jobs); - - return HTP_STATUS_OK; -} diff --git a/ggml/src/ggml-hexagon/htp/hvx-div.h b/ggml/src/ggml-hexagon/htp/hvx-div.h deleted file mode 100644 index 7dae012e0..000000000 --- a/ggml/src/ggml-hexagon/htp/hvx-div.h +++ /dev/null @@ -1,116 +0,0 @@ -#ifndef HVX_DIV_H -#define HVX_DIV_H - -#include - -#include -#include -#include -#include -#include - -#include "hvx-base.h" -#include "hex-utils.h" -#include "hvx-inverse.h" -#include "hvx-arith.h" - -#if __HVX_ARCH__ < 79 -#define HVX_OP_MUL(a, b) Q6_Vsf_equals_Vqf32(Q6_Vqf32_vmpy_VsfVsf(a, b)) -#else -#define HVX_OP_MUL(a, b) Q6_Vsf_vmpy_VsfVsf(a, b) -#endif - -#define hvx_div_f32_loop_body(dst_type, src0_type, src1_type, vec_store) \ - do { \ - dst_type * restrict vdst = (dst_type *) dst; \ - src0_type * restrict vsrc0 = (src0_type *) src0; \ - src1_type * restrict vsrc1 = (src1_type *) src1; \ - \ - const HVX_Vector nan_inf_mask = Q6_V_vsplat_R(0x7f800000); \ - \ - const uint32_t nvec = n / VLEN_FP32; \ - const uint32_t nloe = n % VLEN_FP32; \ - \ - uint32_t i = 0; \ - \ - _Pragma("unroll(4)") \ - for (; i < nvec; i++) { \ - HVX_Vector inv_src1 = hvx_vec_inverse_f32_guard(vsrc1[i], 
nan_inf_mask); \ - HVX_Vector res = HVX_OP_MUL(vsrc0[i], inv_src1); \ - vdst[i] = res; \ - } \ - if (nloe) { \ - HVX_Vector inv_src1 = hvx_vec_inverse_f32_guard(vsrc1[i], nan_inf_mask); \ - HVX_Vector res = HVX_OP_MUL(vsrc0[i], inv_src1); \ - vec_store((void *) &vdst[i], nloe * SIZEOF_FP32, res); \ - } \ - } while(0) - -// 3-letter suffix variants -static inline void hvx_div_f32_aaa(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) dst % 128 == 0); - assert((uintptr_t) src0 % 128 == 0); - assert((uintptr_t) src1 % 128 == 0); - hvx_div_f32_loop_body(HVX_Vector, HVX_Vector, HVX_Vector, hvx_vec_store_a); -} - -static inline void hvx_div_f32_aau(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) dst % 128 == 0); - assert((uintptr_t) src0 % 128 == 0); - hvx_div_f32_loop_body(HVX_Vector, HVX_Vector, HVX_UVector, hvx_vec_store_a); -} - -static inline void hvx_div_f32_aua(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) dst % 128 == 0); - assert((uintptr_t) src1 % 128 == 0); - hvx_div_f32_loop_body(HVX_Vector, HVX_UVector, HVX_Vector, hvx_vec_store_a); -} - -static inline void hvx_div_f32_auu(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) dst % 128 == 0); - hvx_div_f32_loop_body(HVX_Vector, HVX_UVector, HVX_UVector, hvx_vec_store_a); -} - -static inline void hvx_div_f32_uaa(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) src0 % 128 == 0); - assert((uintptr_t) src1 % 128 == 0); - hvx_div_f32_loop_body(HVX_UVector, HVX_Vector, HVX_Vector, hvx_vec_store_u); -} - -static inline void hvx_div_f32_uau(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) src0 % 
128 == 0); - hvx_div_f32_loop_body(HVX_UVector, HVX_Vector, HVX_UVector, hvx_vec_store_u); -} - -static inline void hvx_div_f32_uua(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - assert((uintptr_t) src1 % 128 == 0); - hvx_div_f32_loop_body(HVX_UVector, HVX_UVector, HVX_Vector, hvx_vec_store_u); -} - -static inline void hvx_div_f32_uuu(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, uint32_t n) { - hvx_div_f32_loop_body(HVX_UVector, HVX_UVector, HVX_UVector, hvx_vec_store_u); -} - -static inline void hvx_div_f32(uint8_t * restrict dst, const uint8_t * restrict src0, const uint8_t * restrict src1, const uint32_t num_elems) { - if (hex_is_aligned((void *) dst, 128)) { - if (hex_is_aligned((void *) src0, 128)) { - if (hex_is_aligned((void *) src1, 128)) hvx_div_f32_aaa(dst, src0, src1, num_elems); - else hvx_div_f32_aau(dst, src0, src1, num_elems); - } else { - if (hex_is_aligned((void *) src1, 128)) hvx_div_f32_aua(dst, src0, src1, num_elems); - else hvx_div_f32_auu(dst, src0, src1, num_elems); - } - } else { - if (hex_is_aligned((void *) src0, 128)) { - if (hex_is_aligned((void *) src1, 128)) hvx_div_f32_uaa(dst, src0, src1, num_elems); - else hvx_div_f32_uau(dst, src0, src1, num_elems); - } else { - if (hex_is_aligned((void *) src1, 128)) hvx_div_f32_uua(dst, src0, src1, num_elems); - else hvx_div_f32_uuu(dst, src0, src1, num_elems); - } - } -} - -#undef HVX_OP_MUL - -#endif // HVX_DIV_H diff --git a/ggml/src/ggml-hexagon/htp/sum-rows-ops.c b/ggml/src/ggml-hexagon/htp/sum-rows-ops.c deleted file mode 100644 index 04fa72182..000000000 --- a/ggml/src/ggml-hexagon/htp/sum-rows-ops.c +++ /dev/null @@ -1,130 +0,0 @@ -#pragma clang diagnostic ignored "-Wunused-variable" -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wunused-but-set-variable" - -#include -#include - -#include -#include - -#include "hex-dma.h" -#include "hvx-utils.h" - 
-#define GGML_COMMON_DECL_C -#include "ggml-common.h" -#include "htp-ctx.h" -#include "htp-msg.h" -#include "htp-ops.h" - -#define sum_rows_preamble \ - struct htp_tensor *src0 = &octx->src0;\ - struct htp_tensor *dst = &octx->dst; \ - \ - const uint32_t ne00 = src0->ne[0]; \ - const uint32_t ne01 = src0->ne[1]; \ - const uint32_t ne02 = src0->ne[2]; \ - const uint32_t ne03 = src0->ne[3]; \ - \ - const uint32_t nb00 = src0->nb[0]; \ - const uint32_t nb01 = src0->nb[1]; \ - const uint32_t nb02 = src0->nb[2]; \ - const uint32_t nb03 = src0->nb[3]; \ - \ - const uint32_t ne0 = dst->ne[0]; \ - const uint32_t ne1 = dst->ne[1]; \ - const uint32_t ne2 = dst->ne[2]; \ - const uint32_t ne3 = dst->ne[3]; \ - \ - const uint32_t nb0 = dst->nb[0]; \ - const uint32_t nb1 = dst->nb[1]; \ - const uint32_t nb2 = dst->nb[2]; \ - const uint32_t nb3 = dst->nb[3]; \ - -struct sum_rows_context { - const uint8_t * src_data; - uint8_t * dst_data; - uint32_t ne00; - size_t src_stride; - size_t dst_stride; - uint32_t rows_per_thread; - uint32_t total_rows; - bool opt_path; -}; - -static void sum_rows_thread_f32(unsigned int nth, unsigned int ith, void *data) { - const struct sum_rows_context * smctx = (const struct sum_rows_context *) data; - - const uint32_t rows_per_thread = smctx->rows_per_thread; - const uint32_t total_rows = smctx->total_rows; - - const uint32_t start_row = rows_per_thread * ith; - const uint32_t end_row = MIN(start_row + rows_per_thread, total_rows); - - if (start_row >= end_row) { - return; - } - - const size_t src_stride = smctx->src_stride; - const size_t dst_stride = smctx->dst_stride; - const uint32_t ne00 = smctx->ne00; - const bool opt_path = smctx->opt_path; - - const float * restrict src_th = (const float *) (smctx->src_data + (start_row * src_stride)); - float * restrict dst_th = (float *) (smctx->dst_data + (start_row * dst_stride)); - - // Calculate actual number of rows for this thread - const uint32_t n_rows = end_row - start_row; - - for (uint32_t ir = 
0; ir < n_rows; ir++) { - const float * restrict src_local = src_th + (ir * (src_stride / sizeof(float))); - - if (ir + 1 < n_rows) { - hex_l2fetch(src_local + (src_stride / sizeof(float)), src_stride, src_stride, 1); - } - - if (opt_path) { - dst_th[ir] = hvx_reduce_sum_f32_a((const uint8_t *) src_local, ne00); - } else { - dst_th[ir] = hvx_reduce_sum_f32((const uint8_t *) src_local, ne00); - } - } -} - -int op_sum_rows(struct htp_ops_context * octx) { - sum_rows_preamble; - - if (octx->src0.type != HTP_TYPE_F32) { - return HTP_STATUS_NO_SUPPORT; - } - - if (octx->flags & HTP_OPFLAGS_SKIP_COMPUTE) { - return HTP_STATUS_OK; - } - - const int n_threads = octx->n_threads; - const uint32_t src0_nrows = ne01 * ne02 * ne03; - - uint32_t n_jobs = MIN(n_threads, src0_nrows); - uint32_t rows_per_thread = (src0_nrows + n_jobs - 1) / n_jobs; - - bool opt_path = false; - if ((0 == hex_is_aligned((void *) src0->data, VLEN)) && !(nb01 & (VLEN - 1))) { - opt_path = true; - } - - struct sum_rows_context smctx = { - .src_data = (const uint8_t *) src0->data, - .dst_data = (uint8_t *) dst->data, - .ne00 = ne00, - .src_stride = nb01, - .dst_stride = nb1, - .rows_per_thread = rows_per_thread, - .total_rows = src0_nrows, - .opt_path = opt_path, - }; - - worker_pool_run_func(octx->ctx->worker_pool, sum_rows_thread_f32, &smctx, n_jobs); - - return HTP_STATUS_OK; -} diff --git a/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl b/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl deleted file mode 100644 index 3602c92fe..000000000 --- a/ggml/src/ggml-opencl/kernels/mul_mm_q6_k_f32_l4_lm.cl +++ /dev/null @@ -1,158 +0,0 @@ -#pragma OPENCL EXTENSION cl_khr_fp16 : enable - -#define LOAD_VEC_A 2 -#define LOAD_VEC_B 4 - -#define BM 64 -#define BN 64 -#define BK 32 -#define TM 4 -#define TN 8 - -kernel void kernel_mul_mm_q6_k_f32_l4_lm( - global uchar * src0_ql, - global uchar * src0_qh, - global char * src0_s, - global half * src0_d, - global float4 * src1, - ulong offset1, - global 
float * dst, - ulong offsetd, - - int ne00, - int ne01, - int ne02, - int ne11, - int ne12, - - int stride_a, - int stride_b, - int stride_d, - - int batch_stride_a, - int batch_stride_b, - int batch_stride_d, - - int r2, - int r3 -) { - src1 = (global float4*)((global char*)src1 + offset1); - dst = (global float *)((global char*)dst + offsetd); - - local float buf_a[BM * BK]; - local float buf_b[BN * BK]; - - const int batch_idx = get_global_id(2); - - const int i13 = batch_idx / ne12; - const int i12 = batch_idx % ne12; - - const int i03 = i13 / r3; - const int i02 = i12 / r2; - - const int batch_idx_a = i03 * ne02 + i02; - - const int ir = get_group_id(0); - const int ic = get_group_id(1); - - const int tid = get_local_id(0); - const int th_r = tid % (BM / TM); - const int th_c = tid / (BM / TM); - - const int loadr_a = get_local_id(0) % (BK / LOAD_VEC_A); - const int loadc_a = get_local_id(0) / (BK / LOAD_VEC_A); - const int loadr_b = get_local_id(0) % (BK / LOAD_VEC_B); - const int loadc_b = get_local_id(0) / (BK / LOAD_VEC_B); - - const int loadstride_a = get_local_size(0) * LOAD_VEC_A / BK; - const int loadstride_b = get_local_size(0) * LOAD_VEC_B / BK; - - int pos_a = (batch_idx_a * batch_stride_a + ir * BM * stride_a) / LOAD_VEC_A; - int pos_b = (batch_idx * batch_stride_b + ic * BN * stride_b) / LOAD_VEC_B; - - float sums[TM * TN]; - float cache_a[TM]; - float cache_b[TN]; - - for (int i = 0; i < TM * TN; i++) { - sums[i] = 0.0f; - } - - for (int block = 0; block < ne00; block += BK) { - for (int l = 0; l < BM; l += loadstride_a) { - if (ir*BM + loadc_a + l < ne01) { - int idx = pos_a + (loadc_a + l) * stride_a / LOAD_VEC_A + loadr_a; - - int ib = idx / 128; // 2 values per idx - int iqs = idx % 128; // 0..127 - - int n = iqs / 64; // 0,1 - int b = (iqs % 64) / 32; // 0,1 - int is_b = (iqs % 16) / 8; // 0,1 - int qhshift = ((iqs % 64) / 16) * 2; // 0,2,4,6 - int is = 8 * n + qhshift + is_b; // 0..15 - int qsi = n * 64 + (iqs % 32) * 2; // 0,2,4..126 - int 
qhi = n * 32 + (iqs % 16) * 2; // 0,2,4..62 - - float dscale = (float)src0_d[ib] * (float)src0_s[ib*16 + is]; - - buf_a[(loadr_a * LOAD_VEC_A + 0) * BM + loadc_a + l] = dscale * convert_float(convert_char(((src0_ql[128*ib + qsi + 0] >> (b * 4)) & 0xF) | (((src0_qh[64*ib + qhi + 0] >> qhshift) & 3) << 4)) - 32); - buf_a[(loadr_a * LOAD_VEC_A + 1) * BM + loadc_a + l] = dscale * convert_float(convert_char(((src0_ql[128*ib + qsi + 1] >> (b * 4)) & 0xF) | (((src0_qh[64*ib + qhi + 1] >> qhshift) & 3) << 4)) - 32); - } else { - buf_a[(loadr_a * LOAD_VEC_A + 0) * BM + loadc_a + l] = 0.0f; - buf_a[(loadr_a * LOAD_VEC_A + 1) * BM + loadc_a + l] = 0.0f; - } - } - - for (int l = 0; l < BN; l += loadstride_b) { - if (ic*BN + loadc_b + l < ne11) { - int idx = pos_b + (loadc_b + l) * stride_b / LOAD_VEC_B + loadr_b; - buf_b[(loadr_b * LOAD_VEC_B + 0) * BN + loadc_b + l] = src1[idx].s0; - buf_b[(loadr_b * LOAD_VEC_B + 1) * BN + loadc_b + l] = src1[idx].s1; - buf_b[(loadr_b * LOAD_VEC_B + 2) * BN + loadc_b + l] = src1[idx].s2; - buf_b[(loadr_b * LOAD_VEC_B + 3) * BN + loadc_b + l] = src1[idx].s3; - } else { - buf_b[(loadr_b * LOAD_VEC_B + 0) * BN + loadc_b + l] = 0.0f; - buf_b[(loadr_b * LOAD_VEC_B + 1) * BN + loadc_b + l] = 0.0f; - buf_b[(loadr_b * LOAD_VEC_B + 2) * BN + loadc_b + l] = 0.0f; - buf_b[(loadr_b * LOAD_VEC_B + 3) * BN + loadc_b + l] = 0.0f; - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - pos_a += BK / LOAD_VEC_A; - pos_b += BK / LOAD_VEC_B; - - for (int i = 0; i < BK; i++) { - for (int j = 0; j < TM; j++) { - cache_a[j] = buf_a[(i) * BM + th_r * TM + j]; - } - - for (int j = 0; j < TN; j++) { - cache_b[j] = buf_b[(i) * BN + th_c * TN + j]; - } - - for (int cc = 0; cc < TN; cc++) { - for (int cr = 0; cr < TM; cr++) { - const int sums_idx = cc*TM + cr; - sums[sums_idx] = mad(cache_a[cr], cache_b[cc], sums[sums_idx]); - } - } - } - barrier(CLK_LOCAL_MEM_FENCE); - } - - const int dr = ir * BM + th_r * TM; - const int dc = ic * BN + th_c * TN; - - const int offsets = 
batch_idx * batch_stride_d; - - for (int cc = 0; cc < TN; cc++) { - for (int cr = 0; cr < TM; cr++) { - if (dr + cr < ne01 && dc + cc < ne11) { - dst[offsets + (dc + cc) * stride_d + dr + cr] = sums[cc * TM + cr]; - } - } - } -} diff --git a/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl b/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl deleted file mode 100644 index 71ab98982..000000000 --- a/ggml/src/ggml-opencl/kernels/mul_mv_q4_k_f32.cl +++ /dev/null @@ -1,180 +0,0 @@ -#ifdef cl_intel_required_subgroup_size -#pragma OPENCL EXTENSION cl_intel_required_subgroup_size : enable -#define INTEL_GPU 1 -#define REQD_SUBGROUP_SIZE_16 __attribute__((intel_reqd_sub_group_size(16))) -#define REQD_SUBGROUP_SIZE_32 __attribute__((intel_reqd_sub_group_size(32))) -#elif defined(cl_qcom_reqd_sub_group_size) -#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable -#define ADRENO_GPU 1 -#define REQD_SUBGROUP_SIZE_64 __attribute__((qcom_reqd_sub_group_size("half"))) -#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full"))) -#endif - -//------------------------------------------------------------------------------ -// block_q4_K -//------------------------------------------------------------------------------ -#define QK_K 256 -#define K_SCALE_SIZE 12 - -// 8 blocks of 32 elements each -// weight is represented as x = a * q + b -typedef struct { - half d; // super-block scale for quantized scales - half dmin; // super-block scale for quantized mins - - uchar scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits - uchar qs[QK_K/2]; // 4-bit quants -} block_q4_K; - -#undef N_DST -#undef N_SIMDGROUP -#undef N_SIMDWIDTH - -#ifdef INTEL_GPU -#define N_DST 4 // number of rows each SIMD group works on -#define N_SIMDGROUP 1 // number of SIMD groups in a thread group -#define N_SIMDWIDTH 16 // SIMD group size -#elif defined (ADRENO_GPU) -#define N_DST 4 -#define N_SIMDGROUP 1 -#define N_SIMDWIDTH 64 -#endif - -#undef BLOCK_STRIDE -// number of 
(super) blocks each subgroup processes -// each thread in a subgroup processes a block (32 weights) -#define BLOCK_STRIDE (N_SIMDWIDTH/8) - -#ifdef INTEL_GPU -REQD_SUBGROUP_SIZE_16 -#elif defined (ADRENO_GPU) -REQD_SUBGROUP_SIZE_64 -#endif -kernel void kernel_mul_mv_q4_K_f32( - global char * src0, - int offset0, - global char * src1, - int offset1, - global char * dst, - int offsetd, - int ne00, - int ne01, - ulong nb01, - ulong nb02, - ulong nb03, - int ne12, - ulong nb11, - ulong nb12, - ulong nb13, - int ne0, - int ne1, - int r2, - int r3 -) { - src0 = src0 + offset0; - src1 = src1 + offset1; - dst = dst + offsetd; - - ushort kmask1 = 0x3f3f; - ushort kmask2 = 0x0f0f; - ushort kmask3 = 0xc0c0; - - int ix = get_sub_group_local_id()/8; // super block index - int it = get_sub_group_local_id()%8; // block index (inside super block) - int iq = it/4; // 0 or 1 - first or second half of the super block - int ir = it%4; // 0...3 - block index in the half super block - - int nb = ne00/QK_K; - - int r0 = get_group_id(0); - int r1 = get_group_id(1); - int im = get_group_id(2); - int first_row = (r0 * N_SIMDGROUP + get_sub_group_id()) * N_DST; - - int i12 = im%ne12; - int i13 = im/ne12; - - int offset_src0 = first_row*nb01 + (i12/r2)*nb02 + (i13/r3)*nb03; - int offset_src1 = r1*nb11 + (i12 )*nb12 + (i13 )*nb13; - - global block_q4_K * x = (global block_q4_K *) (src0 + offset_src0); - global float * y = (global float *) (src1 + offset_src1); - - float yl[16]; - float yh[16]; - float sumf[N_DST] = {0.f}; - float all_sum; - - global float * y4 = y + ix * QK_K + 64 * iq + 8 * ir; - - ushort sc16[4]; - uchar * sc8 = (uchar *)sc16; - - for (int ib = ix; ib < nb; ib += BLOCK_STRIDE) { - float4 sumy = {0.f, 0.f, 0.f, 0.f}; - for (int i = 0; i < 8; ++i) { - yl[i+0] = y4[i+0]; - sumy.s0 += yl[i+0]; - - yl[i+8] = y4[i+32]; - sumy.s1 += yl[i+8]; - - yh[i+0] = y4[i+128]; - sumy.s2 += yh[i+0]; - - yh[i+8] = y4[i+160]; - sumy.s3 += yh[i+8]; - } - - global ushort * sc = (global ushort 
*)x[ib].scales + iq; - global ushort * q1 = (global ushort *)x[ib].qs + 16 * iq + 4 * ir; - global half * dh = &x[ib].d; - - for (int row = 0; row < N_DST; row++) { - sc16[0] = sc[0] & kmask1; - sc16[1] = sc[2] & kmask1; - sc16[2] = ((sc[4] >> 0) & kmask2) | ((sc[0] & kmask3) >> 2); - sc16[3] = ((sc[4] >> 4) & kmask2) | ((sc[2] & kmask3) >> 2); - - global ushort * q2 = q1 + 32; - - float4 acc1 = {0.f, 0.f, 0.f, 0.f}; - float4 acc2 = {0.f, 0.f, 0.f, 0.f}; - for (int i = 0; i < 8; i += 2) { - acc1.s0 += yl[i+0] * (q1[i/2] & 0x000F); - acc1.s1 += yl[i+1] * (q1[i/2] & 0x0F00); - acc1.s2 += yl[i+8] * (q1[i/2] & 0x00F0); - acc1.s3 += yl[i+9] * (q1[i/2] & 0xF000); - acc2.s0 += yh[i+0] * (q2[i/2] & 0x000F); - acc2.s1 += yh[i+1] * (q2[i/2] & 0x0F00); - acc2.s2 += yh[i+8] * (q2[i/2] & 0x00F0); - acc2.s3 += yh[i+9] * (q2[i/2] & 0xF000); - } - - float dall = dh[0]; - float dmin = dh[1]; - sumf[row] += dall * ((acc1.s0 + 1.f/256.f * acc1.s1) * sc8[0] + - (acc1.s2 + 1.f/256.f * acc1.s3) * sc8[1] * 1.f/16.f + - (acc2.s0 + 1.f/256.f * acc2.s1) * sc8[4] + - (acc2.s2 + 1.f/256.f * acc2.s3) * sc8[5] * 1.f/16.f) - - dmin * (sumy.s0 * sc8[2] + sumy.s1 * sc8[3] + sumy.s2 * sc8[6] + sumy.s3 * sc8[7]); - - q1 += nb01/2; - sc += nb01/2; - dh += nb01/2; - } - - y4 += BLOCK_STRIDE * QK_K; - } - - global float * dst_f32 = (global float *) dst + im*ne0*ne1 + r1*ne0; - - for (int row = 0; row < N_DST; ++row) { - all_sum = sub_group_reduce_add(sumf[row]); - if (first_row + row < ne01) { - if (get_sub_group_local_id() == 0) { - dst_f32[first_row + row] = all_sum; - } - } - } -} diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp index 386fab04a..6bf76939c 100644 --- a/src/llama-batch.cpp +++ b/src/llama-batch.cpp @@ -394,11 +394,13 @@ llama_ubatch llama_batch_allocr::ubatch_reserve(uint32_t n_seq_tokens, uint32_t clear(); split_reset(); + const int64_t n_pos_all = (int64_t) n_tokens*n_pos_per_embd; + auto udata = std::make_shared(); udata->token .resize(n_tokens); udata->embd .clear(); - 
udata->pos .resize(n_tokens); + udata->pos .resize(n_pos_all); udata->n_seq_id .resize(n_tokens); udata->seq_id .resize(n_tokens); udata->seq_id_unq.resize(0); diff --git a/src/llama-context.cpp b/src/llama-context.cpp index ff78c8cfc..51c1b9a6e 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -1049,11 +1049,15 @@ void llama_context::set_adapters_lora(llama_adapter_lora ** adapters, size_t n_a bool llama_context::adapters_lora_are_same(llama_adapter_lora ** adapters, size_t n_adapters, float * scales) { LLAMA_LOG_DEBUG("%s: adapters = %p\n", __func__, (void *) adapters); - if (n_adapters != loras->size()) { - return false; - } + // Adapters with a zero scale are never added to `loras`, so also ignore them for the comparison. + size_t n_non_zero = 0; for (size_t i = 0; i < n_adapters; i ++) { + if (scales[i] == 0.0f) { + continue; + } + n_non_zero++; + auto it = loras->find(adapters[i]); if (it == loras->end() || it->second != scales[i]) { @@ -1061,6 +1065,10 @@ bool llama_context::adapters_lora_are_same(llama_adapter_lora ** adapters, size_ } } + if (n_non_zero != loras->size()) { + return false; + } + return true; } diff --git a/src/llama-kv-cache.cpp b/src/llama-kv-cache.cpp index 9c3084060..e85e875db 100644 --- a/src/llama-kv-cache.cpp +++ b/src/llama-kv-cache.cpp @@ -1760,8 +1760,10 @@ void llama_kv_cache::state_write_meta(llama_io_write_i & io, const cell_ranges_t io.write(&pos, sizeof(pos)); io.write(&n_seq_id, sizeof(n_seq_id)); - // TODO: we also need to save llama_kv_cell_ext when apply_ubatch() support loading it - // see: https://github.com/ggml-org/llama.cpp/pull/16825#issuecomment-3460868350 + if (hparams.n_pos_per_embd() > 1) { + const llama_kv_cell_ext ext = cells.ext_get(i); + io.write(&ext, sizeof(ext)); + } for (const auto & seq_id : seq_ids) { io.write(&seq_id, sizeof(seq_id)); @@ -1895,6 +1897,14 @@ bool llama_kv_cache::state_read_meta(llama_io_read_i & io, uint32_t strm, uint32 return false; } + if (hparams.n_pos_per_embd() > 
1) { + llama_kv_cell_ext ext; + io.read_to(&ext, sizeof(ext)); + + ubatch.pos[i + ubatch.n_tokens] = ext.y; + ubatch.pos[i + ubatch.n_tokens*2] = ext.x; + } + // read the sequence id, but directly discard it - we will use dest_seq_id instead { llama_seq_id seq_id; diff --git a/src/models/delta-net-base.cpp b/src/models/delta-net-base.cpp index 99f1fdd95..c57abbb5b 100644 --- a/src/models/delta-net-base.cpp +++ b/src/models/delta-net-base.cpp @@ -1,7 +1,5 @@ #include "models.h" -#define CHUNK_SIZE 64 - // utility to get one slice from the third dimension // input dim: [x, y, c, b] // output dim: [x, y, 1, b] @@ -57,7 +55,7 @@ std::pair llm_build_delta_net_base::build_delta_ne g = ggml_permute(ctx0, g, 0, 2, 1, 3); // [g_0, n_tokens, H_v, n_seqs] b = ggml_permute(ctx0, b, 0, 2, 1, 3); // [ 1, n_tokens, H_v, n_seqs] - const int CS = CHUNK_SIZE; + const int CS = kda ? 16 : 64; // chunk size const int pad = (CS - n_tokens % CS) % CS; const int n_chunks = (n_tokens + pad) / CS; diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 77362ce66..ed3fc127b 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/server-common.cpp b/tools/server/server-common.cpp index ff3c6d3c2..13ea8c690 100644 --- a/tools/server/server-common.cpp +++ b/tools/server/server-common.cpp @@ -1463,6 +1463,7 @@ json convert_anthropic_to_oai(const json & body) { json tool_calls = json::array(); json converted_content = json::array(); json tool_results = json::array(); + std::string reasoning_content; bool has_tool_calls = false; for (const auto & block : content) { @@ -1470,6 +1471,8 @@ json convert_anthropic_to_oai(const json & body) { if (type == "text") { converted_content.push_back(block); + } else if (type == "thinking") { + reasoning_content += json_value(block, "thinking", std::string()); } else if (type == "image") { json source = json_value(block, "source", json::object()); 
std::string source_type = json_value(source, "type", std::string()); @@ -1528,16 +1531,19 @@ json convert_anthropic_to_oai(const json & body) { } } - if (!converted_content.empty() || has_tool_calls) { + if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) { json new_msg = {{"role", role}}; if (!converted_content.empty()) { new_msg["content"] = converted_content; - } else if (has_tool_calls) { + } else if (has_tool_calls || !reasoning_content.empty()) { new_msg["content"] = ""; } if (!tool_calls.empty()) { new_msg["tool_calls"] = tool_calls; } + if (!reasoning_content.empty()) { + new_msg["reasoning_content"] = reasoning_content; + } oai_messages.push_back(new_msg); } diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index aafed4950..9dbd6d798 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -12,6 +12,7 @@ #include "mtmd.h" #include "mtmd-helper.h" +#include #include #include #include @@ -2348,8 +2349,10 @@ private: const auto it = std::find_if( slot.prompt.checkpoints.rbegin(), slot.prompt.checkpoints.rend(), - [&](const auto & cur) { + [&, func_name = __func__](const auto & cur) { // guarantee that a checkpoint will result in at least one token being processed [TAG_PROMPT_LOGITS] + LOG_INF("slot %12.*s: id %2d | task %d | Checking checkpoint with [%d, %d] against %d...\n", 12, + func_name, (slot).id, ((slot).task ? 
(slot).task->id : -1), cur.pos_min, cur.pos_max, pos_min_thold); return cur.pos_min < pos_min_thold; } ); @@ -2533,48 +2536,66 @@ private: slot.i_batch = batch.n_tokens - 1; slot.init_sampler(); - - const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id); - const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx), slot.id); - - // no need for empty or small checkpoints - do_checkpoint = do_checkpoint && (pos_min >= 0 && pos_max >= 64); - - // no need to create checkpoints that are too close together - do_checkpoint = do_checkpoint && (slot.prompt.checkpoints.empty() || pos_max > slot.prompt.checkpoints.back().pos_max + 64); - - // note: we create the checkpoint before calling llama_decode(), so the current batch is not - // yet processed and therefore it is not part of the checkpoint. - if (do_checkpoint) { - while (slot.prompt.checkpoints.size() >= (size_t) params_base.n_ctx_checkpoints) { - // make room for the new checkpoint, if needed - const auto & cur = slot.prompt.checkpoints.front(); - - SLT_WRN(slot, "erasing old context checkpoint (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 ", size = %.3f MiB)\n", - cur.pos_min, cur.pos_max, cur.n_tokens, (float) cur.data.size() / 1024 / 1024); - - slot.prompt.checkpoints.erase(slot.prompt.checkpoints.begin()); - } - - const size_t checkpoint_size = llama_state_seq_get_size_ext(ctx, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY); - - auto & cur = slot.prompt.checkpoints.emplace_back(server_prompt_checkpoint{ - /*.pos_min = */ pos_min, - /*.pos_max = */ pos_max, - /*.n_tokens = */ slot.prompt.n_tokens() - batch.n_tokens, - /*.data = */ std::vector(checkpoint_size), - }); - - llama_state_seq_get_data_ext(ctx, cur.data.data(), checkpoint_size, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY); - - SLT_WRN(slot, "created context checkpoint %d of %d (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 ", size = %.3f MiB)\n", - (int) slot.prompt.checkpoints.size(), params_base.n_ctx_checkpoints, 
cur.pos_min, cur.pos_max, cur.n_tokens, (float) cur.data.size() / 1024 / 1024); - } - SLT_INF(slot, "prompt processing done, n_tokens = %d, batch.n_tokens = %d\n", slot.prompt.n_tokens(), batch.n_tokens); } else { + // only do non-end checkpoints if the "checkpoint every n tokens" option is set + do_checkpoint = do_checkpoint && params_base.checkpoint_every_nt > 0; + if (do_checkpoint) { + llama_pos last_checkpoint = 0; + if (!slot.prompt.checkpoints.empty()) { + last_checkpoint = slot.prompt.checkpoints.back().n_tokens; + } + do_checkpoint = do_checkpoint && slot.prompt.n_tokens() - batch.n_tokens - last_checkpoint >= params_base.checkpoint_every_nt; + if (do_checkpoint) { + SLT_INF(slot, "%d tokens since last checkpoint at %d, creating new checkpoint during processing at position %d\n", params_base.checkpoint_every_nt, last_checkpoint, slot.prompt.n_tokens()); + } + } SLT_INF(slot, "prompt processing progress, n_tokens = %d, batch.n_tokens = %d, progress = %f\n", slot.prompt.n_tokens(), batch.n_tokens, (float) slot.prompt.n_tokens() / slot.task->n_tokens()); } + + const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id); + const auto pos_max = llama_memory_seq_pos_max(llama_get_memory(ctx), slot.id); + + // no need for empty or small checkpoints + do_checkpoint = do_checkpoint && (pos_min >= 0 && pos_max >= 64); + + // no need to create checkpoints that are too close together + do_checkpoint = do_checkpoint && (slot.prompt.checkpoints.empty() || pos_max > slot.prompt.checkpoints.back().pos_max + 64); + + // note: we create the checkpoint before calling llama_decode(), so the current batch is not + // yet processed and therefore it is not part of the checkpoint. 
+ if (do_checkpoint) { + while (slot.prompt.checkpoints.size() >= (size_t) params_base.n_ctx_checkpoints) { + // make room for the new checkpoint, if needed + const auto & cur = slot.prompt.checkpoints.front(); + + SLT_WRN(slot, + "erasing old context checkpoint (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 + ", size = %.3f MiB)\n", + cur.pos_min, cur.pos_max, cur.n_tokens, (float) cur.data.size() / 1024 / 1024); + + slot.prompt.checkpoints.erase(slot.prompt.checkpoints.begin()); + } + + const size_t checkpoint_size = + llama_state_seq_get_size_ext(ctx, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY); + + auto & cur = slot.prompt.checkpoints.emplace_back(server_prompt_checkpoint{ + /*.pos_min = */ pos_min, + /*.pos_max = */ pos_max, + /*.n_tokens = */ slot.prompt.n_tokens() - batch.n_tokens, + /*.data = */ std::vector(checkpoint_size), + }); + + llama_state_seq_get_data_ext(ctx, cur.data.data(), checkpoint_size, slot.id, + LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY); + + SLT_WRN(slot, + "created context checkpoint %d of %d (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 + ", size = %.3f MiB)\n", + (int) slot.prompt.checkpoints.size(), params_base.n_ctx_checkpoints, cur.pos_min, + cur.pos_max, cur.n_tokens, (float) cur.data.size() / 1024 / 1024); + } } if (!slot_batched) { diff --git a/tools/server/server-cors-proxy.h b/tools/server/server-cors-proxy.h new file mode 100644 index 000000000..bca50b53d --- /dev/null +++ b/tools/server/server-cors-proxy.h @@ -0,0 +1,56 @@ +#pragma once + +#include "common.h" +#include "http.h" + +#include +#include +#include +#include + +#include "server-http.h" + +static server_http_res_ptr proxy_request(const server_http_req & req, std::string method) { + std::string target_url = req.get_param("url"); + common_http_url parsed_url = common_http_parse_url(target_url); + + if (parsed_url.host.empty()) { + throw std::runtime_error("invalid target URL: missing host"); + } + + if (parsed_url.path.empty()) { + parsed_url.path = "/"; + } + + if 
(!parsed_url.password.empty()) { + throw std::runtime_error("authentication in target URL is not supported"); + } + + if (parsed_url.scheme != "http" && parsed_url.scheme != "https") { + throw std::runtime_error("unsupported URL scheme in target URL: " + parsed_url.scheme); + } + + SRV_INF("proxying %s request to %s://%s%s\n", method.c_str(), parsed_url.scheme.c_str(), parsed_url.host.c_str(), parsed_url.path.c_str()); + + auto proxy = std::make_unique( + method, + parsed_url.host, + parsed_url.scheme == "http" ? 80 : 443, + parsed_url.path, + req.headers, + req.body, + req.should_stop, + 600, // timeout_read (default to 10 minutes) + 600 // timeout_write (default to 10 minutes) + ); + + return proxy; +} + +static server_http_context::handler_t proxy_handler_post = [](const server_http_req & req) -> server_http_res_ptr { + return proxy_request(req, "POST"); +}; + +static server_http_context::handler_t proxy_handler_get = [](const server_http_req & req) -> server_http_res_ptr { + return proxy_request(req, "GET"); +}; diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp index bc601237b..5f87ba9a2 100644 --- a/tools/server/server-models.cpp +++ b/tools/server/server-models.cpp @@ -1089,11 +1089,20 @@ server_http_proxy::server_http_proxy( int32_t timeout_write ) { // shared between reader and writer threads - auto cli = std::make_shared(host, port); + auto cli = std::make_shared(host, port); auto pipe = std::make_shared>(); + if (port == 443) { +#ifdef CPPHTTPLIB_OPENSSL_SUPPORT + cli.reset(new httplib::SSLClient(host, port)); +#else + throw std::runtime_error("HTTPS requested but CPPHTTPLIB_OPENSSL_SUPPORT is not defined"); +#endif + } + // setup Client - cli->set_connection_timeout(0, 200000); // 200 milliseconds + cli->set_follow_location(true); + cli->set_connection_timeout(5, 0); // 5 seconds cli->set_write_timeout(timeout_read, 0); // reversed for cli (client) vs srv (server) cli->set_read_timeout(timeout_write, 0); this->status = 500; // 
to be overwritten upon response @@ -1142,7 +1151,15 @@ server_http_proxy::server_http_proxy( req.method = method; req.path = path; for (const auto & [key, value] : headers) { - req.set_header(key, value); + if (key == "Accept-Encoding") { + // disable Accept-Encoding to avoid compressed responses + continue; + } + if (key == "Host" || key == "host") { + req.set_header(key, host); + } else { + req.set_header(key, value); + } } req.body = body; req.response_handler = response_handler; diff --git a/tools/server/server.cpp b/tools/server/server.cpp index fab0bb587..0bd6fda17 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -1,6 +1,7 @@ #include "server-context.h" #include "server-http.h" #include "server-models.h" +#include "server-cors-proxy.h" #include "arg.h" #include "common.h" @@ -201,6 +202,15 @@ int main(int argc, char ** argv) { // Save & load slots ctx_http.get ("/slots", ex_wrapper(routes.get_slots)); ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots)); + // CORS proxy (EXPERIMENTAL, only used by the Web UI for MCP) + if (params.webui_mcp_proxy) { + SRV_WRN("%s", "-----------------\n"); + SRV_WRN("%s", "CORS proxy is enabled, do not expose server to untrusted environments\n"); + SRV_WRN("%s", "This feature is EXPERIMENTAL and may be removed or changed in future versions\n"); + SRV_WRN("%s", "-----------------\n"); + ctx_http.get ("/cors-proxy", ex_wrapper(proxy_handler_get)); + ctx_http.post("/cors-proxy", ex_wrapper(proxy_handler_post)); + } // // Start the server diff --git a/tools/server/tests/unit/test_compat_anthropic.py b/tools/server/tests/unit/test_compat_anthropic.py index e16e0235c..93ff03be6 100644 --- a/tools/server/tests/unit/test_compat_anthropic.py +++ b/tools/server/tests/unit/test_compat_anthropic.py @@ -809,6 +809,139 @@ def test_anthropic_vs_openai_different_response_format(): # Extended thinking tests with reasoning models +# The next two tests cover the input path (conversation history): +# Client sends 
thinking blocks -> convert_anthropic_to_oai -> reasoning_content -> template + +def test_anthropic_thinking_history_in_count_tokens(): + """Test that interleaved thinking blocks in conversation history are not dropped during conversion.""" + global server + server.jinja = True + server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja' + server.start() + + tool = { + "name": "list_files", + "description": "List files", + "input_schema": { + "type": "object", + "properties": {"path": {"type": "string"}}, + "required": ["path"] + } + } + + messages_without_thinking = [ + {"role": "user", "content": "Fix the bug"}, + { + "role": "assistant", + "content": [ + {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}} + ] + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"} + ] + }, + ] + + messages_with_thinking = [ + {"role": "user", "content": "Fix the bug"}, + { + "role": "assistant", + "content": [ + {"type": "thinking", "thinking": "I should check the project structure first to understand the codebase layout."}, + {"type": "tool_use", "id": "call_1", "name": "list_files", "input": {"path": "."}} + ] + }, + { + "role": "user", + "content": [ + {"type": "tool_result", "tool_use_id": "call_1", "content": "main.py"} + ] + }, + ] + + res_without = server.make_request("POST", "/v1/messages/count_tokens", data={ + "model": "test", + "messages": messages_without_thinking, + "tools": [tool], + }) + assert res_without.status_code == 200, f"Expected 200: {res_without.body}" + + res_with = server.make_request("POST", "/v1/messages/count_tokens", data={ + "model": "test", + "messages": messages_with_thinking, + "tools": [tool], + }) + assert res_with.status_code == 200, f"Expected 200: {res_with.body}" + + # Thinking blocks should increase the token count + assert res_with.body["input_tokens"] > res_without.body["input_tokens"], \ + f"Expected more tokens with thinking 
({res_with.body['input_tokens']}) than without ({res_without.body['input_tokens']})" + + +def test_anthropic_thinking_history_in_template(): + """Test that reasoning_content from converted interleaved thinking blocks renders in the prompt.""" + global server + server.jinja = True + server.chat_template_file = '../../../models/templates/Qwen-Qwen3-0.6B.jinja' + server.start() + + reasoning_1 = "I should check the project structure first." + reasoning_2 = "Now I need to read the main file." + + res = server.make_request("POST", "/apply-template", data={ + "messages": [ + {"role": "user", "content": "Fix the bug in main.py"}, + { + "role": "assistant", + "content": "", + "reasoning_content": reasoning_1, + "tool_calls": [{ + "id": "call_1", + "type": "function", + "function": {"name": "list_files", "arguments": "{\"path\": \".\"}"} + }] + }, + {"role": "tool", "tool_call_id": "call_1", "content": "main.py\nutils.py"}, + { + "role": "assistant", + "content": "", + "reasoning_content": reasoning_2, + "tool_calls": [{ + "id": "call_2", + "type": "function", + "function": {"name": "read_file", "arguments": "{\"path\": \"main.py\"}"} + }] + }, + {"role": "tool", "tool_call_id": "call_2", "content": "print('hello')"}, + ], + "tools": [{ + "type": "function", + "function": { + "name": "list_files", + "description": "List files", + "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]} + } + }, { + "type": "function", + "function": { + "name": "read_file", + "description": "Read a file", + "parameters": {"type": "object", "properties": {"path": {"type": "string"}}, "required": ["path"]} + } + }], + }) + assert res.status_code == 200, f"Expected 200, got {res.status_code}: {res.body}" + prompt = res.body["prompt"] + + # Both reasoning_content values should be rendered in tags + assert reasoning_1 in prompt, f"Expected first reasoning text in prompt: {prompt}" + assert reasoning_2 in prompt, f"Expected second reasoning text in prompt: 
{prompt}" + assert prompt.count("") >= 2, f"Expected at least 2 blocks in prompt: {prompt}" + + @pytest.mark.slow @pytest.mark.parametrize("stream", [False, True]) def test_anthropic_thinking_with_reasoning_model(stream): diff --git a/tools/server/webui/docs/architecture/high-level-architecture-simplified.md b/tools/server/webui/docs/architecture/high-level-architecture-simplified.md index a6cb1e9c3..500f477c9 100644 --- a/tools/server/webui/docs/architecture/high-level-architecture-simplified.md +++ b/tools/server/webui/docs/architecture/high-level-architecture-simplified.md @@ -12,9 +12,13 @@ flowchart TB C_Form["ChatForm"] C_Messages["ChatMessages"] C_Message["ChatMessage"] + C_ChatMessageAgenticContent["ChatMessageAgenticContent"] C_MessageEditForm["ChatMessageEditForm"] C_ModelsSelector["ModelsSelector"] C_Settings["ChatSettings"] + C_McpSettings["McpServersSettings"] + C_McpResourceBrowser["McpResourceBrowser"] + C_McpServersSelector["McpServersSelector"] end subgraph Hooks["🪝 Hooks"] @@ -24,10 +28,13 @@ flowchart TB subgraph Stores["🗄️ Stores"] S1["chatStore
Chat interactions & streaming"] - S2["conversationsStore
Conversation data & messages"] + SA["agenticStore
Multi-turn agentic loop orchestration"] + S2["conversationsStore
Conversation data, messages & MCP overrides"] S3["modelsStore
Model selection & loading"] S4["serverStore
Server props & role detection"] - S5["settingsStore
User configuration"] + S5["settingsStore
User configuration incl. MCP"] + S6["mcpStore
MCP servers, tools, prompts"] + S7["mcpResourceStore
MCP resources & attachments"] end subgraph Services["⚙️ Services"] @@ -36,11 +43,12 @@ flowchart TB SV3["PropsService"] SV4["DatabaseService"] SV5["ParameterSyncService"] + SV6["MCPService
protocol operations"] end subgraph Storage["💾 Storage"] ST1["IndexedDB
conversations, messages"] - ST2["LocalStorage
config, userOverrides"] + ST2["LocalStorage
config, userOverrides, mcpServers"] end subgraph APIs["🌐 llama-server API"] @@ -50,15 +58,27 @@ flowchart TB API4["/v1/models"] end + subgraph ExternalMCP["🔌 External MCP Servers"] + EXT1["MCP Server 1
WebSocket/HTTP/SSE"] + EXT2["MCP Server N"] + end + %% Routes → Components R1 & R2 --> C_Screen RL --> C_Sidebar + %% Layout runs MCP health checks + RL --> S6 + %% Component hierarchy C_Screen --> C_Form & C_Messages & C_Settings C_Messages --> C_Message + C_Message --> C_ChatMessageAgenticContent C_Message --> C_MessageEditForm C_Form & C_MessageEditForm --> C_ModelsSelector + C_Form --> C_McpServersSelector + C_Settings --> C_McpSettings + C_McpSettings --> C_McpResourceBrowser %% Components → Hooks → Stores C_Form & C_Messages --> H1 & H2 @@ -70,6 +90,15 @@ flowchart TB C_Sidebar --> S2 C_ModelsSelector --> S3 & S4 C_Settings --> S5 + C_McpSettings --> S6 + C_McpResourceBrowser --> S6 & S7 + C_McpServersSelector --> S6 + C_Form --> S6 + + %% chatStore → agenticStore → mcpStore (agentic loop) + S1 --> SA + SA --> SV1 + SA --> S6 %% Stores → Services S1 --> SV1 & SV4 @@ -77,6 +106,8 @@ flowchart TB S3 --> SV2 & SV3 S4 --> SV3 S5 --> SV5 + S6 --> SV6 + S7 --> SV6 %% Services → Storage SV4 --> ST1 @@ -87,6 +118,9 @@ flowchart TB SV2 --> API3 & API4 SV3 --> API2 + %% MCP → External Servers + SV6 --> EXT1 & EXT2 + %% Styling classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px @@ -95,12 +129,17 @@ flowchart TB classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px + classDef mcpStyle fill:#e0f2f1,stroke:#00695c,stroke-width:2px + classDef agenticStyle fill:#e8eaf6,stroke:#283593,stroke-width:2px + classDef externalStyle fill:#f3e5f5,stroke:#6a1b9a,stroke-width:2px,stroke-dasharray: 5 5 class R1,R2,RL routeStyle - class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message,C_MessageEditForm,C_ModelsSelector,C_Settings componentStyle + class 
C_Sidebar,C_Screen,C_Form,C_Messages,C_Message,C_ChatMessageAgenticContent,C_MessageEditForm,C_ModelsSelector,C_Settings componentStyle + class C_McpSettings,C_McpResourceBrowser,C_McpServersSelector componentStyle class H1,H2 hookStyle - class S1,S2,S3,S4,S5 storeStyle - class SV1,SV2,SV3,SV4,SV5 serviceStyle + class S1,S2,S3,S4,S5,SA,S6,S7 storeStyle + class SV1,SV2,SV3,SV4,SV5,SV6 serviceStyle class ST1,ST2 storageStyle class API1,API2,API3,API4 apiStyle + class EXT1,EXT2 externalStyle ``` diff --git a/tools/server/webui/docs/architecture/high-level-architecture.md b/tools/server/webui/docs/architecture/high-level-architecture.md index c5ec4d690..42ddb3f4f 100644 --- a/tools/server/webui/docs/architecture/high-level-architecture.md +++ b/tools/server/webui/docs/architecture/high-level-architecture.md @@ -22,6 +22,13 @@ end C_ModelsSelector["ModelsSelector"] C_Settings["ChatSettings"] end + subgraph MCPComponents["MCP UI"] + C_McpSettings["McpServersSettings"] + C_McpServerCard["McpServerCard"] + C_McpResourceBrowser["McpResourceBrowser"] + C_McpResourcePreview["McpResourcePreview"] + C_McpServersSelector["McpServersSelector"] + end end subgraph Hooks["🪝 Hooks"] @@ -43,14 +50,20 @@ end S1Edit["Editing:
editAssistantMessage()
editUserMessagePreserveResponses()
editMessageWithBranching()
clearEditMode()
isEditModeActive()
getAddFilesHandler()
setEditModeActive()"] S1Utils["Utilities:
getApiOptions()
parseTimingData()
getOrCreateAbortController()
getConversationModel()"] end + subgraph SA["agenticStore"] + SAState["State:
sessions (Map)
isAnyRunning"] + SASession["Session Management:
getSession()
updateSession()
clearSession()
getActiveSessions()
isRunning()
currentTurn()
totalToolCalls()
lastError()
streamingToolCall()"] + SAConfig["Configuration:
getConfig()
maxTurns, maxToolPreviewLines"] + SAFlow["Agentic Loop:
runAgenticFlow()
executeAgenticLoop()
normalizeToolCalls()
emitToolCallResult()
extractBase64Attachments()"] + end subgraph S2["conversationsStore"] - S2State["State:
conversations
activeConversation
activeMessages
usedModalities
isInitialized
titleUpdateConfirmationCallback"] - S2Modal["Modalities:
getModalitiesUpToMessage()
calculateModalitiesFromMessages()"] + S2State["State:
conversations
activeConversation
activeMessages
isInitialized
pendingMcpServerOverrides
titleUpdateConfirmationCallback"] S2Lifecycle["Lifecycle:
initialize()
loadConversations()
clearActiveConversation()"] - S2ConvCRUD["Conversation CRUD:
createConversation()
loadConversation()
deleteConversation()
updateConversationName()
updateConversationTitleWithConfirmation()"] + S2ConvCRUD["Conversation CRUD:
createConversation()
loadConversation()
deleteConversation()
deleteAll()
updateConversationName()
updateConversationTitleWithConfirmation()"] S2MsgMgmt["Message Management:
refreshActiveMessages()
addMessageToActive()
updateMessageAtIndex()
findMessageIndex()
sliceActiveMessages()
removeMessageAtIndex()
getConversationMessages()"] S2Nav["Navigation:
navigateToSibling()
updateCurrentNode()
updateConversationTimestamp()"] - S2Export["Import/Export:
downloadConversation()
exportAllConversations()
importConversations()
triggerDownload()"] + S2McpOverrides["MCP Per-Chat Overrides:
getMcpServerOverride()
getAllMcpServerOverrides()
setMcpServerOverride()
toggleMcpServerForChat()
removeMcpServerOverride()
isMcpServerEnabledForChat()
clearPendingMcpServerOverrides()"] + S2Export["Import/Export:
downloadConversation()
exportAllConversations()
importConversations()
importConversationsData()
triggerDownload()"] S2Utils["Utilities:
setTitleUpdateConfirmationCallback()"] end subgraph S3["modelsStore"] @@ -77,6 +90,21 @@ end S5Sync["Server Sync:
syncWithServerDefaults()
forceSyncWithServerDefaults()"] S5Utils["Utilities:
getConfig()
getAllConfig()
getParameterInfo()
getParameterDiff()
getServerDefaults()
clearAllUserOverrides()"] end + subgraph S6["mcpStore"] + S6State["State:
isInitializing, error
toolCount, connectedServers
healthChecks (Map)
connections (Map)
toolsIndex (Map)"] + S6Lifecycle["Lifecycle:
ensureInitialized()
initialize()
shutdown()
acquireConnection()
releaseConnection()"] + S6Health["Health Checks:
runHealthCheck()
runHealthChecksForServers()
updateHealthCheck()
getHealthCheckState()
clearHealthCheck()"] + S6Servers["Server Management:
getServers()
addServer()
updateServer()
removeServer()
getServerById()
getServerDisplayName()"] + S6Tools["Tool Operations:
getToolDefinitionsForLLM()
getToolNames()
hasTool()
getToolServer()
executeTool()
executeToolByName()"] + S6Prompts["Prompt Operations:
getAllPrompts()
getPrompt()
hasPromptsCapability()
getPromptCompletions()"] + end + subgraph S7["mcpResourceStore"] + S7State["State:
serverResources (Map)
cachedResources (Map)
subscriptions (Map)
attachments[]
isLoading"] + S7Resources["Resource Discovery:
setServerResources()
getServerResources()
getAllResourceInfos()
getAllTemplateInfos()
clearServerResources()"] + S7Cache["Caching:
cacheResourceContent()
getCachedContent()
invalidateCache()
clearCache()"] + S7Subs["Subscriptions:
addSubscription()
removeSubscription()
isSubscribed()
handleResourceUpdate()"] + S7Attach["Attachments:
addAttachment()
updateAttachmentContent()
removeAttachment()
clearAttachments()
toMessageExtras()"] + end subgraph ReactiveExports["⚡ Reactive Exports"] direction LR @@ -95,12 +123,19 @@ end RE9c["setEditModeActive()"] RE9d["clearEditMode()"] end + subgraph AgenticExports["agenticStore"] + REA1["agenticIsRunning()"] + REA2["agenticCurrentTurn()"] + REA3["agenticTotalToolCalls()"] + REA4["agenticLastError()"] + REA5["agenticStreamingToolCall()"] + REA6["agenticIsAnyRunning()"] + end subgraph ConvExports["conversationsStore"] RE10["conversations()"] RE11["activeConversation()"] RE12["activeMessages()"] RE13["isConversationsInitialized()"] - RE14["usedModalities()"] end subgraph ModelsExports["modelsStore"] RE15["modelOptions()"] @@ -131,6 +166,13 @@ end RE36["theme()"] RE37["isInitialized()"] end + subgraph MCPExports["mcpStore / mcpResourceStore"] + RE38["mcpResources()"] + RE39["mcpResourceAttachments()"] + RE40["mcpHasResourceAttachments()"] + RE41["mcpTotalResourceCount()"] + RE42["mcpResourcesLoading()"] + end end end @@ -138,9 +180,9 @@ end direction TB subgraph SV1["ChatService"] SV1Msg["Messaging:
sendMessage()"] - SV1Stream["Streaming:
handleStreamResponse()
parseSSEChunk()"] - SV1Convert["Conversion:
convertMessageToChatData()
convertExtraToApiFormat()"] - SV1Utils["Utilities:
extractReasoningContent()
getServerProps()
getModels()"] + SV1Stream["Streaming:
handleStreamResponse()
handleNonStreamResponse()"] + SV1Convert["Conversion:
convertDbMessageToApiChatMessageData()
mergeToolCallDeltas()"] + SV1Utils["Utilities:
stripReasoningContent()
extractModelName()
parseErrorResponse()"] end subgraph SV2["ModelsService"] SV2List["Listing:
list()
listRouter()"] @@ -152,7 +194,7 @@ end end subgraph SV4["DatabaseService"] SV4Conv["Conversations:
createConversation()
getConversation()
getAllConversations()
updateConversation()
deleteConversation()"] - SV4Msg["Messages:
createMessageBranch()
createRootMessage()
getConversationMessages()
updateMessage()
deleteMessage()
deleteMessageCascading()"] + SV4Msg["Messages:
createMessageBranch()
createRootMessage()
createSystemMessage()
getConversationMessages()
updateMessage()
deleteMessage()
deleteMessageCascading()"] SV4Node["Navigation:
updateCurrentNode()"] SV4Import["Import:
importConversations()"] end @@ -162,6 +204,19 @@ end SV5Info["Info:
getParameterInfo()
canSyncParameter()
getSyncableParameterKeys()
validateServerParameter()"] SV5Diff["Diff:
createParameterDiff()"] end + subgraph SV6["MCPService"] + SV6Transport["Transport:
createTransport()
WebSocket / StreamableHTTP / SSE"] + SV6Conn["Connection:
connect()
disconnect()"] + SV6Tools["Tools:
listTools()
callTool()"] + SV6Prompts["Prompts:
listPrompts()
getPrompt()"] + SV6Resources["Resources:
listResources()
listResourceTemplates()
readResource()
subscribeResource()
unsubscribeResource()"] + SV6Complete["Completions:
complete()"] + end + end + + subgraph ExternalMCP["🔌 External MCP Servers"] + EXT1["MCP Server 1
(WebSocket/StreamableHTTP/SSE)"] + EXT2["MCP Server N"] end subgraph Storage["💾 Storage"] @@ -171,6 +226,7 @@ end ST5["LocalStorage"] ST6["config"] ST7["userOverrides"] + ST8["mcpServers"] end subgraph APIs["🌐 llama-server API"] @@ -185,6 +241,9 @@ end R2 --> C_Screen RL --> C_Sidebar + %% Layout runs MCP health checks on startup + RL --> S6 + %% Component hierarchy C_Screen --> C_Form & C_Messages & C_Settings C_Messages --> C_Message @@ -194,8 +253,15 @@ end C_MessageEditForm --> C_Attach C_Form --> C_ModelsSelector C_Form --> C_Attach + C_Form --> C_McpServersSelector C_Message --> C_Attach + %% MCP Components hierarchy + C_Settings --> C_McpSettings + C_McpSettings --> C_McpServerCard + C_McpServerCard --> C_McpResourceBrowser + C_McpResourceBrowser --> C_McpResourcePreview + %% Components use Hooks C_Form --> H1 C_Message --> H1 & H2 @@ -210,17 +276,29 @@ end C_Screen --> S1 & S2 C_Messages --> S2 C_Message --> S1 & S2 & S3 - C_Form --> S1 & S3 + C_Form --> S1 & S3 & S6 C_Sidebar --> S2 C_ModelsSelector --> S3 & S4 C_Settings --> S5 + C_McpSettings --> S6 + C_McpServerCard --> S6 + C_McpResourceBrowser --> S6 & S7 + C_McpServersSelector --> S6 %% Stores export Reactive State S1 -. exports .-> ChatExports + SA -. exports .-> AgenticExports S2 -. exports .-> ConvExports S3 -. exports .-> ModelsExports S4 -. exports .-> ServerExports S5 -. exports .-> SettingsExports + S6 -. exports .-> MCPExports + S7 -. 
exports .-> MCPExports + + %% chatStore → agenticStore (agentic loop orchestration) + S1 --> SA + SA --> SV1 + SA --> S6 %% Stores use Services S1 --> SV1 & SV4 @@ -228,28 +306,35 @@ end S3 --> SV2 & SV3 S4 --> SV3 S5 --> SV5 + S6 --> SV6 + S7 --> SV6 %% Services to Storage SV4 --> ST1 ST1 --> ST2 & ST3 SV5 --> ST5 - ST5 --> ST6 & ST7 + ST5 --> ST6 & ST7 & ST8 %% Services to APIs SV1 --> API1 SV2 --> API3 & API4 SV3 --> API2 + %% MCP → External Servers + SV6 --> EXT1 & EXT2 + %% Styling classDef routeStyle fill:#e1f5fe,stroke:#01579b,stroke-width:2px classDef componentStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px classDef componentGroupStyle fill:#e1bee7,stroke:#7b1fa2,stroke-width:1px + classDef hookStyle fill:#fff8e1,stroke:#ff8f00,stroke-width:2px classDef storeStyle fill:#fff3e0,stroke:#e65100,stroke-width:2px classDef stateStyle fill:#ffe0b2,stroke:#e65100,stroke-width:1px classDef methodStyle fill:#ffecb3,stroke:#e65100,stroke-width:1px classDef reactiveStyle fill:#fffde7,stroke:#f9a825,stroke-width:1px classDef serviceStyle fill:#e8f5e9,stroke:#2e7d32,stroke-width:2px classDef serviceMStyle fill:#c8e6c9,stroke:#2e7d32,stroke-width:1px + classDef externalStyle fill:#f3e5f5,stroke:#6a1b9a,stroke-width:2px,stroke-dasharray: 5 5 classDef storageStyle fill:#fce4ec,stroke:#c2185b,stroke-width:2px classDef apiStyle fill:#e3f2fd,stroke:#1565c0,stroke-width:2px @@ -257,23 +342,32 @@ end class C_Sidebar,C_Screen,C_Form,C_Messages,C_Message,C_MessageUser,C_MessageEditForm componentStyle class C_ModelsSelector,C_Settings componentStyle class C_Attach componentStyle - class H1,H2,H3 methodStyle - class LayoutComponents,ChatUIComponents componentGroupStyle - class Hooks storeStyle - class S1,S2,S3,S4,S5 storeStyle - class S1State,S2State,S3State,S4State,S5State stateStyle + class C_McpSettings,C_McpServerCard,C_McpResourceBrowser,C_McpResourcePreview,C_McpServersSelector componentStyle + class H1,H2,H3 hookStyle + class LayoutComponents,ChatUIComponents,MCPComponents 
componentGroupStyle + class Hooks hookStyle + classDef agenticStyle fill:#e8eaf6,stroke:#283593,stroke-width:2px + classDef agenticMethodStyle fill:#c5cae9,stroke:#283593,stroke-width:1px + + class S1,S2,S3,S4,S5,SA,S6,S7 storeStyle + class S1State,S2State,S3State,S4State,S5State,SAState,S6State,S7State stateStyle class S1Msg,S1Regen,S1Edit,S1Stream,S1LoadState,S1ProcState,S1Error,S1Utils methodStyle - class S2Lifecycle,S2ConvCRUD,S2MsgMgmt,S2Nav,S2Modal,S2Export,S2Utils methodStyle + class SASession,SAConfig,SAFlow methodStyle + class S2Lifecycle,S2ConvCRUD,S2MsgMgmt,S2Nav,S2McpOverrides,S2Export,S2Utils methodStyle class S3Getters,S3Modal,S3Status,S3Fetch,S3Select,S3LoadUnload,S3Utils methodStyle class S4Getters,S4Data,S4Utils methodStyle class S5Lifecycle,S5Update,S5Reset,S5Sync,S5Utils methodStyle - class ChatExports,ConvExports,ModelsExports,ServerExports,SettingsExports reactiveStyle - class SV1,SV2,SV3,SV4,SV5 serviceStyle + class S6Lifecycle,S6Health,S6Servers,S6Tools,S6Prompts methodStyle + class S7Resources,S7Cache,S7Subs,S7Attach methodStyle + class ChatExports,AgenticExports,ConvExports,ModelsExports,ServerExports,SettingsExports,MCPExports reactiveStyle + class SV1,SV2,SV3,SV4,SV5,SV6 serviceStyle + class SV6Transport,SV6Conn,SV6Tools,SV6Prompts,SV6Resources,SV6Complete serviceMStyle + class EXT1,EXT2 externalStyle class SV1Msg,SV1Stream,SV1Convert,SV1Utils serviceMStyle class SV2List,SV2LoadUnload,SV2Status serviceMStyle class SV3Fetch serviceMStyle class SV4Conv,SV4Msg,SV4Node,SV4Import serviceMStyle class SV5Extract,SV5Merge,SV5Info,SV5Diff serviceMStyle - class ST1,ST2,ST3,ST5,ST6,ST7 storageStyle + class ST1,ST2,ST3,ST5,ST6,ST7,ST8 storageStyle class API1,API2,API3,API4 apiStyle ``` diff --git a/tools/server/webui/docs/flows/chat-flow.md b/tools/server/webui/docs/flows/chat-flow.md index 05e1df385..296693c6a 100644 --- a/tools/server/webui/docs/flows/chat-flow.md +++ b/tools/server/webui/docs/flows/chat-flow.md @@ -2,8 +2,10 @@ sequenceDiagram 
participant UI as 🧩 ChatForm / ChatMessage participant chatStore as 🗄️ chatStore + participant agenticStore as 🗄️ agenticStore participant convStore as 🗄️ conversationsStore participant settingsStore as 🗄️ settingsStore + participant mcpStore as 🗄️ mcpStore participant ChatSvc as ⚙️ ChatService participant DbSvc as ⚙️ DatabaseService participant API as 🌐 /v1/chat/completions @@ -25,6 +27,9 @@ sequenceDiagram Note over convStore: → see conversations-flow.mmd end + chatStore->>mcpStore: consumeResourceAttachmentsAsExtras() + Note right of mcpStore: Converts pending MCP resource
attachments into message extras + chatStore->>chatStore: addMessage("user", content, extras) chatStore->>DbSvc: createMessageBranch(userMsg, parentId) chatStore->>convStore: addMessageToActive(userMsg) @@ -38,7 +43,7 @@ sequenceDiagram deactivate chatStore %% ═══════════════════════════════════════════════════════════════════════════ - Note over UI,API: 🌊 STREAMING + Note over UI,API: 🌊 STREAMING (with agentic flow detection) %% ═══════════════════════════════════════════════════════════════════════════ activate chatStore @@ -52,10 +57,17 @@ sequenceDiagram chatStore->>chatStore: getApiOptions() Note right of chatStore: Merge from settingsStore.config:
temperature, max_tokens, top_p, etc. - chatStore->>ChatSvc: sendMessage(messages, options, signal) + alt agenticConfig.enabled && mcpStore has connected servers + chatStore->>agenticStore: runAgenticFlow(convId, messages, assistantMsg, options, signal) + Note over agenticStore: Multi-turn agentic loop:
1. Call ChatService.sendMessage()
2. If response has tool_calls → execute via mcpStore
3. Append tool results as messages
4. Loop until no more tool_calls or maxTurns
→ see agentic flow details below + agenticStore-->>chatStore: final response with timings + else standard (non-agentic) flow + chatStore->>ChatSvc: sendMessage(messages, options, signal) + end + activate ChatSvc - ChatSvc->>ChatSvc: convertMessageToChatData(messages) + ChatSvc->>ChatSvc: convertDbMessageToApiChatMessageData(messages) Note right of ChatSvc: DatabaseMessage[] → ApiChatMessageData[]
Process attachments (images, PDFs, audio) ChatSvc->>API: POST /v1/chat/completions @@ -63,7 +75,7 @@ sequenceDiagram loop SSE chunks API-->>ChatSvc: data: {"choices":[{"delta":{...}}]} - ChatSvc->>ChatSvc: parseSSEChunk(line) + ChatSvc->>ChatSvc: handleStreamResponse(response) alt content chunk ChatSvc-->>chatStore: onChunk(content) @@ -154,12 +166,15 @@ sequenceDiagram Note over UI,API: ✏️ EDIT USER MESSAGE %% ═══════════════════════════════════════════════════════════════════════════ - UI->>chatStore: editUserMessagePreserveResponses(msgId, newContent) + UI->>chatStore: editMessageWithBranching(msgId, newContent, extras) activate chatStore chatStore->>chatStore: Get parent of target message chatStore->>DbSvc: createMessageBranch(editedMsg, parentId) chatStore->>convStore: refreshActiveMessages() Note right of chatStore: Creates new branch, original preserved + chatStore->>chatStore: createAssistantMessage(editedMsg.id) + chatStore->>chatStore: streamChatCompletion(...) + Note right of chatStore: Automatically regenerates response deactivate chatStore %% ═══════════════════════════════════════════════════════════════════════════ @@ -171,4 +186,43 @@ sequenceDiagram Note right of chatStore: errorDialogState = {type: 'timeout'|'server', message} chatStore->>convStore: removeMessageAtIndex(failedMsgIdx) chatStore->>DbSvc: deleteMessage(failedMsgId) + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,API: 🤖 AGENTIC LOOP (when agenticConfig.enabled) + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over agenticStore: agenticStore.runAgenticFlow(convId, messages, assistantMsg, options, signal) + activate agenticStore + agenticStore->>agenticStore: getSession(convId) or create new + agenticStore->>agenticStore: updateSession(turn: 0, running: true) + + loop executeAgenticLoop (until no tool_calls or maxTurns) + agenticStore->>agenticStore: turn++ + agenticStore->>ChatSvc: 
sendMessage(messages, options, signal) + ChatSvc->>API: POST /v1/chat/completions + API-->>ChatSvc: response with potential tool_calls + ChatSvc-->>agenticStore: onComplete(content, reasoning, timings, toolCalls) + + alt response has tool_calls + agenticStore->>agenticStore: normalizeToolCalls(toolCalls) + loop for each tool_call + agenticStore->>agenticStore: updateSession(streamingToolCall) + agenticStore->>mcpStore: executeTool(mcpCall, signal) + mcpStore-->>agenticStore: tool result + agenticStore->>agenticStore: extractBase64Attachments(result) + agenticStore->>agenticStore: emitToolCallResult(convId, ...) + agenticStore->>convStore: addMessageToActive(toolResultMsg) + agenticStore->>DbSvc: createMessageBranch(toolResultMsg) + end + agenticStore->>agenticStore: Create new assistantMsg for next turn + Note right of agenticStore: Continue loop with updated messages + else no tool_calls (final response) + agenticStore->>agenticStore: buildFinalTimings(allTurns) + Note right of agenticStore: Break loop, return final response + end + end + + agenticStore->>agenticStore: updateSession(running: false) + agenticStore-->>chatStore: final content, timings, model + deactivate agenticStore ``` diff --git a/tools/server/webui/docs/flows/conversations-flow.md b/tools/server/webui/docs/flows/conversations-flow.md index 185ed16e0..bd2309bc0 100644 --- a/tools/server/webui/docs/flows/conversations-flow.md +++ b/tools/server/webui/docs/flows/conversations-flow.md @@ -6,7 +6,7 @@ sequenceDiagram participant DbSvc as ⚙️ DatabaseService participant IDB as 💾 IndexedDB - Note over convStore: State:
conversations: DatabaseConversation[]
activeConversation: DatabaseConversation | null
activeMessages: DatabaseMessage[]
isInitialized: boolean
usedModalities: $derived({vision, audio}) + Note over convStore: State:
conversations: DatabaseConversation[]
activeConversation: DatabaseConversation | null
activeMessages: DatabaseMessage[]
isInitialized: boolean
pendingMcpServerOverrides: Map<string, McpServerOverride> %% ═══════════════════════════════════════════════════════════════════════════ Note over UI,IDB: 🚀 INITIALIZATION @@ -37,6 +37,13 @@ sequenceDiagram convStore->>convStore: conversations.unshift(conversation) convStore->>convStore: activeConversation = $state(conversation) convStore->>convStore: activeMessages = $state([]) + + alt pendingMcpServerOverrides has entries + loop each pending override + convStore->>DbSvc: Store MCP server override for new conversation + end + convStore->>convStore: clearPendingMcpServerOverrides() + end deactivate convStore %% ═══════════════════════════════════════════════════════════════════════════ @@ -58,8 +65,7 @@ sequenceDiagram Note right of convStore: Filter to show only current branch path convStore->>convStore: activeMessages = $state(filtered) - convStore->>chatStore: syncLoadingStateForChat(convId) - Note right of chatStore: Sync isLoading/currentResponse if streaming + Note right of convStore: Route (+page.svelte) then calls:
chatStore.syncLoadingStateForChat(convId) deactivate convStore %% ═══════════════════════════════════════════════════════════════════════════ @@ -121,16 +127,36 @@ sequenceDiagram end deactivate convStore + UI->>convStore: deleteAll() + activate convStore + convStore->>DbSvc: Delete all conversations and messages + convStore->>convStore: conversations = [] + convStore->>convStore: clearActiveConversation() + deactivate convStore + %% ═══════════════════════════════════════════════════════════════════════════ - Note over UI,IDB: 📊 MODALITY TRACKING + Note over UI,IDB: 🔌 MCP SERVER PER-CHAT OVERRIDES %% ═══════════════════════════════════════════════════════════════════════════ - Note over convStore: usedModalities = $derived.by(() => {
calculateModalitiesFromMessages(activeMessages)
}) + Note over convStore: Conversations can override which MCP servers are enabled. + Note over convStore: Uses pendingMcpServerOverrides before conversation
is created, then persists to conversation metadata. - Note over convStore: Scans activeMessages for attachments:
- IMAGE → vision: true
- PDF (processedAsImages) → vision: true
- AUDIO → audio: true + UI->>convStore: setMcpServerOverride(convId, serverName, override) + Note right of convStore: override = {enabled: boolean} - UI->>convStore: getModalitiesUpToMessage(msgId) - Note right of convStore: Used for regeneration validation
Only checks messages BEFORE target + UI->>convStore: toggleMcpServerForChat(convId, serverName, enabled) + activate convStore + convStore->>convStore: setMcpServerOverride(convId, serverName, {enabled}) + deactivate convStore + + UI->>convStore: isMcpServerEnabledForChat(convId, serverName) + Note right of convStore: Check override → fall back to global MCP config + + UI->>convStore: getAllMcpServerOverrides(convId) + Note right of convStore: Returns all overrides for a conversation + + UI->>convStore: removeMcpServerOverride(convId, serverName) + UI->>convStore: getMcpServerOverride(convId, serverName) %% ═══════════════════════════════════════════════════════════════════════════ Note over UI,IDB: 📤 EXPORT / 📥 IMPORT @@ -148,8 +174,10 @@ sequenceDiagram UI->>convStore: importConversations(file) activate convStore convStore->>convStore: Parse JSON file + convStore->>convStore: importConversationsData(parsed) convStore->>DbSvc: importConversations(parsed) - DbSvc->>IDB: Bulk INSERT conversations + messages + Note right of DbSvc: Skips duplicate conversations
(checks existing by ID) + DbSvc->>IDB: INSERT conversations + messages (skip existing) convStore->>convStore: loadConversations() deactivate convStore ``` diff --git a/tools/server/webui/docs/flows/database-flow.md b/tools/server/webui/docs/flows/database-flow.md index 50f8284e3..38cd6941c 100644 --- a/tools/server/webui/docs/flows/database-flow.md +++ b/tools/server/webui/docs/flows/database-flow.md @@ -66,6 +66,14 @@ sequenceDiagram DbSvc-->>Store: rootMessageId deactivate DbSvc + Store->>DbSvc: createSystemMessage(convId, content, parentId) + activate DbSvc + DbSvc->>DbSvc: Create message {role: "system", parent: parentId} + DbSvc->>Dexie: db.messages.add(systemMsg) + Dexie->>IDB: INSERT + DbSvc-->>Store: DatabaseMessage + deactivate DbSvc + Store->>DbSvc: createMessageBranch(message, parentId) activate DbSvc DbSvc->>DbSvc: Generate UUID for new message @@ -116,6 +124,13 @@ sequenceDiagram end DbSvc->>Dexie: db.messages.delete(msgId) Dexie->>IDB: DELETE target message + + alt target message has a parent + DbSvc->>Dexie: db.messages.get(parentId) + DbSvc->>DbSvc: parent.children.filter(id !== msgId) + DbSvc->>Dexie: db.messages.update(parentId, {children}) + Note right of DbSvc: Remove deleted message from parent's children[] + end deactivate DbSvc %% ═══════════════════════════════════════════════════════════════════════════ @@ -125,12 +140,16 @@ sequenceDiagram Store->>DbSvc: importConversations(data) activate DbSvc loop each conversation in data - DbSvc->>DbSvc: Generate new UUIDs (avoid conflicts) - DbSvc->>Dexie: db.conversations.add(conversation) - Dexie->>IDB: INSERT conversation - loop each message - DbSvc->>Dexie: db.messages.add(message) - Dexie->>IDB: INSERT message + DbSvc->>Dexie: db.conversations.get(conv.id) + alt conversation already exists + Note right of DbSvc: Skip duplicate (keep existing) + else conversation is new + DbSvc->>Dexie: db.conversations.add(conversation) + Dexie->>IDB: INSERT conversation + loop each message + DbSvc->>Dexie: 
db.messages.add(message) + Dexie->>IDB: INSERT message + end end end deactivate DbSvc diff --git a/tools/server/webui/docs/flows/mcp-flow.md b/tools/server/webui/docs/flows/mcp-flow.md new file mode 100644 index 000000000..c8aa66659 --- /dev/null +++ b/tools/server/webui/docs/flows/mcp-flow.md @@ -0,0 +1,226 @@ +```mermaid +sequenceDiagram + participant UI as 🧩 McpServersSettings / ChatForm + participant chatStore as 🗄️ chatStore + participant mcpStore as 🗄️ mcpStore + participant mcpResStore as 🗄️ mcpResourceStore + participant convStore as 🗄️ conversationsStore + participant MCPSvc as ⚙️ MCPService + participant LS as 💾 LocalStorage + participant ExtMCP as 🔌 External MCP Server + + Note over mcpStore: State:
isInitializing, error
toolCount, connectedServers
healthChecks (Map)
connections (Map)
toolsIndex (Map)
serverConfigs (Map) + + Note over mcpResStore: State:
serverResources (Map)
cachedResources (Map)
subscriptions (Map)
attachments[] + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 🚀 INITIALIZATION (App Startup) + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>mcpStore: ensureInitialized() + activate mcpStore + + mcpStore->>LS: get(MCP_SERVERS_LOCALSTORAGE_KEY) + LS-->>mcpStore: MCPServerSettingsEntry[] + + mcpStore->>mcpStore: parseServerSettings(servers) + Note right of mcpStore: Filter enabled servers
Build MCPServerConfig objects
Per-chat overrides checked via convStore + + loop For each enabled server + mcpStore->>mcpStore: runHealthCheck(serverId) + mcpStore->>mcpStore: updateHealthCheck(id, CONNECTING) + + mcpStore->>MCPSvc: connect(serverName, config, clientInfo, capabilities, onPhase) + activate MCPSvc + + MCPSvc->>MCPSvc: createTransport(config) + Note right of MCPSvc: WebSocket / StreamableHTTP / SSE
with optional CORS proxy + + MCPSvc->>ExtMCP: Transport handshake + ExtMCP-->>MCPSvc: Connection established + + MCPSvc->>ExtMCP: Initialize request + Note right of ExtMCP: Exchange capabilities
Server info, protocol version + + ExtMCP-->>MCPSvc: InitializeResult (serverInfo, capabilities) + + MCPSvc->>ExtMCP: listTools() + ExtMCP-->>MCPSvc: Tool[] + + MCPSvc-->>mcpStore: MCPConnection + deactivate MCPSvc + + mcpStore->>mcpStore: connections.set(serverName, connection) + mcpStore->>mcpStore: indexTools(connection.tools, serverName) + Note right of mcpStore: toolsIndex.set(toolName, serverName)
Handle name conflicts with prefixes + + mcpStore->>mcpStore: updateHealthCheck(id, SUCCESS) + mcpStore->>mcpStore: _connectedServers.push(serverName) + + alt Server supports resources + mcpStore->>MCPSvc: listAllResources(connection) + MCPSvc->>ExtMCP: listResources() + ExtMCP-->>MCPSvc: MCPResource[] + MCPSvc-->>mcpStore: resources + + mcpStore->>MCPSvc: listAllResourceTemplates(connection) + MCPSvc->>ExtMCP: listResourceTemplates() + ExtMCP-->>MCPSvc: MCPResourceTemplate[] + MCPSvc-->>mcpStore: templates + + mcpStore->>mcpResStore: setServerResources(serverName, resources, templates) + end + end + + mcpStore->>mcpStore: _isInitializing = false + deactivate mcpStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 🔧 TOOL EXECUTION (Chat with Tools) + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>mcpStore: executeTool(mcpCall: MCPToolCall, signal?) + activate mcpStore + + mcpStore->>mcpStore: toolsIndex.get(mcpCall.function.name) + Note right of mcpStore: Resolve serverName from toolsIndex
MCPToolCall = {id, type, function: {name, arguments}} + + mcpStore->>mcpStore: acquireConnection() + Note right of mcpStore: activeFlowCount++
Prevent shutdown during execution + + mcpStore->>mcpStore: connection = connections.get(serverName) + + mcpStore->>MCPSvc: callTool(connection, {name, arguments}, signal) + activate MCPSvc + + MCPSvc->>MCPSvc: throwIfAborted(signal) + MCPSvc->>ExtMCP: callTool(name, arguments) + + alt Tool execution success + ExtMCP-->>MCPSvc: ToolCallResult (content, isError) + MCPSvc->>MCPSvc: formatToolResult(result) + Note right of MCPSvc: Handle text, image (base64),
embedded resource content + MCPSvc-->>mcpStore: ToolExecutionResult + else Tool execution error + ExtMCP-->>MCPSvc: Error + MCPSvc-->>mcpStore: throw Error + else Aborted + MCPSvc-->>mcpStore: throw AbortError + end + + deactivate MCPSvc + + mcpStore->>mcpStore: releaseConnection() + Note right of mcpStore: activeFlowCount-- + + mcpStore-->>UI: ToolExecutionResult + deactivate mcpStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 📎 RESOURCE ATTACHMENT CONSUMPTION + %% ═══════════════════════════════════════════════════════════════════════════ + + chatStore->>mcpStore: consumeResourceAttachmentsAsExtras() + activate mcpStore + mcpStore->>mcpResStore: getAttachments() + mcpResStore-->>mcpStore: MCPResourceAttachment[] + mcpStore->>mcpStore: Convert attachments to message extras + mcpStore->>mcpResStore: clearAttachments() + mcpStore-->>chatStore: MessageExtra[] (for user message) + deactivate mcpStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 📝 PROMPT OPERATIONS + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>mcpStore: getAllPrompts() + activate mcpStore + + loop For each connected server with prompts capability + mcpStore->>MCPSvc: listPrompts(connection) + MCPSvc->>ExtMCP: listPrompts() + ExtMCP-->>MCPSvc: Prompt[] + MCPSvc-->>mcpStore: prompts + end + + mcpStore-->>UI: MCPPromptInfo[] (with serverName) + deactivate mcpStore + + UI->>mcpStore: getPrompt(serverName, promptName, args?) 
+ activate mcpStore + + mcpStore->>MCPSvc: getPrompt(connection, name, args) + MCPSvc->>ExtMCP: getPrompt({name, arguments}) + ExtMCP-->>MCPSvc: GetPromptResult (messages) + MCPSvc-->>mcpStore: GetPromptResult + + mcpStore-->>UI: GetPromptResult + deactivate mcpStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 📁 RESOURCE OPERATIONS + %% ═══════════════════════════════════════════════════════════════════════════ + + UI->>mcpResStore: addAttachment(resourceInfo) + activate mcpResStore + mcpResStore->>mcpResStore: Create MCPResourceAttachment (loading: true) + mcpResStore-->>UI: attachment + + UI->>mcpStore: readResource(serverName, uri) + activate mcpStore + + mcpStore->>MCPSvc: readResource(connection, uri) + MCPSvc->>ExtMCP: readResource({uri}) + ExtMCP-->>MCPSvc: MCPReadResourceResult (contents) + MCPSvc-->>mcpStore: contents + + mcpStore-->>UI: MCPResourceContent[] + deactivate mcpStore + + UI->>mcpResStore: updateAttachmentContent(attachmentId, content) + mcpResStore->>mcpResStore: cacheResourceContent(resource, content) + deactivate mcpResStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 🔄 AUTO-RECONNECTION + %% ═══════════════════════════════════════════════════════════════════════════ + + Note over mcpStore: On WebSocket close or connection error: + mcpStore->>mcpStore: autoReconnect(serverName, attempt) + activate mcpStore + + mcpStore->>mcpStore: Calculate backoff delay + Note right of mcpStore: delay = min(30s, 1s * 2^attempt) + + mcpStore->>mcpStore: Wait for delay + mcpStore->>mcpStore: reconnectServer(serverName) + + alt Reconnection success + mcpStore->>mcpStore: updateHealthCheck(id, SUCCESS) + else Max attempts reached + mcpStore->>mcpStore: updateHealthCheck(id, ERROR) + end + deactivate mcpStore + + %% ═══════════════════════════════════════════════════════════════════════════ + Note over UI,ExtMCP: 🛑 SHUTDOWN + %% 
═══════════════════════════════════════════════════════════════════════════ + + UI->>mcpStore: shutdown() + activate mcpStore + + mcpStore->>mcpStore: Wait for activeFlowCount == 0 + + loop For each connection + mcpStore->>MCPSvc: disconnect(connection) + MCPSvc->>MCPSvc: transport.onclose = undefined + MCPSvc->>ExtMCP: close() + end + + mcpStore->>mcpStore: connections.clear() + mcpStore->>mcpStore: toolsIndex.clear() + mcpStore->>mcpStore: _connectedServers = [] + + mcpStore->>mcpResStore: clear() + deactivate mcpStore +``` diff --git a/tools/server/webui/package-lock.json b/tools/server/webui/package-lock.json index 8d13e5a53..361144915 100644 --- a/tools/server/webui/package-lock.json +++ b/tools/server/webui/package-lock.json @@ -8,6 +8,7 @@ "name": "webui", "version": "1.0.0", "dependencies": { + "@modelcontextprotocol/sdk": "^1.25.1", "highlight.js": "^11.11.1", "mode-watcher": "^1.1.0", "pdfjs-dist": "^5.4.54", @@ -19,7 +20,8 @@ "remark-html": "^16.0.1", "remark-rehype": "^11.1.2", "svelte-sonner": "^1.0.5", - "unist-util-visit": "^5.0.0" + "unist-util-visit": "^5.0.0", + "zod": "^4.2.1" }, "devDependencies": { "@chromatic-com/storybook": "^5.0.0", @@ -853,6 +855,18 @@ "dev": true, "license": "MIT" }, + "node_modules/@hono/node-server": { + "version": "1.19.9", + "resolved": "https://registry.npmjs.org/@hono/node-server/-/node-server-1.19.9.tgz", + "integrity": "sha512-vHL6w3ecZsky+8P5MD+eFfaGTyCeOHUIFYMGpQGbrBTSmNNoxv0if69rEZ5giu36weC5saFuznL411gRX7bJDw==", + "license": "MIT", + "engines": { + "node": ">=18.14.1" + }, + "peerDependencies": { + "hono": "^4" + } + }, "node_modules/@humanfs/core": { "version": "0.19.1", "resolved": "https://registry.npmjs.org/@humanfs/core/-/core-0.19.1.tgz", @@ -1044,6 +1058,68 @@ "react": ">=16" } }, + "node_modules/@modelcontextprotocol/sdk": { + "version": "1.26.0", + "resolved": "https://registry.npmjs.org/@modelcontextprotocol/sdk/-/sdk-1.26.0.tgz", + "integrity": 
"sha512-Y5RmPncpiDtTXDbLKswIJzTqu2hyBKxTNsgKqKclDbhIgg1wgtf1fRuvxgTnRfcnxtvvgbIEcqUOzZrJ6iSReg==", + "license": "MIT", + "dependencies": { + "@hono/node-server": "^1.19.9", + "ajv": "^8.17.1", + "ajv-formats": "^3.0.1", + "content-type": "^1.0.5", + "cors": "^2.8.5", + "cross-spawn": "^7.0.5", + "eventsource": "^3.0.2", + "eventsource-parser": "^3.0.0", + "express": "^5.2.1", + "express-rate-limit": "^8.2.1", + "hono": "^4.11.4", + "jose": "^6.1.3", + "json-schema-typed": "^8.0.2", + "pkce-challenge": "^5.0.0", + "raw-body": "^3.0.0", + "zod": "^3.25 || ^4.0", + "zod-to-json-schema": "^3.25.1" + }, + "engines": { + "node": ">=18" + }, + "peerDependencies": { + "@cfworker/json-schema": "^4.1.1", + "zod": "^3.25 || ^4.0" + }, + "peerDependenciesMeta": { + "@cfworker/json-schema": { + "optional": true + }, + "zod": { + "optional": false + } + } + }, + "node_modules/@modelcontextprotocol/sdk/node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/@modelcontextprotocol/sdk/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, "node_modules/@napi-rs/canvas": { "version": "0.1.76", "resolved": "https://registry.npmjs.org/@napi-rs/canvas/-/canvas-0.1.76.tgz", @@ -2164,9 +2240,9 @@ } }, "node_modules/@sveltejs/kit": { - "version": "2.52.0", - "resolved": 
"https://registry.npmjs.org/@sveltejs/kit/-/kit-2.52.0.tgz", - "integrity": "sha512-zG+HmJuSF7eC0e7xt2htlOcEMAdEtlVdb7+gAr+ef08EhtwUsjLxcAwBgUCJY3/5p08OVOxVZti91WfXeuLvsg==", + "version": "2.50.2", + "resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.50.2.tgz", + "integrity": "sha512-875hTUkEbz+MyJIxWbQjfMaekqdmEKUUfR7JyKcpfMRZqcGyrO9Gd+iS1D/Dx8LpE5FEtutWGOtlAh4ReSAiOA==", "dev": true, "license": "MIT", "peer": true, @@ -3282,6 +3358,19 @@ "url": "https://opencollective.com/vitest" } }, + "node_modules/accepts": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/accepts/-/accepts-2.0.0.tgz", + "integrity": "sha512-5cvg6CtKwfgdmVqY1WIiXKc3Q1bkRqGLi+2W/6ao+6Y7gu/RCwRuAhGEzh5B4KlszSuTLgZYuqFqo5bImjNKng==", + "license": "MIT", + "dependencies": { + "mime-types": "^3.0.0", + "negotiator": "^1.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, "node_modules/acorn": { "version": "8.15.0", "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.15.0.tgz", @@ -3322,6 +3411,45 @@ "url": "https://github.com/sponsors/epoberezkin" } }, + "node_modules/ajv-formats": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/ajv-formats/-/ajv-formats-3.0.1.tgz", + "integrity": "sha512-8iUql50EUR+uUcdRQ3HDqa6EVyo3docL8g5WJ3FNcWmu62IbkGUue/pEyLBW8VGKKucTPgqeks4fIU1DA4yowQ==", + "license": "MIT", + "dependencies": { + "ajv": "^8.0.0" + }, + "peerDependencies": { + "ajv": "^8.0.0" + }, + "peerDependenciesMeta": { + "ajv": { + "optional": true + } + } + }, + "node_modules/ajv-formats/node_modules/ajv": { + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "license": "MIT", + "dependencies": { + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2" + }, + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/ajv-formats/node_modules/json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==", + "license": "MIT" + }, "node_modules/ansi-regex": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", @@ -3464,9 +3592,9 @@ } }, "node_modules/bits-ui": { - "version": "2.15.7", - "resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.15.7.tgz", - "integrity": "sha512-M9VrQAJXnT3xfhN/joEtVXhO794yBPmadZfNtDT4t4QwI8wgCBmDuv8FlH6K4v0q0Ugw07tumAPfym9MU2BGpg==", + "version": "2.15.5", + "resolved": "https://registry.npmjs.org/bits-ui/-/bits-ui-2.15.5.tgz", + "integrity": "sha512-WhS+P+E//ClLfKU6KqjKC17nGDRLnz+vkwoP6ClFUPd5m1fFVDxTElPX8QVsduLj5V1KFDxlnv6sW2G5Lqk+vw==", "dev": true, "license": "MIT", "dependencies": { @@ -3534,6 +3662,46 @@ "svelte": "^5.30.2" } }, + "node_modules/body-parser": { + "version": "2.2.2", + "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", + "integrity": "sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==", + "license": "MIT", + "dependencies": { + "bytes": "^3.1.2", + "content-type": "^1.0.5", + "debug": "^4.4.3", + "http-errors": "^2.0.0", + "iconv-lite": "^0.7.0", + "on-finished": "^2.4.1", + "qs": "^6.14.1", + "raw-body": "^3.0.1", + "type-is": "^2.0.1" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/body-parser/node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "license": 
"MIT", + "dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/brace-expansion": { "version": "1.1.12", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz", @@ -3575,6 +3743,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/bytes": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", + "integrity": "sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/cac": { "version": "6.7.14", "resolved": "https://registry.npmjs.org/cac/-/cac-6.7.14.tgz", @@ -3589,7 +3766,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/call-bind-apply-helpers/-/call-bind-apply-helpers-1.0.2.tgz", "integrity": "sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -3603,7 +3779,6 @@ "version": "1.0.4", "resolved": "https://registry.npmjs.org/call-bound/-/call-bound-1.0.4.tgz", "integrity": "sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", @@ -3816,6 +3991,28 @@ "dev": true, "license": "MIT" }, + "node_modules/content-disposition": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/content-disposition/-/content-disposition-1.0.1.tgz", + "integrity": "sha512-oIXISMynqSqm241k6kcQ5UwttDILMK4BiurCfGEREw6+X9jkkpEe5T9FZaApyLGGOnFuyMWZpdolTXMtvEJ08Q==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/content-type": { + 
"version": "1.0.5", + "resolved": "https://registry.npmjs.org/content-type/-/content-type-1.0.5.tgz", + "integrity": "sha512-nTjqfcBFEipKdXCv4YDQWCfmcLZKm81ldF0pAopTvyrFGVbcR6P/VAAd5G7N+0tTr8QqiU0tFadD6FK4NtJwOA==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/cookie": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.6.0.tgz", @@ -3826,6 +4023,28 @@ "node": ">= 0.6" } }, + "node_modules/cookie-signature": { + "version": "1.2.2", + "resolved": "https://registry.npmjs.org/cookie-signature/-/cookie-signature-1.2.2.tgz", + "integrity": "sha512-D76uU73ulSXrD1UXF4KE2TMxVVwhsnCgfAyTg9k8P6KGZjlXKrOLe4dJQKI3Bxi5wjesZoFXJWElNWBjPZMbhg==", + "license": "MIT", + "engines": { + "node": ">=6.6.0" + } + }, + "node_modules/cors": { + "version": "2.8.5", + "resolved": "https://registry.npmjs.org/cors/-/cors-2.8.5.tgz", + "integrity": "sha512-KIHbLJqu73RGr/hnbrO9uBeixNGuvSQjul/jdFvS/KFSIH1hWVd1ng7zOHx+YrEfInLG7q4n6GHQ9cDtxv/P6g==", + "license": "MIT", + "dependencies": { + "object-assign": "^4", + "vary": "^1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/corser": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/corser/-/corser-2.0.1.tgz", @@ -3840,7 +4059,6 @@ "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, "license": "MIT", "dependencies": { "path-key": "^3.1.0", @@ -4000,6 +4218,15 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/depd": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/depd/-/depd-2.0.0.tgz", + "integrity": "sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/dequal": { "version": "2.0.3", "resolved": 
"https://registry.npmjs.org/dequal/-/dequal-2.0.3.tgz", @@ -4056,7 +4283,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "integrity": "sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.1", @@ -4074,6 +4300,12 @@ "dev": true, "license": "MIT" }, + "node_modules/ee-first": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz", + "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==", + "license": "MIT" + }, "node_modules/emoji-regex": { "version": "9.2.2", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", @@ -4081,6 +4313,15 @@ "dev": true, "license": "MIT" }, + "node_modules/encodeurl": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/encodeurl/-/encodeurl-2.0.0.tgz", + "integrity": "sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/enhanced-resolve": { "version": "5.18.2", "resolved": "https://registry.npmjs.org/enhanced-resolve/-/enhanced-resolve-5.18.2.tgz", @@ -4112,7 +4353,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", "integrity": "sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4122,7 +4362,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/es-errors/-/es-errors-1.3.0.tgz", "integrity": "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4139,7 +4378,6 @@ "version": "1.1.1", "resolved": 
"https://registry.npmjs.org/es-object-atoms/-/es-object-atoms-1.1.1.tgz", "integrity": "sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0" @@ -4202,6 +4440,12 @@ "@esbuild/win32-x64": "0.25.8" } }, + "node_modules/escape-html": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/escape-html/-/escape-html-1.0.3.tgz", + "integrity": "sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==", + "license": "MIT" + }, "node_modules/escape-string-regexp": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz", @@ -4293,9 +4537,9 @@ } }, "node_modules/eslint-plugin-storybook": { - "version": "10.2.9", - "resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-10.2.9.tgz", - "integrity": "sha512-nmPxjPw2KfmosqAUb/W0jmEfAZzK97kyJ8W5KMuweCblwjIL0hI/GMsWSP8CCBPnhQ9LnuxtT8JtQUOsslcbwA==", + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/eslint-plugin-storybook/-/eslint-plugin-storybook-10.2.4.tgz", + "integrity": "sha512-D8a6Y+iun2MSOpgps0Vd/t8y9Y5ZZ7O2VeKqw2PCv2+b7yInqogOS2VBMSRZVfP8TTGQgDpbUK67k7KZEUC7Ng==", "dev": true, "license": "MIT", "dependencies": { @@ -4303,7 +4547,174 @@ }, "peerDependencies": { "eslint": ">=8", - "storybook": "^10.2.9" + "storybook": "^10.2.4" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/project-service": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/project-service/-/project-service-8.54.0.tgz", + "integrity": "sha512-YPf+rvJ1s7MyiWM4uTRhE4DvBXrEV+d8oC3P9Y2eT7S+HBS0clybdMIPnhiATi9vZOYDc7OQ1L/i6ga6NFYK/g==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/tsconfig-utils": "^8.54.0", + "@typescript-eslint/types": "^8.54.0", + "debug": "^4.4.3" + }, + "engines": { + 
"node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/scope-manager": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-8.54.0.tgz", + "integrity": "sha512-27rYVQku26j/PbHYcVfRPonmOlVI6gihHtXFbTdB5sb6qA0wdAQAbyXFVarQ5t4HRojIz64IV90YtsjQSSGlQg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.54.0", + "@typescript-eslint/visitor-keys": "8.54.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/tsconfig-utils": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/tsconfig-utils/-/tsconfig-utils-8.54.0.tgz", + "integrity": "sha512-dRgOyT2hPk/JwxNMZDsIXDgyl9axdJI3ogZ2XWhBPsnZUv+hPesa5iuhdYt2gzwA9t8RE5ytOJ6xB0moV0Ujvw==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/types": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-8.54.0.tgz", + "integrity": "sha512-PDUI9R1BVjqu7AUDsRBbKMtwmjWcn4J3le+5LpcFgWULN3LvHC5rkc9gCVxbrsrGmO1jfPybN5s6h4Jy+OnkAA==", + "dev": true, + "license": "MIT", + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + 
"node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/typescript-estree": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-8.54.0.tgz", + "integrity": "sha512-BUwcskRaPvTk6fzVWgDPdUndLjB87KYDrN5EYGetnktoeAvPtO4ONHlAZDnj5VFnUANg0Sjm7j4usBlnoVMHwA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/project-service": "8.54.0", + "@typescript-eslint/tsconfig-utils": "8.54.0", + "@typescript-eslint/types": "8.54.0", + "@typescript-eslint/visitor-keys": "8.54.0", + "debug": "^4.4.3", + "minimatch": "^9.0.5", + "semver": "^7.7.3", + "tinyglobby": "^0.2.15", + "ts-api-utils": "^2.4.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/utils": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-8.54.0.tgz", + "integrity": "sha512-9Cnda8GS57AQakvRyG0PTejJNlA2xhvyNtEVIMlDWOOeEyBkYWhGPnfrIAnqxLMTSTo6q8g12XVjjev5l1NvMA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@eslint-community/eslint-utils": "^4.9.1", + "@typescript-eslint/scope-manager": "8.54.0", + "@typescript-eslint/types": "8.54.0", + "@typescript-eslint/typescript-estree": "8.54.0" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.57.0 || ^9.0.0", + "typescript": ">=4.8.4 <6.0.0" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/@typescript-eslint/visitor-keys": { + "version": "8.54.0", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-8.54.0.tgz", + "integrity": 
"sha512-VFlhGSl4opC0bprJiItPQ1RfUhGDIBokcPwaFH4yiBCaNPeld/9VeXbiPO1cLyorQi1G1vL+ecBk1x8o1axORA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@typescript-eslint/types": "8.54.0", + "eslint-visitor-keys": "^4.2.1" + }, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/brace-expansion": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.2.tgz", + "integrity": "sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "balanced-match": "^1.0.0" + } + }, + "node_modules/eslint-plugin-storybook/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "license": "ISC", + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" } }, "node_modules/eslint-plugin-svelte": { @@ -4474,6 +4885,15 @@ "node": ">=0.10.0" } }, + "node_modules/etag": { + "version": "1.8.1", + "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", + "integrity": "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/eventemitter3": { "version": "4.0.7", "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", @@ -4481,6 +4901,27 @@ "dev": true, "license": "MIT" }, + "node_modules/eventsource": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/eventsource/-/eventsource-3.0.7.tgz", + "integrity": 
"sha512-CRT1WTyuQoD771GW56XEZFQ/ZoSfWid1alKGDYMmkt2yl8UXrVR4pspqWNEcqKvVIzg6PAltWjxcSSPrboA4iA==", + "license": "MIT", + "dependencies": { + "eventsource-parser": "^3.0.1" + }, + "engines": { + "node": ">=18.0.0" + } + }, + "node_modules/eventsource-parser": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.6.tgz", + "integrity": "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg==", + "license": "MIT", + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/expect-type": { "version": "1.2.2", "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.2.2.tgz", @@ -4491,6 +4932,76 @@ "node": ">=12.0.0" } }, + "node_modules/express": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/express/-/express-5.2.1.tgz", + "integrity": "sha512-hIS4idWWai69NezIdRt2xFVofaF4j+6INOpJlVOLDO8zXGpUVEVzIYk12UUi2JzjEzWL3IOAxcTubgz9Po0yXw==", + "license": "MIT", + "dependencies": { + "accepts": "^2.0.0", + "body-parser": "^2.2.1", + "content-disposition": "^1.0.0", + "content-type": "^1.0.5", + "cookie": "^0.7.1", + "cookie-signature": "^1.2.1", + "debug": "^4.4.0", + "depd": "^2.0.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "finalhandler": "^2.1.0", + "fresh": "^2.0.0", + "http-errors": "^2.0.0", + "merge-descriptors": "^2.0.0", + "mime-types": "^3.0.0", + "on-finished": "^2.4.1", + "once": "^1.4.0", + "parseurl": "^1.3.3", + "proxy-addr": "^2.0.7", + "qs": "^6.14.0", + "range-parser": "^1.2.1", + "router": "^2.2.0", + "send": "^1.1.0", + "serve-static": "^2.2.0", + "statuses": "^2.0.1", + "type-is": "^2.0.1", + "vary": "^1.1.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/express-rate-limit": { + "version": "8.2.1", + "resolved": "https://registry.npmjs.org/express-rate-limit/-/express-rate-limit-8.2.1.tgz", + 
"integrity": "sha512-PCZEIEIxqwhzw4KF0n7QF4QqruVTcF73O5kFKUnGOyjbCCgizBBiFaYpd/fnBLUMPw/BWw9OsiN7GgrNYr7j6g==", + "license": "MIT", + "dependencies": { + "ip-address": "10.0.1" + }, + "engines": { + "node": ">= 16" + }, + "funding": { + "url": "https://github.com/sponsors/express-rate-limit" + }, + "peerDependencies": { + "express": ">= 4.11" + } + }, + "node_modules/express/node_modules/cookie": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/extend": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/extend/-/extend-3.0.2.tgz", @@ -4501,7 +5012,6 @@ "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", - "dev": true, "license": "MIT" }, "node_modules/fast-json-stable-stringify": { @@ -4518,6 +5028,22 @@ "dev": true, "license": "MIT" }, + "node_modules/fast-uri": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.1.0.tgz", + "integrity": "sha512-iPeeDKJSWf4IEOasVVrknXpaBV0IApz/gp7S2bb7Z4Lljbl2MGJRqInZiUrQwV16cpzw/D3S5j5Julj/gT52AA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/fdir": { "version": "6.5.0", "resolved": "https://registry.npmjs.org/fdir/-/fdir-6.5.0.tgz", @@ -4580,6 +5106,27 @@ "node": ">=8" } }, + "node_modules/finalhandler": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", + "integrity": 
"sha512-S8KoZgRZN+a5rNwqTxlZZePjT/4cnm0ROV70LedRHZ0p8u9fRID0hJUZQpkKLzro8LfmC8sx23bY6tVNxv8pQA==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "on-finished": "^2.4.1", + "parseurl": "^1.3.3", + "statuses": "^2.0.1" + }, + "engines": { + "node": ">= 18.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/find-up": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/find-up/-/find-up-5.0.0.tgz", @@ -4656,6 +5203,24 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/forwarded": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/forwarded/-/forwarded-0.2.0.tgz", + "integrity": "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/fresh": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/fresh/-/fresh-2.0.0.tgz", + "integrity": "sha512-Rx/WycZ60HOaqLKAi6cHRKKI7zxWbJ31MhntmtwMoaTeF7XFH9hhBp8vITaMidfljRQ6eYWCKkaTK+ykVJHP2A==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/fsevents": { "version": "2.3.2", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", @@ -4675,7 +5240,6 @@ "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "integrity": "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==", - "dev": true, "license": "MIT", "funding": { "url": "https://github.com/sponsors/ljharb" @@ -4685,7 +5249,6 @@ "version": "1.3.0", "resolved": "https://registry.npmjs.org/get-intrinsic/-/get-intrinsic-1.3.0.tgz", "integrity": "sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==", - "dev": true, "license": "MIT", "dependencies": { "call-bind-apply-helpers": "^1.0.2", @@ -4710,7 
+5273,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/get-proto/-/get-proto-1.0.1.tgz", "integrity": "sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==", - "dev": true, "license": "MIT", "dependencies": { "dunder-proto": "^1.0.1", @@ -4797,7 +5359,6 @@ "version": "1.2.0", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.2.0.tgz", "integrity": "sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4827,7 +5388,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/has-symbols/-/has-symbols-1.1.0.tgz", "integrity": "sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -4840,7 +5400,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/hasown/-/hasown-2.0.2.tgz", "integrity": "sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==", - "dev": true, "license": "MIT", "dependencies": { "function-bind": "^1.1.2" @@ -5108,6 +5667,16 @@ "node": ">=12.0.0" } }, + "node_modules/hono": { + "version": "4.11.7", + "resolved": "https://registry.npmjs.org/hono/-/hono-4.11.7.tgz", + "integrity": "sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==", + "license": "MIT", + "peer": true, + "engines": { + "node": ">=16.9.0" + } + }, "node_modules/html-encoding-sniffer": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/html-encoding-sniffer/-/html-encoding-sniffer-3.0.0.tgz", @@ -5138,6 +5707,26 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/http-errors": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/http-errors/-/http-errors-2.0.1.tgz", + "integrity": 
"sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==", + "license": "MIT", + "dependencies": { + "depd": "~2.0.0", + "inherits": "~2.0.4", + "setprototypeof": "~1.2.0", + "statuses": "~2.0.2", + "toidentifier": "~1.0.1" + }, + "engines": { + "node": ">= 0.8" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/http-proxy": { "version": "1.18.1", "resolved": "https://registry.npmjs.org/http-proxy/-/http-proxy-1.18.1.tgz", @@ -5248,12 +5837,36 @@ "node": ">=8" } }, + "node_modules/inherits": { + "version": "2.0.4", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", + "license": "ISC" + }, "node_modules/inline-style-parser": { "version": "0.2.4", "resolved": "https://registry.npmjs.org/inline-style-parser/-/inline-style-parser-0.2.4.tgz", "integrity": "sha512-0aO8FkhNZlj/ZIbNi7Lxxr12obT7cL1moPfE4tg1LkX7LlLfC6DeX4l2ZEud1ukP9jNQyNnfzQVqwbwmAATY4Q==", "license": "MIT" }, + "node_modules/ip-address": { + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.0.1.tgz", + "integrity": "sha512-NWv9YLW4PoW2B7xtzaS3NCot75m6nK7Icdv0o3lfMceJVRfSoQwqD4wEH5rLwoKJwUiZ/rfpiVBhnaF0FK4HoA==", + "license": "MIT", + "engines": { + "node": ">= 12" + } + }, + "node_modules/ipaddr.js": { + "version": "1.9.1", + "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", + "integrity": "sha512-0KI/607xoxSToH7GjN1FfSbLoU0+btTicjsQSWQlh/hZykN8KpmMf7uYwPW3R+akZ6R/w18ZlXSHBYXiYUPO3g==", + "license": "MIT", + "engines": { + "node": ">= 0.10" + } + }, "node_modules/is-docker": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-docker/-/is-docker-3.0.0.tgz", @@ -5345,6 +5958,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/is-promise": { + "version": "4.0.0", + 
"resolved": "https://registry.npmjs.org/is-promise/-/is-promise-4.0.0.tgz", + "integrity": "sha512-hvpoI6korhJMnej285dSg6nu1+e6uxs7zG3BYAm5byqDsgJNWwxzM6z6iZiAgQR4TJ30JmBTOwqZUw3WlyH3AQ==", + "license": "MIT" + }, "node_modules/is-wsl": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/is-wsl/-/is-wsl-3.1.0.tgz", @@ -5365,7 +5984,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, "license": "ISC" }, "node_modules/istanbul-lib-coverage": { @@ -5448,6 +6066,15 @@ "jiti": "lib/jiti-cli.mjs" } }, + "node_modules/jose": { + "version": "6.1.3", + "resolved": "https://registry.npmjs.org/jose/-/jose-6.1.3.tgz", + "integrity": "sha512-0TpaTfihd4QMNwrz/ob2Bp7X04yuxJkjRGi4aKmOqwhov54i6u79oCv7T+C7lo70MKH6BesI3vscD1yb/yzKXQ==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/panva" + } + }, "node_modules/js-tokens": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/js-tokens/-/js-tokens-4.0.0.tgz", @@ -5482,6 +6109,12 @@ "dev": true, "license": "MIT" }, + "node_modules/json-schema-typed": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/json-schema-typed/-/json-schema-typed-8.0.2.tgz", + "integrity": "sha512-fQhoXdcvc3V28x7C7BMs4P5+kNlgUURe2jmUT1T//oBRMDrqy1QPelJimwZGo7Hg9VPV3EQV5Bnq4hbFy2vetA==", + "license": "BSD-2-Clause" + }, "node_modules/json-stable-stringify-without-jsonify": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", @@ -5959,7 +6592,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/math-intrinsics/-/math-intrinsics-1.1.0.tgz", "integrity": "sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -6313,6 +6945,27 
@@ "url": "https://opencollective.com/unified" } }, + "node_modules/media-typer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/media-typer/-/media-typer-1.1.0.tgz", + "integrity": "sha512-aisnrDP4GNe06UcKFnV5bfMNPBUw4jsLGaWwWfnH3v02GnBuXX2MCVn5RbrWo0j3pczUilYblq7fQ7Nw2t5XKw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/merge-descriptors": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/merge-descriptors/-/merge-descriptors-2.0.0.tgz", + "integrity": "sha512-Snk314V5ayFLhp3fkUREub6WtjBfPdCPY1Ln8/8munuLuiYhsABgBVWsozAG+MWMbVEvcdcpbi9R7ww22l9Q3g==", + "license": "MIT", + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/micromark": { "version": "4.0.2", "resolved": "https://registry.npmjs.org/micromark/-/micromark-4.0.2.tgz", @@ -6938,6 +7591,31 @@ "node": ">=4" } }, + "node_modules/mime-db": { + "version": "1.54.0", + "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.54.0.tgz", + "integrity": "sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/mime-types": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-3.0.2.tgz", + "integrity": "sha512-Lbgzdk0h4juoQ9fCKXW4by0UJqj+nOOrI9MJ1sSj4nI8aI2eo1qmvQEie4VD1glsS250n15LsWsYtCugiStS5A==", + "license": "MIT", + "dependencies": { + "mime-db": "^1.54.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/min-indent": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz", @@ -7069,6 +7747,15 @@ "dev": true, "license": "MIT" }, + "node_modules/negotiator": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-1.0.0.tgz", + 
"integrity": "sha512-8Ofs/AUQh8MaEcrlq5xOX0CQ9ypTF5dl78mjlMNfOK08fzpgTHQRQPBxcPlEtIw0yRpws+Zo/3r+5WRby7u3Gg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, "node_modules/node-addon-api": { "version": "7.1.1", "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", @@ -7077,11 +7764,19 @@ "license": "MIT", "optional": true }, + "node_modules/object-assign": { + "version": "4.1.1", + "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", + "integrity": "sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/object-inspect": { "version": "1.13.4", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", "integrity": "sha512-W67iLl4J2EXEGTbfeHCffrjDfitvLANg0UlX3wFUUSTx92KXRFegMHUVgSqE+wvhAbi4WqjGg9czysTV2Epbew==", - "dev": true, "license": "MIT", "engines": { "node": ">= 0.4" @@ -7090,6 +7785,27 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/on-finished": { + "version": "2.4.1", + "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", + "integrity": "sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==", + "license": "MIT", + "dependencies": { + "ee-first": "1.1.1" + }, + "engines": { + "node": ">= 0.8" + } + }, + "node_modules/once": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "license": "ISC", + "dependencies": { + "wrappy": "1" + } + }, "node_modules/open": { "version": "10.2.0", "resolved": "https://registry.npmjs.org/open/-/open-10.2.0.tgz", @@ -7202,6 +7918,15 @@ "url": "https://github.com/inikulin/parse5?sponsor=1" } }, + "node_modules/parseurl": { + "version": "1.3.3", + "resolved": 
"https://registry.npmjs.org/parseurl/-/parseurl-1.3.3.tgz", + "integrity": "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -7216,7 +7941,6 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -7239,6 +7963,16 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/path-to-regexp": { + "version": "8.3.0", + "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-8.3.0.tgz", + "integrity": "sha512-7jdwVIRtsP8MYpdXSwOS0YdD0Du+qOoF/AEPIt88PcCFrZCzx41oxku1jD88hZBwbNUIEfpqvuhjFaMAqMTWnA==", + "license": "MIT", + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/pathe": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/pathe/-/pathe-2.0.3.tgz", @@ -7288,6 +8022,15 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/pkce-challenge": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.1.tgz", + "integrity": "sha512-wQ0b/W4Fr01qtpHlqSqspcj3EhBvimsdh0KlHhH8HRZnMsEa0ea2fTULOXOS9ccQr3om+GcGRk4e+isrZWV8qQ==", + "license": "MIT", + "engines": { + "node": ">=16.20.0" + } + }, "node_modules/playwright": { "version": "1.56.1", "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.56.1.tgz", @@ -7653,6 +8396,19 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/proxy-addr": { + "version": "2.0.7", + "resolved": "https://registry.npmjs.org/proxy-addr/-/proxy-addr-2.0.7.tgz", + "integrity": 
"sha512-llQsMLSUDUPT44jdrU/O37qlnifitDP+ZwrmmZcoSKyLKvtZxpyV0n2/bD/N4tBAAZ/gJEdZU7KMraoK1+XYAg==", + "license": "MIT", + "dependencies": { + "forwarded": "0.2.0", + "ipaddr.js": "1.9.1" + }, + "engines": { + "node": ">= 0.10" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -7664,10 +8420,9 @@ } }, "node_modules/qs": { - "version": "6.15.0", - "resolved": "https://registry.npmjs.org/qs/-/qs-6.15.0.tgz", - "integrity": "sha512-mAZTtNCeetKMH+pSjrb76NAM8V9a05I9aBZOHztWy/UqcJdQYNsf59vrRKWnojAT9Y+GbIvoTBC++CPHqpDBhQ==", - "dev": true, + "version": "6.14.1", + "resolved": "https://registry.npmjs.org/qs/-/qs-6.14.1.tgz", + "integrity": "sha512-4EK3+xJl8Ts67nLYNwqw/dsFVnCf+qR7RgXSK9jEEm9unao3njwMDdmsdvoKBKHzxd7tCYz5e5M+SnMjdtXGQQ==", "license": "BSD-3-Clause", "dependencies": { "side-channel": "^1.1.0" @@ -7679,6 +8434,46 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/range-parser": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/range-parser/-/range-parser-1.2.1.tgz", + "integrity": "sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==", + "license": "MIT", + "engines": { + "node": ">= 0.6" + } + }, + "node_modules/raw-body": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/raw-body/-/raw-body-3.0.2.tgz", + "integrity": "sha512-K5zQjDllxWkf7Z5xJdV0/B0WTNqx6vxG70zJE4N0kBs4LovmEYWJzQGxC9bS9RAKu3bgM40lrd5zoLJ12MQ5BA==", + "license": "MIT", + "dependencies": { + "bytes": "~3.1.2", + "http-errors": "~2.0.1", + "iconv-lite": "~0.7.0", + "unpipe": "~1.0.0" + }, + "engines": { + "node": ">= 0.10" + } + }, + "node_modules/raw-body/node_modules/iconv-lite": { + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.7.2.tgz", + "integrity": "sha512-im9DjEDQ55s9fL4EYzOAv0yMqmMBSZp6G0VvFyTMPKWxiSBHUj9NW/qqLmXUwXrrM7AvqSlTCfvqRb0cM8yYqw==", + "license": "MIT", + 
"dependencies": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + }, + "engines": { + "node": ">=0.10.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/react": { "version": "19.1.0", "resolved": "https://registry.npmjs.org/react/-/react-19.1.0.tgz", @@ -7939,6 +8734,15 @@ "url": "https://opencollective.com/unified" } }, + "node_modules/require-from-string": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/requires-port": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/requires-port/-/requires-port-1.0.0.tgz", @@ -7997,6 +8801,22 @@ "fsevents": "~2.3.2" } }, + "node_modules/router": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/router/-/router-2.2.0.tgz", + "integrity": "sha512-nLTrUKm2UyiL7rlhapu/Zl45FwNgkZGaCpZbIHajDYgwlJCOzLSk+cIPAnsEqV955GjILJnKbdQC1nVPz+gAYQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.0", + "depd": "^2.0.0", + "is-promise": "^4.0.0", + "parseurl": "^1.3.3", + "path-to-regexp": "^8.0.0" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/run-applescript": { "version": "7.1.0", "resolved": "https://registry.npmjs.org/run-applescript/-/run-applescript-7.1.0.tgz", @@ -8049,7 +8869,6 @@ "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", "integrity": "sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==", - "dev": true, "license": "MIT" }, "node_modules/sass": { @@ -8108,6 +8927,51 @@ "node": ">=10" } }, + "node_modules/send": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/send/-/send-1.2.1.tgz", + "integrity": 
"sha512-1gnZf7DFcoIcajTjTwjwuDjzuz4PPcY2StKPlsGAQ1+YH20IRVrBaXSWmdjowTJ6u8Rc01PoYOGHXfP1mYcZNQ==", + "license": "MIT", + "dependencies": { + "debug": "^4.4.3", + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "etag": "^1.8.1", + "fresh": "^2.0.0", + "http-errors": "^2.0.1", + "mime-types": "^3.0.2", + "ms": "^2.1.3", + "on-finished": "^2.4.1", + "range-parser": "^1.2.1", + "statuses": "^2.0.2" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, + "node_modules/serve-static": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/serve-static/-/serve-static-2.2.1.tgz", + "integrity": "sha512-xRXBn0pPqQTVQiC8wyQrKs2MOlX24zQ0POGaj0kultvoOCstBQM5yvOhAVSUwOMjQtTvsPWoNCHfPGwaaQJhTw==", + "license": "MIT", + "dependencies": { + "encodeurl": "^2.0.0", + "escape-html": "^1.0.3", + "parseurl": "^1.3.3", + "send": "^1.2.0" + }, + "engines": { + "node": ">= 18" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/express" + } + }, "node_modules/set-cookie-parser": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/set-cookie-parser/-/set-cookie-parser-3.0.1.tgz", @@ -8115,11 +8979,16 @@ "dev": true, "license": "MIT" }, + "node_modules/setprototypeof": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/setprototypeof/-/setprototypeof-1.2.0.tgz", + "integrity": "sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==", + "license": "ISC" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, "license": "MIT", "dependencies": { "shebang-regex": "^3.0.0" @@ -8132,7 +9001,6 @@ "version": "3.0.0", "resolved": 
"https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -8142,7 +9010,6 @@ "version": "1.1.0", "resolved": "https://registry.npmjs.org/side-channel/-/side-channel-1.1.0.tgz", "integrity": "sha512-ZX99e6tRweoUXqR+VBrslhda51Nh5MTQwou5tnUDgbtyM0dBgmhEDtWGP/xbKn6hqfPRHujUNwz5fy/wbbhnpw==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -8162,7 +9029,6 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/side-channel-list/-/side-channel-list-1.0.0.tgz", "integrity": "sha512-FCLHtRD/gnpCiCHEiJLOwdmFP+wzCmDEkc9y7NsYxeF4u7Btsn1ZuwgwJGxImImHicJArLP4R0yX4c2KCrMrTA==", - "dev": true, "license": "MIT", "dependencies": { "es-errors": "^1.3.0", @@ -8179,7 +9045,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/side-channel-map/-/side-channel-map-1.0.1.tgz", "integrity": "sha512-VCjCNfgMsby3tTdo02nbjtM/ewra6jPHmpThenkTYh8pG9ucZ/1P8So4u4FGBek/BjpOVsDCMoLA/iuBKIFXRA==", - "dev": true, "license": "MIT", "dependencies": { "call-bound": "^1.0.2", @@ -8198,7 +9063,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/side-channel-weakmap/-/side-channel-weakmap-1.0.2.tgz", "integrity": "sha512-WPS/HvHQTYnHisLo9McqBHOJk2FkHO/tlpvldyrnem4aeQp4hai3gythswg6p01oSoTl58rcpiFAjF2br2Ak2A==", - "dev": true, "license": "MIT", "dependencies": { "call-bound": "^1.0.2", @@ -8286,6 +9150,15 @@ "dev": true, "license": "MIT" }, + "node_modules/statuses": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/statuses/-/statuses-2.0.2.tgz", + "integrity": "sha512-DvEy55V3DB7uknRo+4iOGT5fP1slR8wQohVdknigZPMpMstaKJQWhwiYBACJE3Ul2pTnATihhBYnRhZQHGBiRw==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/std-env": { "version": "3.9.0", "resolved": "https://registry.npmjs.org/std-env/-/std-env-3.9.0.tgz", @@ -8294,9 
+9167,9 @@ "license": "MIT" }, "node_modules/storybook": { - "version": "10.2.9", - "resolved": "https://registry.npmjs.org/storybook/-/storybook-10.2.9.tgz", - "integrity": "sha512-DGok7XwIwdPWF+a49Yw+4madER5DZWRo9CdyySBLT3zeuxiEPt0Ua7ouJHm/y6ojnb/FVKZcQe8YmrE71s0qPQ==", + "version": "10.2.4", + "resolved": "https://registry.npmjs.org/storybook/-/storybook-10.2.4.tgz", + "integrity": "sha512-LwF0VZsT4qkgx66Ad/q0QgZZrU2a5WftaADDEcJ3bGq3O2fHvwWPlSZjM1HiXD4vqP9U5JiMqQkV1gkyH0XJkw==", "dev": true, "license": "MIT", "peer": true, @@ -8805,9 +9678,9 @@ } }, "node_modules/tar": { - "version": "7.5.9", - "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.9.tgz", - "integrity": "sha512-BTLcK0xsDh2+PUe9F6c2TlRp4zOOBMTkoQHQIWSIzI0R7KG46uEwq4OPk2W7bZcprBMsuaeFsqwYr7pjh6CuHg==", + "version": "7.5.7", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.5.7.tgz", + "integrity": "sha512-fov56fJiRuThVFXD6o6/Q354S7pnWMJIVlDBYijsTNx6jKSE4pvrDTs6lUnmGvNyfJwFQQwWy3owKz1ucIhveQ==", "dev": true, "license": "BlueOak-1.0.0", "dependencies": { @@ -8944,6 +9817,15 @@ "node": ">=8.0" } }, + "node_modules/toidentifier": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/toidentifier/-/toidentifier-1.0.1.tgz", + "integrity": "sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==", + "license": "MIT", + "engines": { + "node": ">=0.6" + } + }, "node_modules/totalist": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/totalist/-/totalist-3.0.1.tgz", @@ -9040,6 +9922,20 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/type-is": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/type-is/-/type-is-2.0.1.tgz", + "integrity": "sha512-OZs6gsjF4vMp32qrCbiVSkrFmXtG/AZhY3t0iAMrMBiAZyV9oALtXO8hsrHbMXF9x6L3grlFuwW2oAz7cav+Gw==", + "license": "MIT", + "dependencies": { + "content-type": "^1.0.5", + "media-typer": "^1.1.0", + "mime-types": "^3.0.0" + }, + "engines": { + "node": ">= 0.6" + } + }, 
"node_modules/typescript": { "version": "5.8.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", @@ -9268,6 +10164,15 @@ "node": ">= 10.0.0" } }, + "node_modules/unpipe": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/unpipe/-/unpipe-1.0.0.tgz", + "integrity": "sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/unplugin": { "version": "2.3.11", "resolved": "https://registry.npmjs.org/unplugin/-/unplugin-2.3.11.tgz", @@ -9332,6 +10237,15 @@ "uuid": "dist-node/bin/uuid" } }, + "node_modules/vary": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", + "integrity": "sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==", + "license": "MIT", + "engines": { + "node": ">= 0.8" + } + }, "node_modules/vfile": { "version": "6.0.3", "resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz", @@ -9704,7 +10618,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, "license": "ISC", "dependencies": { "isexe": "^2.0.0" @@ -9828,6 +10741,12 @@ "url": "https://github.com/chalk/ansi-styles?sponsor=1" } }, + "node_modules/wrappy": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==", + "license": "ISC" + }, "node_modules/ws": { "version": "8.18.3", "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", @@ -9895,6 +10814,25 @@ "integrity": "sha512-rAbqEGa8ovJy4pyBxZM70hg4pE6gDgaQ0Sl9M3enG3I0d6H4XSAM3GeNGLKnsBpuijUow064sf7ww1nutC5/3w==", "license": "MIT" }, + "node_modules/zod": { + "version": "4.2.1", + "resolved": 
"https://registry.npmjs.org/zod/-/zod-4.2.1.tgz", + "integrity": "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw==", + "license": "MIT", + "peer": true, + "funding": { + "url": "https://github.com/sponsors/colinhacks" + } + }, + "node_modules/zod-to-json-schema": { + "version": "3.25.1", + "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.25.1.tgz", + "integrity": "sha512-pM/SU9d3YAggzi6MtR4h7ruuQlqKtad8e9S0fmxcMi+ueAK5Korys/aWcV9LIIHTVbj01NdzxcnXSN+O74ZIVA==", + "license": "ISC", + "peerDependencies": { + "zod": "^3.25 || ^4" + } + }, "node_modules/zwitch": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/zwitch/-/zwitch-2.0.4.tgz", diff --git a/tools/server/webui/package.json b/tools/server/webui/package.json index 0b74e301b..f5cdc9e47 100644 --- a/tools/server/webui/package.json +++ b/tools/server/webui/package.json @@ -79,6 +79,7 @@ "vitest-browser-svelte": "^0.1.0" }, "dependencies": { + "@modelcontextprotocol/sdk": "^1.25.1", "highlight.js": "^11.11.1", "mode-watcher": "^1.1.0", "pdfjs-dist": "^5.4.54", @@ -90,6 +91,7 @@ "remark-html": "^16.0.1", "remark-rehype": "^11.1.2", "svelte-sonner": "^1.0.5", - "unist-util-visit": "^5.0.0" + "unist-util-visit": "^5.0.0", + "zod": "^4.2.1" } } diff --git a/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte b/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte index 1ae3d2177..11f1c17d9 100644 --- a/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte +++ b/tools/server/webui/src/lib/components/app/actions/ActionIconRemove.svelte @@ -6,21 +6,22 @@ id: string; onRemove?: (id: string) => void; class?: string; + iconSize?: number; } - let { id, onRemove, class: className = '' }: Props = $props(); + let { id, onRemove, class: className = '', iconSize = 3 }: Props = $props(); diff --git 
a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpPrompt.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpPrompt.svelte new file mode 100644 index 000000000..5fba2b3d1 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpPrompt.svelte @@ -0,0 +1,40 @@ + + +
+ + + {#if !readonly && onRemove} +
+ onRemove?.()} /> +
+ {/if} +
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpResource.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpResource.svelte new file mode 100644 index 000000000..258fcac80 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpResource.svelte @@ -0,0 +1,86 @@ + + + + + + + + +
+ {#if favicon} + { + (e.currentTarget as HTMLImageElement).style.display = 'none'; + }} + /> + {/if} + + + {serverName} + +
+
+
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpResources.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpResources.svelte new file mode 100644 index 000000000..341bf32c0 --- /dev/null +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentMcpResources.svelte @@ -0,0 +1,41 @@ + + +{#if hasAttachments} +
+ + {#each attachments as attachment, i (attachment.id)} + handleResourceClick(attachment.resource.uri)} + /> + {/each} + +
+{/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte index 6248d84fb..a3d37b42a 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentsList.svelte @@ -1,12 +1,21 @@ @@ -167,6 +231,103 @@

{systemMessageTooltip}

+ + + + + + + + MCP Servers + + + + +
+ {#each filteredMcpServers as server (server.id)} + {@const healthState = mcpStore.getHealthCheckState(server.id)} + {@const hasError = healthState.status === HealthCheckStatus.ERROR} + {@const isEnabledForChat = isServerEnabledForChat(server.id)} + + + {/each} +
+ + {#snippet footer()} + + + + Manage MCP Servers + + {/snippet} +
+
+
+ + {#if hasMcpPromptsSupport} + + + + MCP Prompt + + {/if} + + {#if hasMcpResourcesSupport} + + + + MCP Resources + + {/if} diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAttachmentsSheet.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAttachmentsSheet.svelte new file mode 100644 index 000000000..bf643dd7f --- /dev/null +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActionAttachmentsSheet.svelte @@ -0,0 +1,170 @@ + + +
+ + + + + + Add to chat + + + Add files, system prompt or configure MCP servers + + + +
+ + + + + + + + + + + + + + + {#if hasMcpPromptsSupport} + + {/if} + + {#if hasMcpResourcesSupport} + + {/if} +
+
+
+
diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte index c94fe267d..850177693 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormActions/ChatFormActions.svelte @@ -3,17 +3,24 @@ import { Button } from '$lib/components/ui/button'; import { ChatFormActionAttachmentsDropdown, + ChatFormActionAttachmentsSheet, ChatFormActionRecord, ChatFormActionSubmit, - ModelsSelector + McpServersSelector, + ModelsSelector, + ModelsSelectorSheet } from '$lib/components/app'; + import { DialogChatSettings } from '$lib/components/app/dialogs'; + import { SETTINGS_SECTION_TITLES } from '$lib/constants'; + import { mcpStore } from '$lib/stores/mcp.svelte'; import { FileTypeCategory } from '$lib/enums'; import { getFileTypeCategory } from '$lib/utils'; import { config } from '$lib/stores/settings.svelte'; import { modelsStore, modelOptions, selectedModelId } from '$lib/stores/models.svelte'; import { isRouterMode, serverError } from '$lib/stores/server.svelte'; import { chatStore } from '$lib/stores/chat.svelte'; - import { activeMessages } from '$lib/stores/conversations.svelte'; + import { activeMessages, conversationsStore } from '$lib/stores/conversations.svelte'; + import { IsMobile } from '$lib/hooks/is-mobile.svelte'; interface Props { canSend?: boolean; @@ -27,6 +34,8 @@ onMicClick?: () => void; onStop?: () => void; onSystemPromptClick?: () => void; + onMcpPromptClick?: () => void; + onMcpResourcesClick?: () => void; } let { @@ -40,7 +49,9 @@ onFileUpload, onMicClick, onStop, - onSystemPromptClick + onSystemPromptClick, + onMcpPromptClick, + onMcpResourcesClick }: Props = $props(); let currentConfig = $derived(config()); @@ -152,32 +163,83 @@ return ''; }); - let selectorModelRef: ModelsSelector | 
undefined = $state(undefined); + let selectorModelRef: ModelsSelector | ModelsSelectorSheet | undefined = $state(undefined); + + let isMobile = new IsMobile(); export function openModelSelector() { selectorModelRef?.open(); } + + let showChatSettingsDialogWithMcpSection = $state(false); + + let hasMcpPromptsSupport = $derived.by(() => { + const perChatOverrides = conversationsStore.getAllMcpServerOverrides(); + + return mcpStore.hasPromptsCapability(perChatOverrides); + }); + + let hasMcpResourcesSupport = $derived.by(() => { + const perChatOverrides = conversationsStore.getAllMcpServerOverrides(); + + return mcpStore.hasResourcesCapability(perChatOverrides); + });
- (showChatSettingsDialogWithMcpSection = true)} + /> + {:else} + (showChatSettingsDialogWithMcpSection = true)} + /> + {/if} + + (showChatSettingsDialogWithMcpSection = true)} />
- + {#if isMobile.current} + + {:else} + + {/if}
{#if isLoading} @@ -205,3 +267,9 @@ /> {/if}
+ + (showChatSettingsDialogWithMcpSection = open)} + initialSection={SETTINGS_SECTION_TITLES.MCP} +/> diff --git a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormHelperText.svelte b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormHelperText.svelte index f8246f249..a8f1f76c7 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormHelperText.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatForm/ChatFormHelperText.svelte @@ -8,7 +8,7 @@ {#if show} -
+