diff --git a/common/arg.cpp b/common/arg.cpp index b29bf7206..af722557f 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -2791,6 +2791,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.image.emplace_back(value); } ).set_examples({LLAMA_EXAMPLE_MTMD})); + add_opt(common_arg( + {"--image-min-tokens"}, "N", + "minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)", + [](common_params & params, int value) { + params.image_min_tokens = value; + } + ).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MIN_TOKENS")); + add_opt(common_arg( + {"--image-max-tokens"}, "N", + "maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)", + [](common_params & params, int value) { + params.image_max_tokens = value; + } + ).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MAX_TOKENS")); if (llama_supports_rpc()) { add_opt(common_arg( {"--rpc"}, "SERVERS", diff --git a/common/common.h b/common/common.h index 3ed12090c..73b14cd01 100644 --- a/common/common.h +++ b/common/common.h @@ -402,6 +402,8 @@ struct common_params { bool mmproj_use_gpu = true; // use GPU for multimodal model bool no_mmproj = false; // explicitly disable multimodal model std::vector image; // path to image file(s) + int image_min_tokens = -1; + int image_max_tokens = -1; // finetune struct lr_opt lr; diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index f186c2167..c6f5ba6a0 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -9802,6 +9802,113 @@ class CogVLMModel(LlamaModel): return [(self.map_tensor_name(name), data_torch)] + +@ModelBase.register("JanusForConditionalGeneration") +class JanusProModel(LlamaModel): + model_arch = gguf.MODEL_ARCH.LLAMA # reuse Llama arch + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + # Skip vision, aligner, and generation tensors + skip_prefixes = ( + 'model.vision_model.', + 'model.aligner.', + 'model.vqmodel.', + 'model.generation_embeddings.', + 'model.generation_aligner.', + 'model.generation_head.', + ) + if name.startswith(skip_prefixes): + return [] + + if name.startswith('model.language_model.'): + name = name.replace('model.language_model.', 'model.') + elif name.startswith('language_model.'): + name = name.replace('language_model.', '') + + return super().modify_tensors(data_torch, name, bid) + + +@ModelBase.register("JanusForConditionalGeneration") +class JanusProVisionModel(MmprojModel): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + assert self.hparams_vision is not None + if "intermediate_size" not in self.hparams_vision: + mlp_ratio = self.hparams_vision.get("mlp_ratio") + hidden_size = self.hparams_vision.get("hidden_size") + if mlp_ratio is not None and hidden_size is not None: + self.hparams_vision["intermediate_size"] = int(round(hidden_size * mlp_ratio)) + + def set_gguf_parameters(self): + super().set_gguf_parameters() + assert self.hparams_vision is not None + + self.gguf_writer.add_clip_projector_type(gguf.VisionProjectorType.JANUS_PRO) + + self.gguf_writer.add_vision_attention_layernorm_eps(self.hparams_vision.get("layer_norm_eps", 1e-6)) + + hidden_act = str(self.hparams_vision.get("hidden_act", "")).lower() + if hidden_act == "gelu": + self.gguf_writer.add_vision_use_gelu(True) + elif hidden_act == "silu": + self.gguf_writer.add_vision_use_silu(True) + + def _map_aligner_tensor(self, data_torch: Tensor, name: str) -> Iterable[tuple[str, Tensor]]: + """Map aligner tensors to projector format""" + suffix = ".bias" if name.endswith(".bias") else ".weight" + + if name.startswith("model.aligner."): + local_name = name[len("model.aligner."):] + elif name.startswith("aligner."): + local_name = name[len("aligner."):] + else: + raise ValueError(f"Unsupported Janus aligner prefix: {name}") + + if local_name.startswith("fc1."): + mm_index = 0 + elif local_name.startswith("hidden_layers."): + parts = local_name.split(".", 2) + if len(parts) < 3: + raise ValueError(f"Unexpected Janus aligner tensor name: {name}") + mm_index = int(parts[1]) + 1 + else: + raise ValueError(f"Unsupported Janus aligner tensor: {name}") + + tensor_name = self.format_tensor_name(gguf.MODEL_TENSOR.V_MMPROJ, mm_index, suffix=suffix) + return [(tensor_name, data_torch)] + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + del bid # unused + + # Skip language model tensors as they will be handled by `JanusProModel` + if name.startswith(('model.language_model.', 'language_model.')): + return [] + + # Skip generation-related components + skip_generation_prefixes = ( + 'model.vqmodel.', + 'vqmodel.', + 'model.generation_embeddings.', + 'generation_embeddings.', + 'model.generation_aligner.', + 'generation_aligner.', + 'model.generation_head.', + 'generation_head.', + ) + if name.startswith(skip_generation_prefixes): + return [] + + # Handle aligner tensors + if name.startswith(('model.aligner.', 'aligner.')): + return list(self._map_aligner_tensor(data_torch, name)) + + # Handle vision tensors + if name.startswith(('model.vision_model.', 'vision_model.')): + return [(self.map_tensor_name(name), data_torch)] + + return [] + + ###### CONVERSION LOGIC ###### diff --git a/ggml/src/ggml-cpu/arch/loongarch/quants.c b/ggml/src/ggml-cpu/arch/loongarch/quants.c index 22fc7607f..f531e916b 100644 --- a/ggml/src/ggml-cpu/arch/loongarch/quants.c +++ b/ggml/src/ggml-cpu/arch/loongarch/quants.c @@ -700,7 +700,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi for (; ib + 1 < nb; ib += 2) { // Compute combined scale for the block 0 and 1 - const __m128 d_0_1 = (__m128)__lsx_vreplgr2vr_w( GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d) ); + const float ft0 = GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d); + const __m128 d_0_1 = (__m128)(v4f32){ft0, ft0, ft0, ft0}; const __m128i tmp_0_1 = __lsx_vld((const __m128i *)x[ib].qs, 0); @@ -714,11 +715,9 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi bx_1 = __lsx_vsub_b(bx_1, off); const __m128i i32_1 = mul_sum_i8_pairs(bx_1, by_1); - //_mm_prefetch(&x[ib] + 2 * sizeof(block_q4_0), _MM_HINT_T0); - //_mm_prefetch(&y[ib] + 2 * sizeof(block_q8_0), _MM_HINT_T0); - // Compute combined scale for the block 2 and 3 - const __m128 d_2_3 = (__m128)__lsx_vreplgr2vr_w( GGML_CPU_FP16_TO_FP32(x[ib + 1].d) * GGML_CPU_FP16_TO_FP32(y[ib + 1].d) ); + const float ft1 = GGML_CPU_FP16_TO_FP32(x[ib + 1].d) * GGML_CPU_FP16_TO_FP32(y[ib + 1].d); + const __m128 d_2_3 = (__m128)(v4f32){ft1, ft1, ft1, ft1}; const __m128i tmp_2_3 = __lsx_vld((const __m128i *)x[ib + 1].qs, 0); diff --git a/ggml/src/ggml-cpu/ggml-cpu-impl.h b/ggml/src/ggml-cpu/ggml-cpu-impl.h index 713bf85e5..7597377cc 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-impl.h +++ b/ggml/src/ggml-cpu/ggml-cpu-impl.h @@ -500,13 +500,15 @@ inline static int32x4_t ggml_vec_dot(int32x4_t acc, int8x16_t a, int8x16_t b) { #endif -#if defined(__loongarch_asx) +#if defined(__loongarch_sx) /* float type data load instructions */ static __m128 __lsx_vreplfr2vr_s(const float val) { v4f32 res = {val, val, val, val}; return (__m128)res; } +#endif +#if defined(__loongarch_asx) static __m256 __lasx_xvreplfr2vr_s(const float val) { v8f32 res = {val, val, val, val, val, val, val, val}; return (__m256)res; diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 26b52bb82..1e58a67f5 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -1552,10 +1552,24 @@ template 0 && (nr / nchunk) < min_chunk_size && nr >= min_chunk_size) { + nchunk = (nr + min_chunk_size - 1) / min_chunk_size; + } + if (nth == 1 || nchunk < nth || disable_chunking) { nchunk = nth; } + // Ensure nchunk doesn't exceed the number of rows divided by minimum chunk size + // This prevents creating too many tiny chunks that could overlap after alignment + const int64_t max_nchunk = (nr + min_chunk_size - 1) / min_chunk_size; + if (nchunk > max_nchunk) { + nchunk = max_nchunk; + } + if (ith == 0) { // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start. ggml_threadpool_chunk_set(params->threadpool, nth); @@ -1569,8 +1583,15 @@ template ne01) { + src0_end = ne01; + } + if (src0_start >= src0_end) { break; } @@ -1682,8 +1703,12 @@ template ne01) { + src0_cur_end = ne01; + } if (src0_cur_start >= src0_cur_end) { return; diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h index 8daec6637..74c74d1a2 100644 --- a/ggml/src/ggml-cpu/simd-mappings.h +++ b/ggml/src/ggml-cpu/simd-mappings.h @@ -956,7 +956,7 @@ do { \ #define GGML_F32Cx8 __m256 #define GGML_F32Cx8_ZERO (__m256)__lasx_xvldi(0) -#define GGML_F32Cx8_SET1(x) (__m256)__lasx_xvreplgr2vr_w((x)) +#define GGML_F32Cx8_SET1(x) (__m256)__lasx_xvreplfr2vr_s((x)) static inline __m256 __lasx_f32cx8_load(const ggml_fp16_t * x) { __m256i a; @@ -999,34 +999,34 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) { #define GGML_F32x4 __m128 #define GGML_F32x4_ZERO (__m128)__lsx_vldi(0) -#define GGML_F32x4_SET1(x) (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0) +#define GGML_F32x4_SET1(x) (__m128)__lsx_vreplfr2vr_s((x)) #define GGML_F32x4_LOAD(x) (__m128)__lsx_vld((x), 0) #define GGML_F32x4_STORE(x, y) __lsx_vst(y, x, 0) #define GGML_F32x4_FMA(a, b, c) __lsx_vfmadd_s(b, c, a) #define GGML_F32x4_ADD __lsx_vfadd_s #define GGML_F32x4_MUL __lsx_vfmul_s -#define GGML_F32x4_REDUCE(res, x) \ -{ \ - int offset = GGML_F32_ARR >> 1; \ - for (int i = 0; i < offset; ++i) { \ - x[i] = __lsx_vfadd_s(x[i], x[offset + i]); \ - } \ - offset >>= 1; \ - for (int i = 0; i < offset; ++i) { \ - x[i] = __lsx_vfadd_s(x[i], x[offset + i]); \ - } \ - offset >>= 1; \ - for (int i = 0; i < offset; ++i) { \ - x[i] = __lsx_vfadd_s(x[i], x[offset + i]); \ - } \ - __m128i tmp = __lsx_vsrli_d((__m128i) x[0], 32); \ - tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, x[0]); \ - tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \ - const __m128 t0 = (__m128)__lsx_vshuf4i_w(tmp, 0x88); \ - tmp = __lsx_vsrli_d((__m128i) t0, 32); \ - tmp = (__m128i) __lsx_vfadd_s((__m128) tmp, t0); \ - tmp = __lsx_vpickev_w(__lsx_vldi(0), tmp); \ - res = (ggml_float) __lsx_vpickve2gr_w(__lsx_vshuf4i_w(tmp, 0x88), 0); \ + +#define GGML_F32x4_REDUCE(res, x) \ +{ \ + int offset = GGML_F32_ARR >> 1; \ + for (int i = 0; i < offset; ++i) { \ + x[i] = __lsx_vfadd_s(x[i], x[offset+i]); \ + } \ + offset >>= 1; \ + for (int i = 0; i < offset; ++i) { \ + x[i] = __lsx_vfadd_s(x[i], x[offset+i]); \ + } \ + offset >>= 1; \ + for (int i = 0; i < offset; ++i) { \ + x[i] = __lsx_vfadd_s(x[i], x[offset+i]); \ + } \ + __m128i t0 = __lsx_vpickev_w((__m128i)x[0], (__m128i)x[0]); \ + __m128i t1 = __lsx_vpickod_w((__m128i)x[0], (__m128i)x[0]); \ + __m128 t2 = __lsx_vfadd_s((__m128)t0, (__m128)t1); \ + __m128i t3 = __lsx_vpickev_w((__m128i)t2, (__m128i)t2); \ + __m128i t4 = __lsx_vpickod_w((__m128i)t2, (__m128i)t2); \ + __m128 t5 = __lsx_vfadd_s((__m128)t3, (__m128)t4); \ + res = (ggml_float) ((v4f32)t5)[0]; \ } #define GGML_F32_VEC GGML_F32x4 @@ -1068,7 +1068,7 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) { #define GGML_F32Cx4 __m128 #define GGML_F32Cx4_ZERO (__m128)__lsx_vldi(0) -#define GGML_F32Cx4_SET1(x) (__m128)__lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0) +#define GGML_F32Cx4_SET1(x) (__m128)__lsx_vreplfr2vr_s((x)) #define GGML_F32Cx4_LOAD(x) (__m128)__lsx_f16x4_load(x) #define GGML_F32Cx4_STORE(x, y) __lsx_f16x4_store(x, y) #define GGML_F32Cx4_FMA GGML_F32x4_FMA diff --git a/ggml/src/ggml-cuda/fattn-tile.cu b/ggml/src/ggml-cuda/fattn-tile.cu index 3a5806d90..3fcb09b7a 100644 --- a/ggml/src/ggml-cuda/fattn-tile.cu +++ b/ggml/src/ggml-cuda/fattn-tile.cu @@ -14,6 +14,10 @@ void ggml_cuda_flash_attn_ext_tile(ggml_backend_cuda_context & ctx, ggml_tensor GGML_ASSERT(V->ne[0] == K->ne[0]); ggml_cuda_flash_attn_ext_tile_case< 64, 64>(ctx, dst); } break; + case 72: { + GGML_ASSERT(V->ne[0] == K->ne[0]); + ggml_cuda_flash_attn_ext_tile_case< 72, 72>(ctx, dst); + } break; case 80: { GGML_ASSERT(V->ne[0] == K->ne[0]); ggml_cuda_flash_attn_ext_tile_case< 80, 80>(ctx, dst); diff --git a/ggml/src/ggml-cuda/fattn-tile.cuh b/ggml/src/ggml-cuda/fattn-tile.cuh index 2b60b3bb1..c358aa1e8 100644 --- a/ggml/src/ggml-cuda/fattn-tile.cuh +++ b/ggml/src/ggml-cuda/fattn-tile.cuh @@ -6,7 +6,7 @@ // nbatch_K == number of K columns to load in parallel for KQ calculation // TODO optimize kernel parameters for FP16 NVIDIA (P100) -// TODO optimize kernel parameters for head sizes 40, 80, 96, 112 +// TODO optimize kernel parameters for head sizes 40, 72, 80, 96, 112 // The ROCm compiler cannot handle templating in __launch_bounds__. // As a workaround, define a macro to package the kernel parameters as uint32_t: @@ -32,6 +32,12 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 16, 256, 2, 64, 64) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 32, 256, 2, 64, 64) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 2, 64, 2, 64, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 4, 128, 2, 64, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 8, 256, 2, 64, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 16, 256, 2, 64, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 32, 256, 2, 64, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 2, 64, 2, 64, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 4, 128, 2, 64, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 8, 256, 2, 64, 40) @@ -80,6 +86,12 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 16, 128, 3, 64, 64) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 32, 256, 2, 64, 64) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 2, 64, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 4, 128, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 8, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 16, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 32, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 2, 64, 2, 32, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 4, 128, 2, 32, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 8, 256, 2, 32, 40) @@ -130,6 +142,13 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 32, 256, 2, 64, 64) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 64, 256, 2, 64, 64) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 2, 64, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 4, 128, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 8, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 16, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 32, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 64, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 2, 64, 2, 32, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 4, 128, 2, 32, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 8, 256, 2, 32, 40) @@ -185,6 +204,13 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 32, 128, 4, 64, 64) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 64, 64, 64, 128, 5, 64, 64) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 2, 64, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 4, 128, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 8, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 16, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 32, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 72, 72, 64, 256, 2, 32, 72) + GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 2, 64, 2, 32, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 4, 128, 2, 32, 40) GGML_CUDA_FATTN_TILE_CONFIG_CASE( 80, 80, 8, 256, 2, 32, 40) @@ -723,7 +749,7 @@ static __global__ void flash_attn_tile( if ( #ifdef GGML_USE_WMMA_FATTN - (ncols2 != 1 && DV != 40 && DV != 512) || + (ncols2 != 1 && DV != 40 && DV != 72 && DV != 512) || #endif // GGML_USE_WMMA_FATTN (use_logit_softcap && !(DV == 128 || DV == 256)) ) { @@ -1198,6 +1224,7 @@ void ggml_cuda_flash_attn_ext_tile(ggml_backend_cuda_context & ctx, ggml_tensor extern DECL_FATTN_TILE_CASE( 40, 40); extern DECL_FATTN_TILE_CASE( 64, 64); +extern DECL_FATTN_TILE_CASE( 72, 72); extern DECL_FATTN_TILE_CASE( 80, 80); extern DECL_FATTN_TILE_CASE( 96, 96); extern DECL_FATTN_TILE_CASE(112, 112); diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml/src/ggml-cuda/fattn.cu index 42f304475..4c96dab47 100644 --- a/ggml/src/ggml-cuda/fattn.cu +++ b/ggml/src/ggml-cuda/fattn.cu @@ -229,6 +229,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const switch (K->ne[0]) { case 40: case 64: + case 72: case 80: case 96: case 128: @@ -281,7 +282,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const const bool can_use_vector_kernel = Q->ne[0] <= 256 && Q->ne[0] % 64 == 0 && K->ne[1] % FATTN_KQ_STRIDE == 0; // If Turing tensor cores available, use them: - if (turing_mma_available(cc) && K->ne[1] % FATTN_KQ_STRIDE == 0 && Q->ne[0] != 40) { + if (turing_mma_available(cc) && K->ne[1] % FATTN_KQ_STRIDE == 0 && Q->ne[0] != 40 && Q->ne[0] != 72) { if (can_use_vector_kernel) { if (!ggml_is_quantized(K->type) && !ggml_is_quantized(V->type)) { if (cc >= GGML_CUDA_CC_ADA_LOVELACE && Q->ne[1] == 1 && Q->ne[3] == 1 && !(gqa_ratio > 4 && K->ne[1] >= 8192)) { @@ -312,7 +313,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const } // Use the WMMA kernel if possible: - if (ggml_cuda_should_use_wmma_fattn(cc) && K->ne[1] % FATTN_KQ_STRIDE == 0 && Q->ne[0] != 40 && Q->ne[0] != 576) { + if (ggml_cuda_should_use_wmma_fattn(cc) && K->ne[1] % FATTN_KQ_STRIDE == 0 && Q->ne[0] != 40 && Q->ne[0] != 72 && Q->ne[0] != 576) { if (can_use_vector_kernel && Q->ne[1] <= 2) { return BEST_FATTN_KERNEL_VEC; } diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index dcf13dd51..074e03d4a 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -2116,6 +2116,14 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_f(const ggml_tensor * tensor) { const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc; use_mul_mat_vec_f = use_mul_mat_vec_f && ggml_cuda_should_use_mmvf(src0->type, cc, src0->ne, is_mul_mat_id ? src1->ne[2] : src1->ne[1]); + const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) || + ggml_backend_buft_is_cuda_split(src1->buffer->buft); + + //TODO: add support for fusion for split buffers + if (split) { + return false; + } + //we only support fusion for ncols_dst = 1 if (tensor->op == GGML_OP_MUL_MAT && dst->ne[1] != 1) { return false; @@ -2155,6 +2163,15 @@ static bool ggml_cuda_should_fuse_mul_mat_vec_q(const ggml_tensor * tensor) { return false; } + + const bool split = ggml_backend_buft_is_cuda_split(src0->buffer->buft) || + ggml_backend_buft_is_cuda_split(src1->buffer->buft); + + //TODO: add support for fusion for split buffers + if (split) { + return false; + } + return use_mul_mat_vec_q; } diff --git a/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu b/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu new file mode 100644 index 000000000..8f9d5315f --- /dev/null +++ b/ggml/src/ggml-cuda/template-instances/fattn-tile-instance-dkq72-dv72.cu @@ -0,0 +1,5 @@ +// This file has been autogenerated by generate_cu_files.py, do not edit manually. + +#include "../fattn-tile.cuh" + +DECL_FATTN_TILE_CASE(72, 72); diff --git a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/ggml/src/ggml-cuda/template-instances/generate_cu_files.py index 81a986f38..a5602da02 100755 --- a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py +++ b/ggml/src/ggml-cuda/template-instances/generate_cu_files.py @@ -3,7 +3,7 @@ from glob import glob import os -HEAD_SIZES_KQ = [40, 64, 80, 96, 112, 128, 256, 576] +HEAD_SIZES_KQ = [40, 64, 72, 80, 96, 112, 128, 256, 576] TYPES_KV = ["GGML_TYPE_F16", "GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0"] @@ -81,6 +81,8 @@ for ncols in [8, 16, 32, 64]: for head_size_kq in HEAD_SIZES_KQ: if head_size_kq == 40: continue + if head_size_kq == 72: + continue if head_size_kq != 576 and ncols2 == 16: continue if head_size_kq == 576 and ncols2 != 16: diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 0d5afa01e..77e3b0650 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -3186,6 +3186,7 @@ class VisionProjectorType: KIMIVL = "kimivl" LIGHTONOCR = "lightonocr" COGVLM = "cogvlm" + JANUS_PRO = "janus_pro" # Items here are (block size, type size) diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index cef5acec7..929406687 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -1183,6 +1183,7 @@ class TensorNameMap: "model.mm_projector.mlp.mlp.{bid}", "vision_model.vision_adapter.mlp.fc{bid}", # llama 4 "mlp1.{bid}", # InternVL + "model.aligner.fc1.hidden_layers.{bid}", # Janus Pro ), MODEL_TENSOR.V_MMPROJ_PEG: ( @@ -1291,6 +1292,7 @@ class TensorNameMap: "model.vision_tower.encoder.layer.{bid}.attention.projection_layer", # Intern-S1 "vpm.encoder.layers.{bid}.self_attn.out_proj", "model.vision_model.encoder.layers.{bid}.self_attn.out_proj", # SmolVLM + "model.vision_model.encoder.layers.{bid}.self_attn.projection_layer", # Janus Pro "vision_model.model.layers.{bid}.self_attn.o_proj", # llama4 "vision_tower.transformer.layers.{bid}.attention.o_proj", # pixtral-hf "vision_encoder.transformer.layers.{bid}.attention.wo", # pixtral diff --git a/tools/mtmd/clip-impl.h b/tools/mtmd/clip-impl.h index c7e949834..722b1a494 100644 --- a/tools/mtmd/clip-impl.h +++ b/tools/mtmd/clip-impl.h @@ -155,6 +155,7 @@ enum projector_type { PROJECTOR_TYPE_KIMIVL, PROJECTOR_TYPE_LIGHTONOCR, PROJECTOR_TYPE_COGVLM, + PROJECTOR_TYPE_JANUS_PRO, PROJECTOR_TYPE_UNKNOWN, }; @@ -180,6 +181,7 @@ static std::map PROJECTOR_TYPE_NAMES = { { PROJECTOR_TYPE_KIMIVL, "kimivl"}, { PROJECTOR_TYPE_LIGHTONOCR,"lightonocr"}, { PROJECTOR_TYPE_COGVLM, "cogvlm"}, + { PROJECTOR_TYPE_JANUS_PRO, "janus_pro"}, }; static projector_type clip_projector_type_from_string(const std::string & str) { diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index 79289e4d0..4d32251a7 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -187,8 +187,8 @@ struct clip_hparams { int32_t n_layer; // idefics3 int32_t image_longest_edge = 0; - int32_t image_min_pixels = 0; - int32_t image_max_pixels = 0; + int32_t image_min_pixels = -1; + int32_t image_max_pixels = -1; int32_t n_merge = 0; // number of patch merges **per-side** float image_mean[3]; @@ -221,11 +221,15 @@ struct clip_hparams { int minicpmv_version = 0; int32_t minicpmv_query_num = 0; // MiniCPM-V query number + // custom value provided by user, can be undefined if not set + int32_t custom_image_min_tokens = -1; + int32_t custom_image_max_tokens = -1; + void set_limit_image_tokens(int n_tokens_min, int n_tokens_max) { const int cur_merge = n_merge == 0 ? 1 : n_merge; const int patch_area = patch_size * patch_size * cur_merge * cur_merge; - image_min_pixels = n_tokens_min * patch_area; - image_max_pixels = n_tokens_max * patch_area; + image_min_pixels = (custom_image_min_tokens > 0 ? custom_image_min_tokens : n_tokens_min) * patch_area; + image_max_pixels = (custom_image_max_tokens > 0 ? custom_image_max_tokens : n_tokens_max) * patch_area; warmup_image_size = static_cast(std::sqrt(image_max_pixels)); } @@ -234,6 +238,7 @@ struct clip_hparams { GGML_ASSERT(n_tok_per_side * n_tok_per_side == n_tokens && "n_tokens must be n*n"); const int cur_merge = n_merge == 0 ? 1 : n_merge; warmup_image_size = n_tok_per_side * patch_size * cur_merge; + // TODO: support warmup size for custom token numbers } }; @@ -455,7 +460,7 @@ struct clip_ctx { clip_ctx(clip_context_params & ctx_params) { flash_attn_type = ctx_params.flash_attn_type; - backend_cpu = ggml_backend_cpu_init(); //always has CPU backend + backend_cpu = ggml_backend_cpu_init(); //always has CPU backend if (!backend_cpu) { throw std::runtime_error("failed to initialize CPU backend"); } @@ -482,6 +487,13 @@ struct clip_ctx { LOG_INF("%s: CLIP using CPU backend\n", __func__); } + if (ctx_params.image_min_tokens > 0) { + model.hparams.custom_image_min_tokens = ctx_params.image_min_tokens; + } + if (ctx_params.image_max_tokens > 0) { + model.hparams.custom_image_max_tokens = ctx_params.image_max_tokens; + } + backend_ptrs.push_back(backend_cpu); backend_buft.push_back(ggml_backend_get_default_buffer_type(backend_cpu)); @@ -611,6 +623,15 @@ struct clip_graph { cur = ggml_gelu(ctx0, cur); cur = ggml_mul_mat(ctx0, model.mm_2_w, cur); cur = ggml_add(ctx0, cur, model.mm_2_b); + + } else if (ctx->proj_type() == PROJECTOR_TYPE_JANUS_PRO) { + cur = build_ffn(cur, + model.mm_0_w, model.mm_0_b, + nullptr, nullptr, + model.mm_1_w, model.mm_1_b, + hparams.ffn_op, + -1); + } else { GGML_ABORT("SigLIP: Unsupported projector type"); } @@ -775,6 +796,15 @@ struct clip_graph { ggml_set_name(window_mask, "window_mask"); ggml_set_input(window_mask); + // if flash attn is used, we need to pad the mask and cast to f16 + if (ctx->flash_attn_type == CLIP_FLASH_ATTN_TYPE_ENABLED) { + int n_pad = GGML_PAD(window_mask->ne[1], GGML_KQ_MASK_PAD) - window_mask->ne[1]; + if (n_pad > 0) { + window_mask = ggml_pad(ctx0, window_mask, 0, n_pad, 0, 0); + } + window_mask = ggml_cast(ctx0, window_mask, GGML_TYPE_F16); + } + // inpL shape: [n_embd, n_patches_x * n_patches_y, batch_size] GGML_ASSERT(batch_size == 1); inpL = ggml_reshape_2d(ctx0, inpL, n_embd * 4, n_patches_x * n_patches_y * batch_size / 4); @@ -1752,7 +1782,6 @@ struct clip_graph { return gf; } - // whisper encoder with custom projector ggml_cgraph * build_whisper_enc() { const int n_frames = img.nx; @@ -2480,6 +2509,10 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32 { res = graph.build_kimivl(); } break; + case PROJECTOR_TYPE_JANUS_PRO: + { + res = graph.build_siglip(); + } break; case PROJECTOR_TYPE_COGVLM: { res = graph.build_cogvlm(); @@ -2815,6 +2848,12 @@ struct clip_model_loader { } hparams.set_limit_image_tokens(8, 2048); hparams.set_warmup_n_tokens(256); // avoid OOM on warmup + const int warn_min_pixels = 1024 * hparams.n_merge * hparams.n_merge * hparams.patch_size * hparams.patch_size; + if (hparams.image_min_pixels < warn_min_pixels) { + LOG_WRN("%s: Qwen-VL models require at minimum 1024 image tokens to function correctly on grounding tasks\n", __func__); + LOG_WRN("%s: if you encounter problems with accuracy, try adding --image-min-tokens 1024\n", __func__); + LOG_WRN("%s: more info: https://github.com/ggml-org/llama.cpp/issues/16842\n\n", __func__); + } } break; case PROJECTOR_TYPE_LLAMA4: { @@ -2839,6 +2878,13 @@ struct clip_model_loader { break; } + // sanity check + { + if (hparams.image_max_pixels < hparams.image_min_pixels) { + throw std::runtime_error(string_format("%s: image_max_pixels (%d) is less than image_min_pixels (%d)\n", __func__, hparams.image_max_pixels, hparams.image_min_pixels)); + } + } + LOG_INF("%s: projector: %s\n", __func__, proj_type.c_str()); LOG_INF("%s: n_embd: %d\n", __func__, hparams.n_embd); LOG_INF("%s: n_head: %d\n", __func__, hparams.n_head); @@ -2855,10 +2901,10 @@ struct clip_model_loader { LOG_INF("%s: n_merge: %d\n", __func__, hparams.n_merge); LOG_INF("%s: n_wa_pattern: %d\n", __func__, hparams.n_wa_pattern); if (hparams.image_min_pixels > 0) { - LOG_INF("%s: image_min_pixels: %d\n", __func__, hparams.image_min_pixels); + LOG_INF("%s: image_min_pixels: %d%s\n", __func__, hparams.image_min_pixels, hparams.custom_image_min_tokens > 0 ? " (custom value)" : ""); } if (hparams.image_max_pixels > 0) { - LOG_INF("%s: image_max_pixels: %d\n", __func__, hparams.image_max_pixels); + LOG_INF("%s: image_max_pixels: %d%s\n", __func__, hparams.image_max_pixels, hparams.custom_image_max_tokens > 0 ? " (custom value)" : ""); } } else if (is_audio) { LOG_INF("\n--- audio hparams ---\n"); @@ -3208,6 +3254,13 @@ struct clip_model_loader { model.mm_boi = get_tensor(TN_TOK_BOI); model.mm_eoi = get_tensor(TN_TOK_EOI); } break; + case PROJECTOR_TYPE_JANUS_PRO: + { + model.mm_0_w = get_tensor(string_format(TN_LLAVA_PROJ, 0, "weight")); + model.mm_0_b = get_tensor(string_format(TN_LLAVA_PROJ, 0, "bias")); + model.mm_1_w = get_tensor(string_format(TN_LLAVA_PROJ, 1, "weight")); + model.mm_1_b = get_tensor(string_format(TN_LLAVA_PROJ, 1, "bias")); + } break; default: GGML_ASSERT(false && "unknown projector type"); } @@ -4316,7 +4369,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str case PROJECTOR_TYPE_QWEN25VL: case PROJECTOR_TYPE_QWEN3VL: { - // step 1: make a blank canvas which aligns to the grid + GGML_ASSERT(params.image_min_pixels > 0 && params.image_max_pixels > 0); clip_image_u8 resized; const clip_image_size new_size = img_tool::calc_size_preserved_ratio( original_size, @@ -4394,10 +4447,22 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str res_imgs->entries.push_back(std::move(img_f32)); } break; + case PROJECTOR_TYPE_JANUS_PRO: + { + // Janus Pro preprocessing: pad to square with gray(127), resize to 384x384 + const std::array pad_color = {127, 127, 127}; + clip_image_u8 resized_image; + int sz = params.image_size; + img_tool::resize(*img, resized_image, {sz, sz}, img_tool::RESIZE_ALGO_BILINEAR, true, pad_color); + clip_image_f32_ptr img_f32(clip_image_f32_init()); + normalize_image_u8_to_f32(resized_image, *img_f32, params.image_mean, params.image_std); + res_imgs->entries.push_back(std::move(img_f32)); + } break; + case PROJECTOR_TYPE_PIXTRAL: case PROJECTOR_TYPE_LIGHTONOCR: { - GGML_ASSERT(params.image_min_pixels && params.image_max_pixels); + GGML_ASSERT(params.image_min_pixels > 0 && params.image_max_pixels > 0); clip_image_u8 resized_image; // the original pixtral model doesn't have n_merge const int cur_merge = params.n_merge == 0 ? 1 : params.n_merge; @@ -4431,7 +4496,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str case PROJECTOR_TYPE_LFM2: case PROJECTOR_TYPE_KIMIVL: { - GGML_ASSERT(params.image_min_pixels && params.image_max_pixels); + GGML_ASSERT(params.image_min_pixels > 0 && params.image_max_pixels > 0); const clip_image_size target_size = img_tool::calc_size_preserved_ratio( original_size, params.patch_size * params.n_merge, @@ -4570,6 +4635,7 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im switch (proj) { case PROJECTOR_TYPE_MLP: case PROJECTOR_TYPE_MLP_NORM: + case PROJECTOR_TYPE_JANUS_PRO: { // do nothing } break; @@ -5080,6 +5146,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_LFM2: case PROJECTOR_TYPE_VOXTRAL: + case PROJECTOR_TYPE_JANUS_PRO: case PROJECTOR_TYPE_COGVLM: { // do nothing @@ -5348,6 +5415,7 @@ int clip_n_mmproj_embd(const struct clip_ctx * ctx) { return ctx->model.mm_model_mlp_3_w->ne[1]; case PROJECTOR_TYPE_QWEN2VL: case PROJECTOR_TYPE_QWEN25VL: + case PROJECTOR_TYPE_JANUS_PRO: return ctx->model.mm_1_b->ne[0]; case PROJECTOR_TYPE_QWEN3VL: // main path + deepstack paths diff --git a/tools/mtmd/clip.h b/tools/mtmd/clip.h index d614ef3b3..94eaa750a 100644 --- a/tools/mtmd/clip.h +++ b/tools/mtmd/clip.h @@ -33,6 +33,8 @@ struct clip_context_params { bool use_gpu; enum ggml_log_level verbosity; enum clip_flash_attn_type flash_attn_type; + int image_min_tokens; + int image_max_tokens; }; struct clip_init_result { diff --git a/tools/mtmd/mtmd-cli.cpp b/tools/mtmd/mtmd-cli.cpp index 17aea1472..3e19e9595 100644 --- a/tools/mtmd/mtmd-cli.cpp +++ b/tools/mtmd/mtmd-cli.cpp @@ -132,11 +132,13 @@ struct mtmd_cli_context { void init_vision_context(common_params & params) { const char * clip_path = params.mmproj.path.c_str(); mtmd_context_params mparams = mtmd_context_params_default(); - mparams.use_gpu = params.mmproj_use_gpu; - mparams.print_timings = true; - mparams.n_threads = params.cpuparams.n_threads; - mparams.verbosity = params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO; - mparams.flash_attn_type = params.flash_attn_type; + mparams.use_gpu = params.mmproj_use_gpu; + mparams.print_timings = true; + mparams.n_threads = params.cpuparams.n_threads; + mparams.verbosity = params.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO; + mparams.flash_attn_type = params.flash_attn_type; + mparams.image_min_tokens = params.image_min_tokens; + mparams.image_max_tokens = params.image_max_tokens; ctx_vision.reset(mtmd_init_from_file(clip_path, model, mparams)); if (!ctx_vision.get()) { LOG_ERR("Failed to load vision model from %s\n", clip_path); diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp index 297eef437..325f7ff99 100644 --- a/tools/mtmd/mtmd.cpp +++ b/tools/mtmd/mtmd.cpp @@ -109,6 +109,8 @@ mtmd_context_params mtmd_context_params_default() { params.image_marker = MTMD_DEFAULT_IMAGE_MARKER; params.media_marker = mtmd_default_marker(); params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO; + params.image_min_tokens = -1; + params.image_max_tokens = -1; return params; } @@ -171,9 +173,13 @@ struct mtmd_context { } clip_context_params ctx_clip_params; - ctx_clip_params.use_gpu = ctx_params.use_gpu; - ctx_clip_params.verbosity = ctx_params.verbosity; - ctx_clip_params.flash_attn_type = mtmd_get_clip_flash_attn_type(ctx_params.flash_attn_type); + ctx_clip_params.use_gpu = ctx_params.use_gpu; + ctx_clip_params.verbosity = ctx_params.verbosity; + ctx_clip_params.flash_attn_type = mtmd_get_clip_flash_attn_type(ctx_params.flash_attn_type); + // custom image token limits + ctx_clip_params.image_min_tokens = ctx_params.image_min_tokens; + ctx_clip_params.image_max_tokens = ctx_params.image_max_tokens; + auto res = clip_init(mmproj_fname, ctx_clip_params); ctx_v = res.ctx_v; ctx_a = res.ctx_a; diff --git a/tools/mtmd/mtmd.h b/tools/mtmd/mtmd.h index 4ae1925bc..775fba621 100644 --- a/tools/mtmd/mtmd.h +++ b/tools/mtmd/mtmd.h @@ -83,6 +83,10 @@ struct mtmd_context_params { const char * image_marker; // deprecated, use media_marker instead const char * media_marker; enum llama_flash_attn_type flash_attn_type; + + // limit number of image tokens, only for vision models with dynamic resolution + int image_min_tokens; // minimum number of tokens for image input (default: read from metadata) + int image_max_tokens; // maximum number of tokens for image input (default: read from metadata) }; MTMD_API const char * mtmd_default_marker(void); diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index b71690cc8..a796c255c 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/server.cpp b/tools/server/server.cpp index a9bef3518..ec7dc1a13 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -2452,11 +2452,13 @@ struct server_context { std::string & mmproj_path = params_base.mmproj.path; if (!mmproj_path.empty()) { mtmd_context_params mparams = mtmd_context_params_default(); - mparams.use_gpu = params_base.mmproj_use_gpu; - mparams.print_timings = false; - mparams.n_threads = params_base.cpuparams.n_threads; - mparams.verbosity = params_base.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO; - mparams.flash_attn_type = params_base.flash_attn_type; + mparams.use_gpu = params_base.mmproj_use_gpu; + mparams.print_timings = false; + mparams.n_threads = params_base.cpuparams.n_threads; + mparams.verbosity = params_base.verbosity > 0 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_INFO; + mparams.flash_attn_type = params_base.flash_attn_type; + mparams.image_min_tokens = params_base.image_min_tokens; + mparams.image_max_tokens = params_base.image_max_tokens; mctx = mtmd_init_from_file(mmproj_path.c_str(), model, mparams); if (mctx == nullptr) { SRV_ERR("failed to load multimodal model, '%s'\n", mmproj_path.c_str()); @@ -4908,6 +4910,7 @@ int main(int argc, char ** argv) { json data = { { "default_generation_settings", default_generation_settings_for_props }, { "total_slots", ctx_server.params_base.n_parallel }, + { "model_alias", ctx_server.params_base.model_alias }, { "model_path", ctx_server.params_base.model.path }, { "modalities", json { {"vision", ctx_server.oai_parser_opt.allow_image}, diff --git a/tools/server/webui/package-lock.json b/tools/server/webui/package-lock.json index f86b9282c..8fab38f6f 100644 --- a/tools/server/webui/package-lock.json +++ b/tools/server/webui/package-lock.json @@ -59,6 +59,7 @@ "prettier-plugin-tailwindcss": "^0.6.11", "rehype-katex": "^7.0.1", "remark-math": "^6.0.0", + "sass": "^1.93.3", "storybook": "^9.0.17", "svelte": "^5.0.0", "svelte-check": "^4.0.0", @@ -1176,6 +1177,330 @@ "node": ">= 8" } }, + "node_modules/@parcel/watcher": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher/-/watcher-2.5.1.tgz", + "integrity": "sha512-dfUnCxiN9H4ap84DvD2ubjw+3vUNpstxa0TneY/Paat8a3R4uQZDLSvWjmznAY/DoahqTHl9V46HF/Zs3F29pg==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "dependencies": { + "detect-libc": "^1.0.3", + "is-glob": "^4.0.3", + "micromatch": "^4.0.5", + "node-addon-api": "^7.0.0" + }, + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + }, + "optionalDependencies": { + "@parcel/watcher-android-arm64": "2.5.1", + "@parcel/watcher-darwin-arm64": "2.5.1", + "@parcel/watcher-darwin-x64": "2.5.1", + "@parcel/watcher-freebsd-x64": "2.5.1", + "@parcel/watcher-linux-arm-glibc": "2.5.1", + "@parcel/watcher-linux-arm-musl": "2.5.1", + "@parcel/watcher-linux-arm64-glibc": "2.5.1", + "@parcel/watcher-linux-arm64-musl": "2.5.1", + "@parcel/watcher-linux-x64-glibc": "2.5.1", + "@parcel/watcher-linux-x64-musl": "2.5.1", + "@parcel/watcher-win32-arm64": "2.5.1", + "@parcel/watcher-win32-ia32": "2.5.1", + "@parcel/watcher-win32-x64": "2.5.1" + } + }, + "node_modules/@parcel/watcher-android-arm64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-android-arm64/-/watcher-android-arm64-2.5.1.tgz", + "integrity": "sha512-KF8+j9nNbUN8vzOFDpRMsaKBHZ/mcjEjMToVMJOhTozkDonQFFrRcfdLWn6yWKCmJKmdVxSgHiYvTCef4/qcBA==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "android" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-darwin-arm64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-darwin-arm64/-/watcher-darwin-arm64-2.5.1.tgz", + "integrity": "sha512-eAzPv5osDmZyBhou8PoF4i6RQXAfeKL9tjb3QzYuccXFMQU0ruIc/POh30ePnaOyD1UXdlKguHBmsTs53tVoPw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-darwin-x64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-darwin-x64/-/watcher-darwin-x64-2.5.1.tgz", + "integrity": "sha512-1ZXDthrnNmwv10A0/3AJNZ9JGlzrF82i3gNQcWOzd7nJ8aj+ILyW1MTxVk35Db0u91oD5Nlk9MBiujMlwmeXZg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-freebsd-x64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-freebsd-x64/-/watcher-freebsd-x64-2.5.1.tgz", + "integrity": "sha512-SI4eljM7Flp9yPuKi8W0ird8TI/JK6CSxju3NojVI6BjHsTyK7zxA9urjVjEKJ5MBYC+bLmMcbAWlZ+rFkLpJQ==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "freebsd" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm-glibc": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm-glibc/-/watcher-linux-arm-glibc-2.5.1.tgz", + "integrity": "sha512-RCdZlEyTs8geyBkkcnPWvtXLY44BCeZKmGYRtSgtwwnHR4dxfHRG3gR99XdMEdQ7KeiDdasJwwvNSF5jKtDwdA==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm-musl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm-musl/-/watcher-linux-arm-musl-2.5.1.tgz", + "integrity": "sha512-6E+m/Mm1t1yhB8X412stiKFG3XykmgdIOqhjWj+VL8oHkKABfu/gjFj8DvLrYVHSBNC+/u5PeNrujiSQ1zwd1Q==", + "cpu": [ + "arm" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm64-glibc": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm64-glibc/-/watcher-linux-arm64-glibc-2.5.1.tgz", + "integrity": "sha512-LrGp+f02yU3BN9A+DGuY3v3bmnFUggAITBGriZHUREfNEzZh/GO06FF5u2kx8x+GBEUYfyTGamol4j3m9ANe8w==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-arm64-musl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-arm64-musl/-/watcher-linux-arm64-musl-2.5.1.tgz", + "integrity": "sha512-cFOjABi92pMYRXS7AcQv9/M1YuKRw8SZniCDw0ssQb/noPkRzA+HBDkwmyOJYp5wXcsTrhxO0zq1U11cK9jsFg==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-x64-glibc": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-x64-glibc/-/watcher-linux-x64-glibc-2.5.1.tgz", + "integrity": "sha512-GcESn8NZySmfwlTsIur+49yDqSny2IhPeZfXunQi48DMugKeZ7uy1FX83pO0X22sHntJ4Ub+9k34XQCX+oHt2A==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-linux-x64-musl": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-linux-x64-musl/-/watcher-linux-x64-musl-2.5.1.tgz", + "integrity": "sha512-n0E2EQbatQ3bXhcH2D1XIAANAcTZkQICBPVaxMeaCVBtOpBZpWJuf7LwyWPSBDITb7In8mqQgJ7gH8CILCURXg==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-arm64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-arm64/-/watcher-win32-arm64-2.5.1.tgz", + "integrity": "sha512-RFzklRvmc3PkjKjry3hLF9wD7ppR4AKcWNzH7kXR7GUe0Igb3Nz8fyPwtZCSquGrhU5HhUNDr/mKBqj7tqA2Vw==", + "cpu": [ + "arm64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-ia32": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-ia32/-/watcher-win32-ia32-2.5.1.tgz", + "integrity": "sha512-c2KkcVN+NJmuA7CGlaGD1qJh1cLfDnQsHjE89E60vUEMlqduHGCdCLJCID5geFVM0dOtA3ZiIO8BoEQmzQVfpQ==", + "cpu": [ + "ia32" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher-win32-x64": { + "version": "2.5.1", + "resolved": "https://registry.npmjs.org/@parcel/watcher-win32-x64/-/watcher-win32-x64-2.5.1.tgz", + "integrity": "sha512-9lHBdJITeNR++EvSQVUcaZoWupyHfXe1jZvGZ06O/5MflPcuPLtEphScIBL+AiCWBO46tDSHzWyD0uDmmZqsgA==", + "cpu": [ + "x64" + ], + "dev": true, + "license": "MIT", + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/parcel" + } + }, + "node_modules/@parcel/watcher/node_modules/detect-libc": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-1.0.3.tgz", + "integrity": "sha512-pGjwhsmsp4kL2RTz08wcOlGN83otlqHeD/Z5T8GXZB+/YcpQ/dgo+lbU8ZsGxV0HIvqqxo9l7mqYwyYMD9bKDg==", + "dev": true, + "license": "Apache-2.0", + "optional": true, + "bin": { + "detect-libc": "bin/detect-libc.js" + }, + "engines": { + "node": ">=0.10" + } + }, "node_modules/@playwright/test": { "version": "1.54.1", "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.54.1.tgz", @@ -4697,6 +5022,13 @@ "node": ">= 4" } }, + "node_modules/immutable": { + "version": "5.1.4", + "resolved": "https://registry.npmjs.org/immutable/-/immutable-5.1.4.tgz", + "integrity": "sha512-p6u1bG3YSnINT5RQmx/yRZBpenIl30kVxkTLDyHLIMk0gict704Q9n+thfDI7lTRm9vXdDYutVzXhzcThxTnXA==", + "dev": true, + "license": "MIT" + }, "node_modules/import-fresh": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", @@ -6462,6 +6794,14 @@ "tslib": "^2.0.3" } }, + "node_modules/node-addon-api": { + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/node-addon-api/-/node-addon-api-7.1.1.tgz", + "integrity": "sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==", + "dev": true, + "license": "MIT", + "optional": true + }, "node_modules/object-inspect": { "version": "1.13.4", "resolved": "https://registry.npmjs.org/object-inspect/-/object-inspect-1.13.4.tgz", @@ -7484,6 +7824,27 @@ "dev": true, "license": "MIT" }, + "node_modules/sass": { + "version": "1.93.3", + "resolved": "https://registry.npmjs.org/sass/-/sass-1.93.3.tgz", + "integrity": "sha512-elOcIZRTM76dvxNAjqYrucTSI0teAF/L2Lv0s6f6b7FOwcwIuA357bIE871580AjHJuSvLIRUosgV+lIWx6Rgg==", + "dev": true, + "license": "MIT", + "dependencies": { + "chokidar": "^4.0.0", + "immutable": "^5.0.2", + "source-map-js": ">=0.6.2 <2.0.0" + }, + "bin": { + "sass": "sass.js" + }, + "engines": { + "node": ">=14.0.0" + }, + "optionalDependencies": { + "@parcel/watcher": "^2.4.1" + } + }, "node_modules/scheduler": { "version": "0.26.0", "resolved": "https://registry.npmjs.org/scheduler/-/scheduler-0.26.0.tgz", diff --git a/tools/server/webui/package.json b/tools/server/webui/package.json index 376f69015..92c7457bd 100644 --- a/tools/server/webui/package.json +++ b/tools/server/webui/package.json @@ -61,6 +61,7 @@ "prettier-plugin-tailwindcss": "^0.6.11", "rehype-katex": "^7.0.1", "remark-math": "^6.0.0", + "sass": "^1.93.3", "storybook": "^9.0.17", "svelte": "^5.0.0", "svelte-check": "^4.0.0", diff --git a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreviewDialog.svelte b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreviewDialog.svelte index c28cb1c10..3c1ee7fc5 100644 --- a/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreviewDialog.svelte +++ b/tools/server/webui/src/lib/components/app/chat/ChatAttachments/ChatAttachmentPreviewDialog.svelte @@ -134,6 +134,15 @@ } } + $effect(() => { + if (open) { + pdfImages = []; + pdfImagesLoading = false; + pdfImagesError = null; + pdfViewMode = 'pages'; + } + }); + $effect(() => { if (open && isPdf && pdfViewMode === 'pages') { loadPdfImages(); diff --git a/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte b/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte index 1c069db58..7e83d30f1 100644 --- a/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte +++ b/tools/server/webui/src/lib/components/app/misc/MarkdownContent.svelte @@ -8,8 +8,9 @@ import rehypeKatex from 'rehype-katex'; import rehypeStringify from 'rehype-stringify'; import { copyCodeToClipboard } from '$lib/utils/copy'; + import { preprocessLaTeX } from '$lib/utils/latex-protection'; import { browser } from '$app/environment'; - import 'katex/dist/katex.min.css'; + import '$styles/katex-custom.scss'; import githubDarkCss from 'highlight.js/styles/github-dark.css?inline'; import githubLightCss from 'highlight.js/styles/github.css?inline'; @@ -176,19 +177,9 @@ return mutated ? tempDiv.innerHTML : html; } - function normalizeMathDelimiters(text: string): string { - return text - .replace(/(^|[^\\])\\\[((?:\\.|[\s\S])*?)\\\]/g, (_, prefix: string, content: string) => { - return `${prefix}$$${content}$$`; - }) - .replace(/(^|[^\\])\\\(((?:\\.|[\s\S])*?)\\\)/g, (_, prefix: string, content: string) => { - return `${prefix}$${content}$`; - }); - } - async function processMarkdown(text: string): Promise { try { - const normalized = normalizeMathDelimiters(text); + let normalized = preprocessLaTeX(text); const result = await processor().process(normalized); const html = String(result); const enhancedLinks = enhanceLinks(html); diff --git a/tools/server/webui/src/lib/constants/latex-protection.ts b/tools/server/webui/src/lib/constants/latex-protection.ts new file mode 100644 index 000000000..27c88e725 --- /dev/null +++ b/tools/server/webui/src/lib/constants/latex-protection.ts @@ -0,0 +1,35 @@ +/** + * Matches common Markdown code blocks to exclude them from further processing (e.g. LaTeX). + * - Fenced: ```...``` + * - Inline: `...` (does NOT support nested backticks or multi-backtick syntax) + * + * Note: This pattern does not handle advanced cases like: + * `` `code with `backticks` `` or \\``...\\`` + */ +export const CODE_BLOCK_REGEXP = /(```[\s\S]*?```|`[^`\n]+`)/g; + +/** + * Matches LaTeX math delimiters \(...\) and \[...\] only when not preceded by a backslash (i.e., not escaped), + * while also capturing code blocks (```, `...`) so they can be skipped during processing. + * + * Uses negative lookbehind `(? { + it('should protect LaTeX $x + y$ but not money $3.99', () => { + const latexExpressions: string[] = []; + const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('I have $10, $3.99 and <> and <>. The amount is $2,000.'); + expect(latexExpressions).toEqual(['$x + y$', '$100x$']); + }); + + it('should ignore money like $5 and $12.99', () => { + const latexExpressions: string[] = []; + const input = 'Prices are $12.99 and $5. Tax?'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('Prices are $12.99 and $5. Tax?'); + expect(latexExpressions).toEqual([]); + }); + + it('should protect inline math $a^2 + b^2$ even after text', () => { + const latexExpressions: string[] = []; + const input = 'Pythagorean: $a^2 + b^2 = c^2$.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('Pythagorean: <>.'); + expect(latexExpressions).toEqual(['$a^2 + b^2 = c^2$']); + }); + + it('should not protect math that has letter after closing $ (e.g. units)', () => { + const latexExpressions: string[] = []; + const input = 'The cost is $99 and change.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('The cost is $99 and change.'); + expect(latexExpressions).toEqual([]); + }); + + it('should allow $x$ followed by punctuation', () => { + const latexExpressions: string[] = []; + const input = 'We know $x$, right?'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('We know <>, right?'); + expect(latexExpressions).toEqual(['$x$']); + }); + + it('should work across multiple lines', () => { + const latexExpressions: string[] = []; + const input = `Emma buys cupcakes for $3 each.\nHow much is $x + y$?`; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe(`Emma buys cupcakes for $3 each.\nHow much is <>?`); + expect(latexExpressions).toEqual(['$x + y$']); + }); + + it('should not protect $100 but protect $matrix$', () => { + const latexExpressions: string[] = []; + const input = '$100 and $\\mathrm{GL}_2(\\mathbb{F}_7)$ are different.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('$100 and <> are different.'); + expect(latexExpressions).toEqual(['$\\mathrm{GL}_2(\\mathbb{F}_7)$']); + }); + + it('should skip if $ is followed by digit and alphanumeric after close (money)', () => { + const latexExpressions: string[] = []; + const input = 'I paid $5 quickly.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('I paid $5 quickly.'); + expect(latexExpressions).toEqual([]); + }); + + it('should protect LaTeX even with special chars inside', () => { + const latexExpressions: string[] = []; + const input = 'Consider $\\alpha_1 + \\beta_2$ now.'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('Consider <> now.'); + expect(latexExpressions).toEqual(['$\\alpha_1 + \\beta_2$']); + }); + + it('short text', () => { + const latexExpressions: string[] = ['$0$']; + const input = '$a$\n$a$ and $b$'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('<>\n<> and <>'); + expect(latexExpressions).toEqual(['$0$', '$a$', '$a$', '$b$']); + }); + + it('empty text', () => { + const latexExpressions: string[] = []; + const input = '$\n$$\n'; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe('$\n$$\n'); + expect(latexExpressions).toEqual([]); + }); + + it('LaTeX-spacer preceded by backslash', () => { + const latexExpressions: string[] = []; + const input = `\\[ +\\boxed{ +\\begin{aligned} +N_{\\text{att}}^{\\text{(MHA)}} &= +h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr] && (\\text{Q,K,V の重み})\\\\ +&\\quad+ h(d_{k}+d_{k}+d_{v}) && (\\text{バイアス Q,K,V)}\\\\[4pt] +&\\quad+ (h d_{v})\\, d_{\\text{model}} && (\\text{出力射影 }W^{O})\\\\ +&\\quad+ d_{\\text{model}} && (\\text{バイアス }b^{O}) +\\end{aligned}} +\\]`; + const output = maskInlineLaTeX(input, latexExpressions); + + expect(output).toBe(input); + expect(latexExpressions).toEqual([]); + }); +}); + +describe('preprocessLaTeX', () => { + test('converts inline \\( ... \\) to $...$', () => { + const input = + '\\( \\mathrm{GL}_2(\\mathbb{F}_7) \\): Group of invertible matrices with entries in \\(\\mathbb{F}_7\\).'; + const output = preprocessLaTeX(input); + expect(output).toBe( + '$ \\mathrm{GL}_2(\\mathbb{F}_7) $: Group of invertible matrices with entries in $\\mathbb{F}_7$.' + ); + }); + + test("don't inline \\\\( ... \\) to $...$", () => { + const input = + 'Chapter 20 of The TeXbook, in source "Definitions\\\\(also called Macros)", containst the formula \\((x_1,\\ldots,x_n)\\).'; + const output = preprocessLaTeX(input); + expect(output).toBe( + 'Chapter 20 of The TeXbook, in source "Definitions\\\\(also called Macros)", containst the formula $(x_1,\\ldots,x_n)$.' + ); + }); + + test('preserves display math \\[ ... \\] and protects adjacent text', () => { + const input = `Some kernel of \\(\\mathrm{SL}_2(\\mathbb{F}_7)\\): + \\[ + \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\} + \\]`; + const output = preprocessLaTeX(input); + + expect(output).toBe(`Some kernel of $\\mathrm{SL}_2(\\mathbb{F}_7)$: + $$ + \\left\\{ \\begin{pmatrix} 1 & 0 \\\\ 0 & 1 \\end{pmatrix}, \\begin{pmatrix} -1 & 0 \\\\ 0 & -1 \\end{pmatrix} \\right\\} = \\{\\pm I\\} + $$`); + }); + + test('handles standalone display math equation', () => { + const input = `Algebra: +\\[ +x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a} +\\]`; + const output = preprocessLaTeX(input); + + expect(output).toBe(`Algebra: +$$ +x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a} +$$`); + }); + + test('does not interpret currency values as LaTeX', () => { + const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.'; + const output = preprocessLaTeX(input); + + expect(output).toBe('I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.'); + }); + + test('ignores dollar signs followed by digits (money), but keeps valid math $x + y$', () => { + const input = 'I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000.'; + const output = preprocessLaTeX(input); + + expect(output).toBe('I have \\$10, \\$3.99 and $x + y$ and $100x$. The amount is \\$2,000.'); + }); + + test('handles real-world word problems with amounts and no math delimiters', () => { + const input = + 'Emma buys 2 cupcakes for $3 each and 1 cookie for $1.50. How much money does she spend in total?'; + const output = preprocessLaTeX(input); + + expect(output).toBe( + 'Emma buys 2 cupcakes for \\$3 each and 1 cookie for \\$1.50. How much money does she spend in total?' + ); + }); + + test('handles decimal amounts in word problem correctly', () => { + const input = + 'Maria has $20. She buys a notebook for $4.75 and a pack of pencils for $3.25. How much change does she receive?'; + const output = preprocessLaTeX(input); + + expect(output).toBe( + 'Maria has \\$20. She buys a notebook for \\$4.75 and a pack of pencils for \\$3.25. How much change does she receive?' + ); + }); + + test('preserves display math with surrounding non-ASCII text', () => { + const input = `1 kg の質量は + \\[ + E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J} + \\] + というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。`; + const output = preprocessLaTeX(input); + + expect(output).toBe( + `1 kg の質量は + $$ + E = (1\\ \\text{kg}) \\times (3.0 \\times 10^8\\ \\text{m/s})^2 \\approx 9.0 \\times 10^{16}\\ \\text{J} + $$ + というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。` + ); + }); + + test('LaTeX-spacer preceded by backslash', () => { + const input = `\\[ +\\boxed{ +\\begin{aligned} +N_{\\text{att}}^{\\text{(MHA)}} &= +h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr] && (\\text{Q,K,V の重み})\\\\ +&\\quad+ h(d_{k}+d_{k}+d_{v}) && (\\text{バイアス Q,K,V)}\\\\[4pt] +&\\quad+ (h d_{v})\\, d_{\\text{model}} && (\\text{出力射影 }W^{O})\\\\ +&\\quad+ d_{\\text{model}} && (\\text{バイアス }b^{O}) +\\end{aligned}} +\\]`; + const output = preprocessLaTeX(input); + expect(output).toBe( + `$$ +\\boxed{ +\\begin{aligned} +N_{\\text{att}}^{\\text{(MHA)}} &= +h \\bigl[\\, d_{\\text{model}}\\;d_{k} + d_{\\text{model}}\\;d_{v}\\, \\bigr] && (\\text{Q,K,V の重み})\\\\ +&\\quad+ h(d_{k}+d_{k}+d_{v}) && (\\text{バイアス Q,K,V)}\\\\[4pt] +&\\quad+ (h d_{v})\\, d_{\\text{model}} && (\\text{出力射影 }W^{O})\\\\ +&\\quad+ d_{\\text{model}} && (\\text{バイアス }b^{O}) +\\end{aligned}} +$$` + ); + }); + + test('converts \\[ ... \\] even when preceded by text without space', () => { + const input = 'Some line ...\nAlgebra: \\[x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}\\]'; + const output = preprocessLaTeX(input); + + expect(output).toBe( + 'Some line ...\nAlgebra: \n$$x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$$\n' + ); + }); + + test('converts \\[ ... \\] in table-cells', () => { + const input = `| ID | Expression |\n| #1 | \\[ + x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a} +\\] |`; + const output = preprocessLaTeX(input); + + expect(output).toBe( + '| ID | Expression |\n| #1 | $x = \\frac{-b \\pm \\sqrt{\\,b^{2}-4ac\\,}}{2a}$ |' + ); + }); + + test('escapes isolated $ before digits ($5 → \\$5), but not valid math', () => { + const input = 'This costs $5 and this is math $x^2$. $100 is money.'; + const output = preprocessLaTeX(input); + + expect(output).toBe('This costs \\$5 and this is math $x^2$. \\$100 is money.'); + // Note: Since $x^2$ is detected as valid LaTeX, it's preserved. + // $5 becomes \$5 only *after* real math is masked — but here it's correct because the masking logic avoids treating $5 as math. + }); + + test('display with LaTeX-line-breaks', () => { + const input = String.raw`- Algebraic topology, Homotopy Groups of $\mathbb{S}^3$: +$$\pi_n(\mathbb{S}^3) = \begin{cases} +\mathbb{Z} & n = 3 \\ +0 & n > 3, n \neq 4 \\ +\mathbb{Z}_2 & n = 4 \\ +\end{cases}$$`; + const output = preprocessLaTeX(input); + // If the formula contains '\\' the $$-delimiters should be in their own line. + expect(output).toBe(`- Algebraic topology, Homotopy Groups of $\\mathbb{S}^3$: +$$\n\\pi_n(\\mathbb{S}^3) = \\begin{cases} +\\mathbb{Z} & n = 3 \\\\ +0 & n > 3, n \\neq 4 \\\\ +\\mathbb{Z}_2 & n = 4 \\\\ +\\end{cases}\n$$`); + }); + + test('handles mhchem notation safely if present', () => { + const input = 'Chemical reaction: \\( \\ce{H2O} \\) and $\\ce{CO2}$'; + const output = preprocessLaTeX(input); + + expect(output).toBe('Chemical reaction: $ \\ce{H2O} $ and $\\ce{CO2}$'); + }); + + test('preserves code blocks', () => { + const input = 'Inline code: `sum $total` and block:\n```\ndollar $amount\n```\nEnd.'; + const output = preprocessLaTeX(input); + + expect(output).toBe(input); // Code blocks prevent misinterpretation + }); + + test('escape backslash in mchem ce', () => { + const input = 'mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$'; + const output = preprocessLaTeX(input); + + // mhchem-escape would insert a backslash here. + expect(output).toBe('mchem ce:\n$\\ce{2H2(g) + O2(g) -> 2H2O(l)}$'); + }); + + test('escape backslash in mchem pu', () => { + const input = 'mchem pu:\n$\\pu{-572 kJ mol^{-1}}$'; + const output = preprocessLaTeX(input); + + // mhchem-escape would insert a backslash here. + expect(output).toBe('mchem pu:\n$\\pu{-572 kJ mol^{-1}}$'); + }); + + test('LaTeX in blockquotes with display math', () => { + const input = + '> **Definition (limit):** \n> \\[\n> \\lim_{x\\to a} f(x) = L\n> \\]\n> means that as \\(x\\) gets close to \\(a\\).'; + const output = preprocessLaTeX(input); + + // Blockquote markers should be preserved, LaTeX should be converted + expect(output).toContain('> **Definition (limit):**'); + expect(output).toContain('$$'); + expect(output).toContain('$x$'); + expect(output).not.toContain('\\['); + expect(output).not.toContain('\\]'); + expect(output).not.toContain('\\('); + expect(output).not.toContain('\\)'); + }); + + test('LaTeX in blockquotes with inline math', () => { + const input = + "> The derivative \\(f'(x)\\) at point \\(x=a\\) measures slope.\n> Formula: \\(f'(a)=\\lim_{h\\to 0}\\frac{f(a+h)-f(a)}{h}\\)"; + const output = preprocessLaTeX(input); + + // Blockquote markers should be preserved, inline LaTeX converted to $...$ + expect(output).toContain("> The derivative $f'(x)$ at point $x=a$ measures slope."); + expect(output).toContain("> Formula: $f'(a)=\\lim_{h\\to 0}\\frac{f(a+h)-f(a)}{h}$"); + }); + + test('Mixed content with blockquotes and regular text', () => { + const input = + 'Regular text with \\(x^2\\).\n\n> Quote with \\(y^2\\).\n\nMore text with \\(z^2\\).'; + const output = preprocessLaTeX(input); + + // All LaTeX should be converted, blockquote markers preserved + expect(output).toBe('Regular text with $x^2$.\n\n> Quote with $y^2$.\n\nMore text with $z^2$.'); + }); +}); diff --git a/tools/server/webui/src/lib/utils/latex-protection.ts b/tools/server/webui/src/lib/utils/latex-protection.ts new file mode 100644 index 000000000..7f5cf2cdd --- /dev/null +++ b/tools/server/webui/src/lib/utils/latex-protection.ts @@ -0,0 +1,267 @@ +import { + CODE_BLOCK_REGEXP, + LATEX_MATH_AND_CODE_PATTERN, + LATEX_LINEBREAK_REGEXP, + MHCHEM_PATTERN_MAP +} from '$lib/constants/latex-protection'; + +/** + * Replaces inline LaTeX expressions enclosed in `$...$` with placeholders, avoiding dollar signs + * that appear to be part of monetary values or identifiers. + * + * This function processes the input line by line and skips `$` sequences that are likely + * part of money amounts (e.g., `$5`, `$100.99`) or code-like tokens (e.g., `var$`, `$var`). + * Valid LaTeX inline math is replaced with a placeholder like `<>`, and the + * actual LaTeX content is stored in the provided `latexExpressions` array. + * + * @param content - The input text potentially containing LaTeX expressions. + * @param latexExpressions - An array used to collect extracted LaTeX expressions. + * @returns The processed string with LaTeX replaced by placeholders. + */ +export function maskInlineLaTeX(content: string, latexExpressions: string[]): string { + if (!content.includes('$')) { + return content; + } + return content + .split('\n') + .map((line) => { + if (line.indexOf('$') == -1) { + return line; + } + + let processedLine = ''; + let currentPosition = 0; + + while (currentPosition < line.length) { + const openDollarIndex = line.indexOf('$', currentPosition); + + if (openDollarIndex == -1) { + processedLine += line.slice(currentPosition); + break; + } + + // Is there a next $-sign? + const closeDollarIndex = line.indexOf('$', openDollarIndex + 1); + + if (closeDollarIndex == -1) { + processedLine += line.slice(currentPosition); + break; + } + + const charBeforeOpen = openDollarIndex > 0 ? line[openDollarIndex - 1] : ''; + const charAfterOpen = line[openDollarIndex + 1]; + const charBeforeClose = + openDollarIndex + 1 < closeDollarIndex ? line[closeDollarIndex - 1] : ''; + const charAfterClose = closeDollarIndex + 1 < line.length ? line[closeDollarIndex + 1] : ''; + + let shouldSkipAsNonLatex = false; + + if (closeDollarIndex == currentPosition + 1) { + // No content + shouldSkipAsNonLatex = true; + } + + if (/[A-Za-z0-9_$-]/.test(charBeforeOpen)) { + // Character, digit, $, _ or - before first '$', no TeX. + shouldSkipAsNonLatex = true; + } + + if ( + /[0-9]/.test(charAfterOpen) && + (/[A-Za-z0-9_$-]/.test(charAfterClose) || ' ' == charBeforeClose) + ) { + // First $ seems to belong to an amount. + shouldSkipAsNonLatex = true; + } + + if (shouldSkipAsNonLatex) { + processedLine += line.slice(currentPosition, openDollarIndex + 1); + currentPosition = openDollarIndex + 1; + + continue; + } + + // Treat as LaTeX + processedLine += line.slice(currentPosition, openDollarIndex); + const latexContent = line.slice(openDollarIndex, closeDollarIndex + 1); + latexExpressions.push(latexContent); + processedLine += `<>`; + currentPosition = closeDollarIndex + 1; + } + + return processedLine; + }) + .join('\n'); +} + +function escapeBrackets(text: string): string { + return text.replace( + LATEX_MATH_AND_CODE_PATTERN, + ( + match: string, + codeBlock: string | undefined, + squareBracket: string | undefined, + roundBracket: string | undefined + ): string => { + if (codeBlock != null) { + return codeBlock; + } else if (squareBracket != null) { + return `$$${squareBracket}$$`; + } else if (roundBracket != null) { + return `$${roundBracket}$`; + } + + return match; + } + ); +} + +// Escape $\\ce{...} → $\\ce{...} but with proper handling +function escapeMhchem(text: string): string { + return MHCHEM_PATTERN_MAP.reduce((result, [pattern, replacement]) => { + return result.replace(pattern, replacement); + }, text); +} + +const doEscapeMhchem = false; + +/** + * Preprocesses markdown content to safely handle LaTeX math expressions while protecting + * against false positives (e.g., dollar amounts like $5.99) and ensuring proper rendering. + * + * This function: + * - Protects code blocks (```) and inline code (`...`) + * - Safeguards block and inline LaTeX: \(...\), \[...\], $$...$$, and selective $...$ + * - Escapes standalone dollar signs before numbers (e.g., $5 → \$5) to prevent misinterpretation + * - Restores protected LaTeX and code blocks after processing + * - Converts \(...\) → $...$ and \[...\] → $$...$$ for compatibility with math renderers + * - Applies additional escaping for brackets and mhchem syntax if needed + * + * @param content - The raw text (e.g., markdown) that may contain LaTeX or code blocks. + * @returns The preprocessed string with properly escaped and normalized LaTeX. + * + * @example + * preprocessLaTeX("Price: $10. The equation is \\(x^2\\).") + * // → "Price: $10. The equation is $x^2$." + */ +export function preprocessLaTeX(content: string): string { + // See also: + // https://github.com/danny-avila/LibreChat/blob/main/client/src/utils/latex.ts + + // Step 0: Temporarily remove blockquote markers (>) to process LaTeX correctly + // Store the structure so we can restore it later + const blockquoteMarkers: Map = new Map(); + const lines = content.split('\n'); + const processedLines = lines.map((line, index) => { + const match = line.match(/^(>\s*)/); + if (match) { + blockquoteMarkers.set(index, match[1]); + return line.slice(match[1].length); + } + return line; + }); + content = processedLines.join('\n'); + + // Step 1: Protect code blocks + const codeBlocks: string[] = []; + + content = content.replace(CODE_BLOCK_REGEXP, (match) => { + codeBlocks.push(match); + + return `<>`; + }); + + // Step 2: Protect existing LaTeX expressions + const latexExpressions: string[] = []; + + // Match \S...\[...\] and protect them and insert a line-break. + content = content.replace(/([\S].*?)\\\[([\s\S]*?)\\\](.*)/g, (match, group1, group2, group3) => { + // Check if there are characters following the formula (display-formula in a table-cell?) + if (group1.endsWith('\\')) { + return match; // Backslash before \[, do nothing. + } + const hasSuffix = /\S/.test(group3); + let optBreak; + + if (hasSuffix) { + latexExpressions.push(`\\(${group2.trim()}\\)`); // Convert into inline. + optBreak = ''; + } else { + latexExpressions.push(`\\[${group2}\\]`); + optBreak = '\n'; + } + + return `${group1}${optBreak}<>${optBreak}${group3}`; + }); + + // Match \(...\), \[...\], $$...$$ and protect them + content = content.replace( + /(\$\$[\s\S]*?\$\$|(? { + latexExpressions.push(match); + + return `<>`; + } + ); + + // Protect inline $...$ but NOT if it looks like money (e.g., $10, $3.99) + content = maskInlineLaTeX(content, latexExpressions); + + // Step 3: Escape standalone $ before digits (currency like $5 → \$5) + // (Now that inline math is protected, this will only escape dollars not already protected) + content = content.replace(/\$(?=\d)/g, '\\$'); + + // Step 4: Restore protected LaTeX expressions (they are valid) + content = content.replace(/<>/g, (_, index) => { + let expr = latexExpressions[parseInt(index)]; + const match = expr.match(LATEX_LINEBREAK_REGEXP); + if (match) { + // Katex: The $$-delimiters should be in their own line + // if there are \\-line-breaks. + const formula = match[1]; + const prefix = formula.startsWith('\n') ? '' : '\n'; + const suffix = formula.endsWith('\n') ? '' : '\n'; + expr = '$$' + prefix + formula + suffix + '$$'; + } + return expr; + }); + + // Step 5: Restore code blocks + content = content.replace(/<>/g, (_, index) => { + return codeBlocks[parseInt(index)]; + }); + + // Step 6: Apply additional escaping functions (brackets and mhchem) + content = escapeBrackets(content); + + if (doEscapeMhchem && (content.includes('\\ce{') || content.includes('\\pu{'))) { + content = escapeMhchem(content); + } + + // Final pass: Convert \(...\) → $...$, \[...\] → $$...$$ + content = content + // Using the look‑behind pattern `(? { + return `${prefix}$$${content}$$`; + } + ); + + // Step 7: Restore blockquote markers + if (blockquoteMarkers.size > 0) { + const finalLines = content.split('\n'); + const restoredLines = finalLines.map((line, index) => { + const marker = blockquoteMarkers.get(index); + return marker ? marker + line : line; + }); + content = restoredLines.join('\n'); + } + + return content; +} diff --git a/tools/server/webui/src/stories/fixtures/math-formulas.ts b/tools/server/webui/src/stories/fixtures/math-formulas.ts index a4e9ab0ed..1355256b2 100644 --- a/tools/server/webui/src/stories/fixtures/math-formulas.ts +++ b/tools/server/webui/src/stories/fixtures/math-formulas.ts @@ -1,3 +1,4 @@ +/* eslint-disable no-irregular-whitespace */ // Math Formulas Content export const MATH_FORMULAS_MD = String.raw` # Mathematical Formulas and Expressions @@ -150,6 +151,70 @@ $$\lim_{x \to 0} \frac{\sin x}{x} = 1$$ $$\lim_{n \to \infty} \left(1 + \frac{x}{n}\right)^n = e^x$$ +## Further Bracket Styles and Amounts + +- \( \mathrm{GL}_2(\mathbb{F}_7) \): Group of invertible matrices with entries in \(\mathbb{F}_7\). +- Some kernel of \(\mathrm{SL}_2(\mathbb{F}_7)\): + \[ + \left\{ \begin{pmatrix} 1 & 0 \\ 0 & 1 \end{pmatrix}, \begin{pmatrix} -1 & 0 \\ 0 & -1 \end{pmatrix} \right\} = \{\pm I\} + \] +- Algebra: +\[ +x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a} +\] +- $100 and $12.99 are amounts, not LaTeX. +- I have $10, $3.99 and $x + y$ and $100x$. The amount is $2,000. +- Emma buys 2 cupcakes for $3 each and 1 cookie for $1.50. How much money does she spend in total? +- Maria has $20. She buys a notebook for $4.75 and a pack of pencils for $3.25. How much change does she receive? +- 1 kg の質量は + \[ + E = (1\ \text{kg}) \times (3.0 \times 10^8\ \text{m/s})^2 \approx 9.0 \times 10^{16}\ \text{J} + \] + というエネルギーに相当します。これは約 21 百万トンの TNT が爆発したときのエネルギーに匹敵します。 +- Algebra: \[ +x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a} +\] +- Algebraic topology, Homotopy Groups of $\mathbb{S}^3$: +$$\pi_n(\mathbb{S}^3) = \begin{cases} +\mathbb{Z} & n = 3 \\ +0 & n > 3, n \neq 4 \\ +\mathbb{Z}_2 & n = 4 \\ +\end{cases}$$ +- Spacer preceded by backslash: +\[ +\boxed{ +\begin{aligned} +N_{\text{att}}^{\text{(MHA)}} &= +h \bigl[\, d_{\text{model}}\;d_{k} + d_{\text{model}}\;d_{v}\, \bigr] && (\text{Q,K,V の重み})\\ +&\quad+ h(d_{k}+d_{k}+d_{v}) && (\text{バイアス Q,K,V)}\\[4pt] +&\quad+ (h d_{v})\, d_{\text{model}} && (\text{出力射影 }W^{O})\\ +&\quad+ d_{\text{model}} && (\text{バイアス }b^{O}) +\end{aligned}} +\] + +## Formulas in a Table + +| Area | Expression | Comment | +|------|------------|---------| +| **Algebra** | \[ +x = \frac{-b \pm \sqrt{\,b^{2}-4ac\,}}{2a} +\] | Quadratic formula | +| | \[ +(a+b)^{n} = \sum_{k=0}^{n}\binom{n}{k}\,a^{\,n-k}\,b^{\,k} +\] | Binomial theorem | +| | \(\displaystyle \prod_{k=1}^{n}k = n! \) | Factorial definition | +| **Geometry** | \( \mathbf{a}\cdot \mathbf{b} = \|\mathbf{a}\|\,\|\mathbf{b}\|\,\cos\theta \) | Dot product & angle | + +## No math (but chemical) + +Balanced chemical reaction with states: + +\[ +\ce{2H2(g) + O2(g) -> 2H2O(l)} +\] + +The standard enthalpy change for the reaction is: $\Delta H^\circ = \pu{-572 kJ mol^{-1}}$. + --- *This document showcases various mathematical notation and formulas that can be rendered in markdown using LaTeX syntax.* diff --git a/tools/server/webui/src/styles/katex-custom.scss b/tools/server/webui/src/styles/katex-custom.scss new file mode 100644 index 000000000..9c8b96ed5 --- /dev/null +++ b/tools/server/webui/src/styles/katex-custom.scss @@ -0,0 +1,13 @@ +// Override KaTeX SCSS variables to disable ttf and woff fonts +// Only use woff2 format which is embedded in the bundle +$use-woff2: true; +$use-woff: false; +$use-ttf: false; + +// Use Vite alias for font folder +$font-folder: 'katex-fonts'; + +// Import KaTeX SCSS with overridden variables +// Note: @import is deprecated but required because KaTeX uses @import internally +// The deprecation warnings are from KaTeX's code and cannot be avoided +@import 'katex/src/styles/katex.scss'; diff --git a/tools/server/webui/svelte.config.js b/tools/server/webui/svelte.config.js index f25494236..947499363 100644 --- a/tools/server/webui/svelte.config.js +++ b/tools/server/webui/svelte.config.js @@ -22,6 +22,9 @@ const config = { }), output: { bundleStrategy: 'inline' + }, + alias: { + $styles: 'src/styles' } }, diff --git a/tools/server/webui/vite.config.ts b/tools/server/webui/vite.config.ts index b077e232a..11ff665d8 100644 --- a/tools/server/webui/vite.config.ts +++ b/tools/server/webui/vite.config.ts @@ -18,6 +18,15 @@ const GUIDE_FOR_FRONTEND = ` const MAX_BUNDLE_SIZE = 2 * 1024 * 1024; +/** + * the maximum size of an embedded asset in bytes, + * e.g. maximum size of embedded font (see node_modules/katex/dist/fonts/*.woff2) + */ +const MAX_ASSET_SIZE = 32000; + +/** public/index.html.gz minified flag */ +const ENABLE_JS_MINIFICATION = true; + function llamaCppBuildPlugin() { return { name: 'llamacpp:build', @@ -75,12 +84,28 @@ function llamaCppBuildPlugin() { } export default defineConfig({ - build: { - chunkSizeWarningLimit: 3072 + resolve: { + alias: { + 'katex-fonts': resolve('node_modules/katex/dist/fonts') + } + }, + build: { + assetsInlineLimit: MAX_ASSET_SIZE, + chunkSizeWarningLimit: 3072, + minify: ENABLE_JS_MINIFICATION + }, + css: { + preprocessorOptions: { + scss: { + additionalData: ` + $use-woff2: true; + $use-woff: false; + $use-ttf: false; + ` + } + } }, - plugins: [tailwindcss(), sveltekit(), devtoolsJson(), llamaCppBuildPlugin()], - test: { projects: [ { diff --git a/vendor/minja/chat-template.hpp b/vendor/minja/chat-template.hpp index d5295b335..f080aa92f 100644 --- a/vendor/minja/chat-template.hpp +++ b/vendor/minja/chat-template.hpp @@ -192,18 +192,25 @@ class chat_template { }; }; const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}}; + const auto contains_arg_needle = [&](const std::string & out_str) { + return contains(out_str, "") + || contains(out_str, "\"argument_needle\":") + || contains(out_str, "'argument_needle':") + || contains(out_str, ">argument_needle<") + || contains(out_str, ""); + }; // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want. out = try_raw_render(json::array({ dummy_user_msg, make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})), }), {}, false); - auto tool_call_renders_str_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + auto tool_call_renders_str_arguments = contains_arg_needle(out); out = try_raw_render(json::array({ dummy_user_msg, make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})), }), {}, false); - auto tool_call_renders_obj_arguments = contains(out, "") || contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); + auto tool_call_renders_obj_arguments = contains_arg_needle(out); caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments; caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments; diff --git a/vendor/minja/minja.hpp b/vendor/minja/minja.hpp index 57b138add..873ece8c1 100644 --- a/vendor/minja/minja.hpp +++ b/vendor/minja/minja.hpp @@ -2205,7 +2205,7 @@ private: auto value = parseValue(); - while (it != end && consumeSpaces() && peekSymbols({ "[", "." })) { + while (it != end && consumeSpaces() && peekSymbols({ "[", ".", "(" })) { if (!consumeToken("[").empty()) { std::shared_ptr index; auto slice_loc = get_location(); @@ -2250,15 +2250,13 @@ private: auto key = std::make_shared(identifier->location, Value(identifier->get_name())); value = std::make_shared(identifier->location, std::move(value), std::move(key)); } + } else if (peekSymbols({ "(" })) { + auto callParams = parseCallArgs(); + value = std::make_shared(get_location(), std::move(value), std::move(callParams)); } consumeSpaces(); } - if (peekSymbols({ "(" })) { - auto location = get_location(); - auto callParams = parseCallArgs(); - value = std::make_shared(location, std::move(value), std::move(callParams)); - } return value; } @@ -2738,7 +2736,7 @@ inline std::shared_ptr Context::builtins() { globals.set("raise_exception", simple_function("raise_exception", { "message" }, [](const std::shared_ptr &, Value & args) -> Value { throw std::runtime_error(args.at("message").get()); })); - globals.set("tojson", simple_function("tojson", { "value", "indent" }, [](const std::shared_ptr &, Value & args) { + globals.set("tojson", simple_function("tojson", { "value", "indent", "ensure_ascii" }, [](const std::shared_ptr &, Value & args) { return Value(args.at("value").dump(args.get("indent", -1), /* to_json= */ true)); })); globals.set("items", simple_function("items", { "object" }, [](const std::shared_ptr &, Value & args) {