Emergency fix for Qwen2.5-VL (q25vl)

2026-05-31 21:39:42 +00:00 · 2025-04-27 16:46:33 +08:00 · 2025-04-27 16:46:33 +08:00 · f8b7ddeac0
commit f8b7ddeac0
parent 1b0481f4b1
1 changed file with 5 additions and 4 deletions
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -1743,7 +1743,8 @@ struct clip_model_loader {

                if (ctx_clip.proj_type == PROJECTOR_TYPE_MINICPMV
                        || ctx_clip.proj_type == PROJECTOR_TYPE_GLM_EDGE
-                        || ctx_clip.proj_type == PROJECTOR_TYPE_QWEN2VL) {
+                        || ctx_clip.proj_type == PROJECTOR_TYPE_QWEN2VL
+                        || ctx_clip.proj_type == PROJECTOR_TYPE_QWEN25VL) {
                    n_layer += 1;
                }

@@ -2856,7 +2857,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str
        }
        return true;
    }
-    else if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL) {
+    else if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL || ctx->proj_type == PROJECTOR_TYPE_QWEN25VL) {
        clip_image_u8 resized;
        auto patch_size = clip_get_patch_size(ctx) * 2;
        int nx = ceil((float)img->nx / patch_size) * patch_size;
@@ -3255,7 +3256,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
    else {
        // non-minicpmv models

-        if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL) {
+        if (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL || ctx->proj_type == PROJECTOR_TYPE_QWEN25VL) {
            // pw * ph = number of tokens output by ViT after apply patch merger
            // ipw * ipw = number of vision token been processed inside ViT
            const int merge_ratio = 2;
@@ -3395,7 +3396,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
        }
    }

-    if (use_window_attn && ctx->proj_type == PROJECTOR_TYPE_QWEN25VL) {
+    if (use_window_attn && (ctx->proj_type == PROJECTOR_TYPE_QWEN2VL || ctx->proj_type == PROJECTOR_TYPE_QWEN25VL)) {
        struct ggml_tensor * window_idx = ggml_graph_get_tensor(gf, "window_idx");
        struct ggml_tensor * inv_window_idx = ggml_graph_get_tensor(gf, "inv_window_idx");
        struct ggml_tensor * window_mask = ggml_graph_get_tensor(gf, "window_mask");