Merge branch 'upstream' into concedo_experimental

# Conflicts: # .github/workflows/build.yml # .github/workflows/close-issue.yml # .github/workflows/server.yml # AUTHORS # CMakeLists.txt # Makefile # README.md # cmake/llama.pc.in # common/CMakeLists.txt # docs/build.md # examples/batched.swift/Sources/main.swift # examples/llama.swiftui/llama.cpp.swift/LibLlama.swift # examples/llava/CMakeLists.txt # examples/llava/clip.h # examples/run/run.cpp # examples/server/README.md # ggml/CMakeLists.txt # ggml/src/ggml-cuda/CMakeLists.txt # ggml/src/ggml-hip/CMakeLists.txt # ggml/src/ggml-musa/CMakeLists.txt # scripts/sync-ggml.last # tests/CMakeLists.txt # tests/test-backend-ops.cpp # tests/test-chat-template.cpp # tests/test-grammar-integration.cpp # tests/test-json-schema-to-grammar.cpp
2025-09-11 01:24:36 +00:00 · 2025-02-07 00:52:31 +08:00 · 2025-02-07 00:52:31 +08:00 · db6db9dff9
commit db6db9dff9
parent 8fef9f3fb5 2fb3c32a16
142 changed files with 74261 additions and 10014 deletions
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@ -311,6 +311,20 @@ static bool encode_image_with_clip(clip_ctx * ctx_clip, int n_threads, const cli
        img_res_v.size = 0;
        img_res_v.data = nullptr;
    }
+    else if (clip_is_glm(ctx_clip)){
+        struct clip_image_size * load_image_size = clip_image_size_init();
+        load_image_size->width = img_res_v.data[0].nx;
+        load_image_size->height = img_res_v.data[0].ny;
+        clip_add_load_image_size(ctx_clip, load_image_size);
+
+        bool encoded = clip_image_encode(ctx_clip, n_threads, &img_res_v.data[0], image_embd);
+        int pos = int(load_image_size->width/clip_patch_size(ctx_clip)/2);
+        *n_img_pos = (pos * pos + 2);
+        if (!encoded){
+            LOG_ERR("Unable to encode image \n");
+            return false;
+        }
+    }
    else if (strcmp(mm_patch_merge_type, "spatial_unpad") != 0) {
        // flat / default llava-1.5 type embedding
        *n_img_pos = clip_n_patches(ctx_clip);
@ -395,6 +409,9 @@ bool llava_image_embed_make_with_clip_img(clip_ctx * ctx_clip, int n_threads, co
    if (clip_is_minicpmv(ctx_clip)) {
        num_max_patches = 10;
    }
+    if (clip_is_glm(ctx_clip)) {
+        num_max_patches = 1;
+    }
    float * image_embd;
    if (clip_is_qwen2vl(ctx_clip)) {
        // qwen2vl don't split image into chunks, so `num_max_patches` is not needed.