mirror of https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.github/workflows/docker.yml
#	CMakeLists.txt
#	Makefile
#	README.md
#	flake.lock
#	tests/test-backend-ops.cpp
commit 4ed9ba7352
58 changed files with 2404 additions and 361 deletions
@@ -617,7 +617,7 @@ public:
 class_embedding = ggml_repeat(ctx, class_embed_weight, class_embedding); // [N, embed_dim]
 class_embedding = ggml_reshape_4d(ctx, class_embedding, 1, embed_dim, 1, N); // [N, 1, embed_dim, 1]
 
-struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding); // [N, num_positions, embed_dim, 1]
+struct ggml_tensor* x = ggml_concat(ctx, class_embedding, patch_embedding, 2); // [N, num_positions, embed_dim, 1]
 x = ggml_reshape_3d(ctx, x, embed_dim, num_positions, N); // [N, num_positions, embed_dim]
 x = ggml_add(ctx, x, position_embed_weight);
 return x; // [N, num_positions, embed_dim]
@@ -948,7 +948,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
 hidden_states2->ne[3]);
 hidden_states2 = ggml_cont(ctx, ggml_permute(ctx, hidden_states2, 2, 0, 1, 3));
 
-hidden_states = ggml_concat(ctx, hidden_states, hidden_states2); // [N, n_token, hidden_size + hidden_size2]
+hidden_states = ggml_concat(ctx, hidden_states, hidden_states2, 2); // [N, n_token, hidden_size + hidden_size2]
 
 hidden_states = ggml_cont(ctx, ggml_permute(ctx, hidden_states, 1, 2, 0, 3));
 }
@@ -980,7 +980,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule {
 auto token_embed_weight = text_model.get_token_embed_weight();
 token_embed_weight = ggml_reshape_3d(compute_ctx, token_embed_weight, token_embed_weight->ne[0], 1, token_embed_weight->ne[1]);
 // concatenate custom embeddings
-embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings);
+embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 2);
 embeddings = ggml_reshape_2d(compute_ctx, embeddings, embeddings->ne[0], embeddings->ne[2]);
 }
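All three hunks are the same mechanical migration: upstream ggml replaced the two-tensor ggml_concat with a version that takes the concatenation dimension explicitly, and passing 2 at each call site reproduces the shapes the old implicit behaviour produced (the shape comments are unchanged). Below is a minimal sketch of the new call pattern, assuming a ggml build that already has the four-argument signature; the tensor sizes are illustrative stand-ins, not values taken from this commit.

// Sketch only: demonstrates the explicit-dim ggml_concat with made-up shapes.
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16 * 1024 * 1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Stand-ins for class_embedding [1, embed_dim, 1, N] and
    // patch_embedding [1, embed_dim, num_patches, N] in ggml's ne order.
    struct ggml_tensor * a = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 1, 768, 1,   2);
    struct ggml_tensor * b = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 1, 768, 256, 2);

    // Old API: ggml_concat(ctx, a, b) always joined along dimension 2.
    // New API: the dimension is explicit, so passing 2 keeps the old behaviour.
    struct ggml_tensor * x = ggml_concat(ctx, a, b, 2);

    printf("concat result ne = [%lld, %lld, %lld, %lld]\n",
           (long long) x->ne[0], (long long) x->ne[1],
           (long long) x->ne[2], (long long) x->ne[3]);  // expect [1, 768, 257, 2]

    ggml_free(ctx);
    return 0;
}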