vulkan: fuse adds (#15252)

* vulkan: fuse adds

  Fuse adds that have the same shape, which are common in MoE models. It will currently fuse up to 6 adds, because we assume no more than 8 descriptors per dispatch, but this could be changed.

* check runtimeDescriptorArray feature

* disable multi_add for Intel due to likely driver bug
2025-09-11 01:24:36 +00:00 · 2025-08-16 11:48:22 -05:00 · 2025-08-16 11:48:22 -05:00 · 1fe00296f5
commit 1fe00296f5
parent de2192794f
6 changed files with 301 additions and 25 deletions
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2491,12 +2491,12 @@ struct test_bin_bcast : public test_case {
        : op(op), type(type), ne(ne), nr(nr), nf(nf) {}

    ggml_tensor * build_graph(ggml_context * ctx) override {
-        GGML_ASSERT(nf <= 8);
+        GGML_ASSERT(nf <= 16);

        ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0]*nr[0], ne[1]*nr[1], ne[2]*nr[2], ne[3]*nr[3]);
        ggml_set_name(a, "a");

-        ggml_tensor * b[8];
+        ggml_tensor * b[16];
        for (int i = 0; i < nf; ++i) {
            b[i] = ggml_new_tensor(ctx, type, 4, ne.data());
            ggml_set_name(b[i], (std::string("b") + std::to_string(i)).c_str());
@@ -5658,6 +5658,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {10, 5, 4, 3}, {1, 1, 2, 2}, 6));
    test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {10, 5, 4, 3}, {1, 2, 2, 2}, 7));
    test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {16, 5, 4, 3}, {2, 2, 2, 2}, 8));
+    test_cases.emplace_back(new test_bin_bcast(ggml_add, GGML_TYPE_F32, {16, 5, 4, 3}, {1, 1, 1, 1}, 16));

    test_cases.emplace_back(new test_add1());
    test_cases.emplace_back(new test_scale());