vulkan: support q4_0/q8_0 KV in scalar FA

Author: Jeff Bolz
Date:   2025-05-07 23:53:38 -05:00
Parent: 989bfb18fc
Commit: e66094276b
3 changed files with 66 additions and 8 deletions


@@ -444,7 +444,11 @@ void process_shaders() {
             if (tname == "f16") {
                 string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn.comp",
                     merge_maps(base_dict, {{"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}}), true, false, false, f16acc);
-            } // quants not supported yet
+            } else if (tname == "q4_0" || tname == "q8_0") {
+                std::string data_a_key = "DATA_A_" + to_uppercase(tname);
+                string_to_spv("flash_attn_f32_f16_" + tname, "flash_attn.comp",
+                    merge_maps(base_dict, {{data_a_key, "1"}, {"Q_TYPE", "float"}, {"D_TYPE", "float"}, {"ACC_TYPE", acctype}, {"BLOCK_SIZE", "QUANT_K_"+to_uppercase(tname) }}), true, false, false, f16acc);
+            }
         }
     }
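
For context: the new branch in the shader generator passes two extra compile-time defines to the scalar flash_attn.comp shader. DATA_A_Q4_0 / DATA_A_Q8_0 select the quantized K/V block layout, and BLOCK_SIZE is set to the block size of the quant type (QUANT_K_Q4_0 / QUANT_K_Q8_0), presumably so the shader can walk the K/V cache block by block and dequantize values as it loads them. The snippet below is a host-side C++ sketch of that per-element dequantization, not the GLSL from this commit; the block_q4_0 / block_q8_0 structs mirror ggml's CPU-side layouts (32 values per block), with a plain float standing in for ggml's fp16 scale.

#include <cstdint>
#include <cstdio>

// Hedged illustration (not the shader code from this commit): the block
// layouts of ggml's q4_0 and q8_0 quant types and the per-element
// dequantization a flash-attention kernel has to do when K/V are stored in
// these formats. ggml keeps the per-block scale d as fp16; plain float is
// used here for brevity.

constexpr int QK4_0 = 32;    // elements per q4_0 block (cf. QUANT_K_Q4_0 in the shaders)
constexpr int QK8_0 = 32;    // elements per q8_0 block (cf. QUANT_K_Q8_0 in the shaders)

struct block_q4_0 {
    float   d;               // per-block scale
    uint8_t qs[QK4_0 / 2];   // 32 4-bit quants packed two per byte
};

struct block_q8_0 {
    float  d;                // per-block scale
    int8_t qs[QK8_0];        // 32 signed 8-bit quants
};

// q4_0: element i lives in the low nibble of qs[i] for i < 16 and in the
// high nibble of qs[i - 16] otherwise; value = (nibble - 8) * d.
static float dequant_q4_0(const block_q4_0 & b, int i) {
    const uint8_t byte = b.qs[i % (QK4_0 / 2)];
    const int     q    = (i < QK4_0 / 2) ? (byte & 0x0F) : (byte >> 4);
    return float(q - 8) * b.d;
}

// q8_0: value = quant * d.
static float dequant_q8_0(const block_q8_0 & b, int i) {
    return float(b.qs[i]) * b.d;
}

int main() {
    block_q8_0 b = {0.5f, {}};
    b.qs[0] = -6;
    std::printf("dequantized: %f\n", dequant_q8_0(b, 0)); // -3.000000
}

Note also that the quantized branch goes through the same ACC_TYPE / f16acc plumbing as the f16 path, so each quantized K/V type gets both fp32- and fp16-accumulator shader variants.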