kcpp musicgen - disable flash attention, as it's not stable on Vulkan. Thanks to other optimizations, it should still fit in 6 GB in lowvram mode.

2026-07-10 01:18:32 +00:00 · 2026-04-12 18:28:30 +08:00 · 2026-04-12 18:28:30 +08:00 · d9724a4caa
commit d9724a4caa
parent 7bf7b0aefc
2 changed files with 3 additions and 2 deletions
--- a/otherarch/acestep/backend.h
+++ b/otherarch/acestep/backend.h
@@ -32,7 +32,7 @@ static BackendPair backend_init(const char * label) {
        bp.cpu_backend = bp.backend;
        ggml_backend_cpu_set_n_threads(bp.backend, n_threads);
    } else {
-        n_threads = (n_threads>4?4:n_threads);
+        // n_threads = (n_threads>4?4:n_threads);
        bp.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
        ggml_backend_cpu_set_n_threads(bp.cpu_backend, n_threads);
    }
--- a/otherarch/acestep/dit.h
+++ b/otherarch/acestep/dit.h
@@ -409,7 +409,8 @@ static void dit_ggml_init_backend(DiTGGML * m) {
    m->sched = backend_sched_new(bp, 8192);
    // flash_attn_ext accumulates in F16 on CPU, causing audible drift over
    // 24 layers x 8 steps. Use F32 manual attention on CPU instead.
-    m->use_flash_attn = (bp.backend != bp.cpu_backend);
+    // m->use_flash_attn = (bp.backend != bp.cpu_backend);
+     m->use_flash_attn = false; //kcpp: flash attn for music is unstable on vulkan. disable it.
 }

 // Graph builder: single DiT layer (self-attention block)