From d9724a4caa95a6d548ac63ee4f12c45a22c4fa9b Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Sun, 12 Apr 2026 18:28:30 +0800
Subject: [PATCH] kcpp musicgen - disable flash attention as it's not stable on
 Vulkan. Due to optimizations, it should still fit in 6GB in lowvram.

---
 otherarch/acestep/backend.h | 2 +-
 otherarch/acestep/dit.h     | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/otherarch/acestep/backend.h b/otherarch/acestep/backend.h
index 2e4026a08..303ec7b87 100644
--- a/otherarch/acestep/backend.h
+++ b/otherarch/acestep/backend.h
@@ -32,7 +32,7 @@ static BackendPair backend_init(const char * label) {
         bp.cpu_backend = bp.backend;
         ggml_backend_cpu_set_n_threads(bp.backend, n_threads);
     } else {
-        n_threads = (n_threads>4?4:n_threads);
+        // n_threads = (n_threads>4?4:n_threads);
         bp.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
         ggml_backend_cpu_set_n_threads(bp.cpu_backend, n_threads);
     }
diff --git a/otherarch/acestep/dit.h b/otherarch/acestep/dit.h
index acbb4aeca..025899cc1 100644
--- a/otherarch/acestep/dit.h
+++ b/otherarch/acestep/dit.h
@@ -409,7 +409,8 @@ static void dit_ggml_init_backend(DiTGGML * m) {
     m->sched = backend_sched_new(bp, 8192);
     // flash_attn_ext accumulates in F16 on CPU, causing audible drift over
     // 24 layers x 8 steps. Use F32 manual attention on CPU instead.
-    m->use_flash_attn = (bp.backend != bp.cpu_backend);
+    // m->use_flash_attn = (bp.backend != bp.cpu_backend);
+     m->use_flash_attn = false; //kcpp: flash attn for music is unstable on vulkan. disable it.
 }
 
 // Graph builder: single DiT layer (self-attention block)