From d9724a4caa95a6d548ac63ee4f12c45a22c4fa9b Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 12 Apr 2026 18:28:30 +0800 Subject: [PATCH] kcpp musicgen - disable flash attention as it's not stable on Vulkan. Due to optimizations, it should still fit in 6GB in lowvram. --- otherarch/acestep/backend.h | 2 +- otherarch/acestep/dit.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/otherarch/acestep/backend.h b/otherarch/acestep/backend.h index 2e4026a08..303ec7b87 100644 --- a/otherarch/acestep/backend.h +++ b/otherarch/acestep/backend.h @@ -32,7 +32,7 @@ static BackendPair backend_init(const char * label) { bp.cpu_backend = bp.backend; ggml_backend_cpu_set_n_threads(bp.backend, n_threads); } else { - n_threads = (n_threads>4?4:n_threads); + // n_threads = (n_threads>4?4:n_threads); bp.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL); ggml_backend_cpu_set_n_threads(bp.cpu_backend, n_threads); } diff --git a/otherarch/acestep/dit.h b/otherarch/acestep/dit.h index acbb4aeca..025899cc1 100644 --- a/otherarch/acestep/dit.h +++ b/otherarch/acestep/dit.h @@ -409,7 +409,8 @@ static void dit_ggml_init_backend(DiTGGML * m) { m->sched = backend_sched_new(bp, 8192); // flash_attn_ext accumulates in F16 on CPU, causing audible drift over // 24 layers x 8 steps. Use F32 manual attention on CPU instead. - m->use_flash_attn = (bp.backend != bp.cpu_backend); + // m->use_flash_attn = (bp.backend != bp.cpu_backend); + m->use_flash_attn = false; //kcpp: flash attn for music is unstable on vulkan. disable it. } // Graph builder: single DiT layer (self-attention block)