kcpp musicgen - disable flash attention as it's not stable on Vulkan. Due to optimizations, it should still fit in 6 GB in lowvram.

This commit is contained in:
Concedo 2026-04-12 18:28:30 +08:00
parent 7bf7b0aefc
commit d9724a4caa
2 changed files with 3 additions and 2 deletions

View file

@ -32,7 +32,7 @@ static BackendPair backend_init(const char * label) {
bp.cpu_backend = bp.backend;
ggml_backend_cpu_set_n_threads(bp.backend, n_threads);
} else {
n_threads = (n_threads>4?4:n_threads);
// n_threads = (n_threads>4?4:n_threads);
bp.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
ggml_backend_cpu_set_n_threads(bp.cpu_backend, n_threads);
}

View file

@ -409,7 +409,8 @@ static void dit_ggml_init_backend(DiTGGML * m) {
m->sched = backend_sched_new(bp, 8192);
// flash_attn_ext accumulates in F16 on CPU, causing audible drift over
// 24 layers x 8 steps. Use F32 manual attention on CPU instead.
m->use_flash_attn = (bp.backend != bp.cpu_backend);
// m->use_flash_attn = (bp.backend != bp.cpu_backend);
m->use_flash_attn = false; //kcpp: flash attn for music is unstable on vulkan. disable it.
}
// Graph builder: single DiT layer (self-attention block)