mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
kcpp musicgen - disable flash attention as it's not stable on Vulkan. Due to optimizations, it should still fit in 6 GB in lowvram.
This commit is contained in:
parent
7bf7b0aefc
commit
d9724a4caa
2 changed files with 3 additions and 2 deletions
|
|
@ -32,7 +32,7 @@ static BackendPair backend_init(const char * label) {
|
|||
bp.cpu_backend = bp.backend;
|
||||
ggml_backend_cpu_set_n_threads(bp.backend, n_threads);
|
||||
} else {
|
||||
n_threads = (n_threads>4?4:n_threads);
|
||||
// n_threads = (n_threads>4?4:n_threads);
|
||||
bp.cpu_backend = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL);
|
||||
ggml_backend_cpu_set_n_threads(bp.cpu_backend, n_threads);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -409,7 +409,8 @@ static void dit_ggml_init_backend(DiTGGML * m) {
|
|||
m->sched = backend_sched_new(bp, 8192);
|
||||
// flash_attn_ext accumulates in F16 on CPU, causing audible drift over
|
||||
// 24 layers x 8 steps. Use F32 manual attention on CPU instead.
|
||||
m->use_flash_attn = (bp.backend != bp.cpu_backend);
|
||||
// m->use_flash_attn = (bp.backend != bp.cpu_backend);
|
||||
m->use_flash_attn = false; //kcpp: flash attn for music is unstable on vulkan. disable it.
|
||||
}
|
||||
|
||||
// Graph builder: single DiT layer (self-attention block)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue