fixed incorrect padding for flash attn with swa

2025-09-15 03:19:41 +00:00 · 2025-06-30 20:32:14 +08:00 · 2025-06-30 20:32:14 +08:00 · 989f9e6b98
commit 989f9e6b98
parent 186227fc26
1 changed files with 1 additions and 0 deletions
--- a/src/llama-kv-cache-unified-iswa.cpp
+++ b/src/llama-kv-cache-unified-iswa.cpp
@@ -31,6 +31,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(

    //kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
    size_swa += 32;
+    size_swa = GGML_PAD(size_swa, n_pad);

    // when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
    if (swa_full) {