From 989f9e6b98b42417b1f45883bf3b670367f243dc Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Mon, 30 Jun 2025 20:32:14 +0800 Subject: [PATCH] fixed incorrect padding for flash attn with swa --- src/llama-kv-cache-unified-iswa.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/llama-kv-cache-unified-iswa.cpp b/src/llama-kv-cache-unified-iswa.cpp index 4852af80a..dc1b39691 100644 --- a/src/llama-kv-cache-unified-iswa.cpp +++ b/src/llama-kv-cache-unified-iswa.cpp @@ -31,6 +31,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa( //kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation size_swa += 32; + size_swa = GGML_PAD(size_swa, n_pad); // when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size if (swa_full) {