From 989f9e6b98b42417b1f45883bf3b670367f243dc Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 30 Jun 2025 20:32:14 +0800
Subject: [PATCH] fixed incorrect padding for flash attn with swa

---
 src/llama-kv-cache-unified-iswa.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/llama-kv-cache-unified-iswa.cpp b/src/llama-kv-cache-unified-iswa.cpp
index 4852af80a..dc1b39691 100644
--- a/src/llama-kv-cache-unified-iswa.cpp
+++ b/src/llama-kv-cache-unified-iswa.cpp
@@ -31,6 +31,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(
 
     //kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
     size_swa += 32;
+    size_swa = GGML_PAD(size_swa, n_pad);
 
     // when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
     if (swa_full) {