mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-15 03:19:41 +00:00
fixed incorrect padding for flash attn with swa
This commit is contained in:
parent
186227fc26
commit
989f9e6b98
1 changed file with 1 addition and 0 deletions
|
@@ -31,6 +31,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(
|
|||
|
||||
//kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
|
||||
size_swa += 32;
|
||||
size_swa = GGML_PAD(size_swa, n_pad);
|
||||
|
||||
// when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
|
||||
if (swa_full) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue