mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-15 11:29:43 +00:00
fixed incorrect padding for flash attn with swa
This commit is contained in:
parent
186227fc26
commit
989f9e6b98
1 changed files with 1 additions and 0 deletions
|
@ -31,6 +31,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(
|
||||||
|
|
||||||
//kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
|
//kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
|
||||||
size_swa += 32;
|
size_swa += 32;
|
||||||
|
size_swa = GGML_PAD(size_swa, n_pad);
|
||||||
|
|
||||||
// when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
|
// when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
|
||||||
if (swa_full) {
|
if (swa_full) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue