added swa padding controls

This commit is contained in:
Concedo 2026-04-16 16:21:48 +08:00
parent a9e817fb4c
commit 0251c6dbde
5 changed files with 17 additions and 4 deletions

View file

@@ -10,6 +10,8 @@
//
// llama_kv_cache_iswa
//
// kcpp: file-scope integer that adjusts the SWA KV cache padding.
// The llama_kv_cache_iswa constructor adds this value to size_swa, on top of
// the fixed +128, before size_swa is passed through GGML_PAD(size_swa, n_pad).
// It is initialized to 0, which adds no additional padding.
// NOTE(review): declared static (internal linkage) and no writer is visible in
// this view - confirm how the value is meant to be set by callers.
static int kcpp_extra_swa_padding = 0;
llama_kv_cache_iswa::llama_kv_cache_iswa(
const llama_model & model,
@@ -51,6 +53,7 @@ llama_kv_cache_iswa::llama_kv_cache_iswa(
//kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
size_swa += 128;
size_swa += kcpp_extra_swa_padding;
size_swa = GGML_PAD(size_swa, n_pad);
// when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size

View file

@@ -160,7 +160,7 @@ llama_kv_cache::llama_kv_cache(
for (uint32_t il = 0; il < hparams.n_layer; il++) {
if (!hparams.has_kv(il)) {
LLAMA_LOG_DEBUG("%s: layer %3d: does not have KV cache\n", __func__, il);
// LLAMA_LOG_DEBUG("%s: layer %3d: does not have KV cache\n", __func__, il);
continue;
}
@@ -232,12 +232,12 @@ llama_kv_cache::llama_kv_cache(
const int32_t il_reuse = reuse(il);
if (il_reuse < 0) {
LLAMA_LOG_DEBUG("%s: - layer %3d: no reuse\n", __func__, il);
// LLAMA_LOG_DEBUG("%s: - layer %3d: no reuse\n", __func__, il);
continue;
}
if (filter && !filter(il)) {
LLAMA_LOG_DEBUG("%s: - layer %3d: filtered\n", __func__, il);
// LLAMA_LOG_DEBUG("%s: - layer %3d: filtered\n", __func__, il);
continue;
}