added swa padding controls; commented out per-layer kv cache debug logs

2026-05-19 08:00:25 +00:00 · 2026-04-16 16:21:48 +08:00 · 2026-04-16 16:21:48 +08:00 · 0251c6dbde
commit 0251c6dbde
parent a9e817fb4c
5 changed files with 17 additions and 4 deletions
--- a/src/llama-kv-cache-iswa.cpp
+++ b/src/llama-kv-cache-iswa.cpp
@ -10,6 +10,8 @@
 //
 // llama_kv_cache_iswa
 //
+//kcpp: file-local amount of extra swa padding, added to size_swa on top of the fixed 128
+static int kcpp_extra_swa_padding = 0;

 llama_kv_cache_iswa::llama_kv_cache_iswa(
        const llama_model & model,
@ -51,6 +53,7 @@ llama_kv_cache_iswa::llama_kv_cache_iswa(

    //kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
    size_swa += 128;
+    size_swa += kcpp_extra_swa_padding;
    size_swa = GGML_PAD(size_swa, n_pad);

    // when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
--- a/src/llama-kv-cache.cpp
+++ b/src/llama-kv-cache.cpp
@ -160,7 +160,7 @@ llama_kv_cache::llama_kv_cache(

    for (uint32_t il = 0; il < hparams.n_layer; il++) {
        if (!hparams.has_kv(il)) {
-            LLAMA_LOG_DEBUG("%s: layer %3d: does not have KV cache\n", __func__, il);
+            // LLAMA_LOG_DEBUG("%s: layer %3d: does not have KV cache\n", __func__, il);
            continue;
        }

@ -232,12 +232,12 @@ llama_kv_cache::llama_kv_cache(
            const int32_t il_reuse = reuse(il);

            if (il_reuse < 0) {
-                LLAMA_LOG_DEBUG("%s: - layer %3d: no reuse\n", __func__, il);
+                // LLAMA_LOG_DEBUG("%s: - layer %3d: no reuse\n", __func__, il);
                continue;
            }

            if (filter && !filter(il)) {
-                LLAMA_LOG_DEBUG("%s: - layer %3d: filtered\n", __func__, il);
+                // LLAMA_LOG_DEBUG("%s: - layer %3d: filtered\n", __func__, il);
                continue;
            }