swa full used unless ctx shift and fast forward disabled

2025-09-10 09:04:36 +00:00 · 2025-05-21 22:47:45 +08:00 · 2025-05-21 22:47:45 +08:00 · 9f976e9c65
commit 9f976e9c65
parent 5b6ed445de e298d2fbd0
16 changed files with 1429 additions and 654 deletions
--- a/common/common.h
+++ b/common/common.h
@ -319,6 +319,7 @@ struct common_params {
    bool flash_attn        = false; // flash attention
    bool no_perf           = false; // disable performance metrics
    bool ctx_shift         = true;  // context shift on infinite text generation
+    bool swa_full          = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055)

    bool input_prefix_bos  = false; // prefix BOS to user inputs, preceding input_prefix
    bool use_mmap          = true;  // use mmap for faster loads