additional padding for the swa kv cache itself

Concedo 2025-06-28 15:52:48 +08:00
parent ff2cabc28f
commit 2e14338455
2 changed files with 8 additions and 1 deletion

@@ -45,7 +45,7 @@
 #include "common/common.h"
 //const
-const int extra_context_handle_fragmentation = 120;
+const int extra_context_handle_fragmentation = 128;
 const int LLAVA_TOKEN_IDENTIFIER_A = -998; //alternate between both, changing when image changes
 const int LLAVA_TOKEN_IDENTIFIER_B = -999;
@@ -2174,6 +2174,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     {
         llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
     }
+    else
+    {
+        llama_ctx_params.n_ctx += (extra_context_handle_fragmentation/2);
+    }
     llama_ctx_params.offload_kqv = !inputs.low_vram;
     model_params.use_mmap = inputs.use_mmap;
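
To make the effect of this change concrete, here is a minimal standalone sketch of the padding arithmetic after this commit. The predicate guarding the if/else lies outside the hunk shown above, so the full_padding flag below is a hypothetical stand-in for it, not the actual condition in gpttype_load_model:

#include <cstdio>

// Value after this commit (was 120 before).
const int extra_context_handle_fragmentation = 128;

// Hypothetical helper for illustration only: full_padding stands in for
// whatever condition selects between the two branches in gpttype_load_model.
int padded_n_ctx(int requested_n_ctx, bool full_padding) {
    if (full_padding) {
        // existing branch: pad by the full fragmentation allowance
        return requested_n_ctx + extra_context_handle_fragmentation;
    }
    // new else branch: pad by half the allowance
    return requested_n_ctx + (extra_context_handle_fragmentation / 2);
}

int main() {
    printf("%d\n", padded_n_ctx(4096, true));  // 4224
    printf("%d\n", padded_n_ctx(4096, false)); // 4160
    return 0;
}

Either way, the context size requested from llama.cpp is slightly larger than what the user asked for, leaving headroom in the KV cache so fragmentation does not cause allocation failures near the context limit.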