additional padding for the swa kv cache itself

Concedo 2025-06-28 15:52:48 +08:00
parent ff2cabc28f
commit 2e14338455
2 changed files with 8 additions and 1 deletion

@@ -45,7 +45,7 @@
 #include "common/common.h"
 //const
-const int extra_context_handle_fragmentation = 120;
+const int extra_context_handle_fragmentation = 128;
 const int LLAVA_TOKEN_IDENTIFIER_A = -998; //alternate between both, changing when image changes
 const int LLAVA_TOKEN_IDENTIFIER_B = -999;
@@ -2174,6 +2174,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
     {
         llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
     }
+    else
+    {
+        llama_ctx_params.n_ctx += (extra_context_handle_fragmentation/2);
+    }
     llama_ctx_params.offload_kqv = !inputs.low_vram;
     model_params.use_mmap = inputs.use_mmap;
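
To make the effect of this change concrete, here is a minimal standalone sketch of the padding arithmetic after this commit. The predicate guarding the if/else lies outside the hunk shown above, so the full_padding flag below is a hypothetical stand-in for it, not the actual condition in gpttype_load_model:

#include <cstdio>

// Value after this commit (was 120 before).
const int extra_context_handle_fragmentation = 128;

// Hypothetical helper for illustration only: full_padding stands in for
// whatever condition selects between the two branches in gpttype_load_model.
int padded_n_ctx(int requested_n_ctx, bool full_padding) {
    if (full_padding) {
        // existing branch: pad by the full fragmentation allowance
        return requested_n_ctx + extra_context_handle_fragmentation;
    }
    // new else branch: pad by half the allowance
    return requested_n_ctx + (extra_context_handle_fragmentation / 2);
}

int main() {
    printf("%d\n", padded_n_ctx(4096, true));  // 4224
    printf("%d\n", padded_n_ctx(4096, false)); // 4160
    return 0;
}

Either way, the context size requested from llama.cpp is slightly larger than what the user asked for, leaving headroom in the KV cache so fragmentation does not cause allocation failures near the context limit.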