mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
Additional padding for the SWA KV cache itself
This commit is contained in:
parent
ff2cabc28f
commit
2e14338455
2 changed files with 8 additions and 1 deletion
|
@ -45,7 +45,7 @@
|
|||
#include "common/common.h"
|
||||
|
||||
//const
|
||||
const int extra_context_handle_fragmentation = 120;
|
||||
const int extra_context_handle_fragmentation = 128;
|
||||
const int LLAVA_TOKEN_IDENTIFIER_A = -998; //alternate between both, changing when image changes
|
||||
const int LLAVA_TOKEN_IDENTIFIER_B = -999;
|
||||
|
||||
|
@ -2174,6 +2174,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
{
|
||||
llama_ctx_params.n_ctx += extra_context_handle_fragmentation;
|
||||
}
|
||||
else
|
||||
{
|
||||
llama_ctx_params.n_ctx += (extra_context_handle_fragmentation/2);
|
||||
}
|
||||
|
||||
llama_ctx_params.offload_kqv = !inputs.low_vram;
|
||||
model_params.use_mmap = inputs.use_mmap;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue