mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-19 08:00:25 +00:00
added swa padding controls
This commit is contained in:
parent
a9e817fb4c
commit
0251c6dbde
5 changed files with 17 additions and 4 deletions
|
|
@ -10,6 +10,8 @@
|
|||
//
|
||||
// llama_kv_cache_iswa
|
||||
//
|
||||
//kcpp: use a global flag to adjust swa padding
|
||||
static int kcpp_extra_swa_padding = 0;
|
||||
|
||||
llama_kv_cache_iswa::llama_kv_cache_iswa(
|
||||
const llama_model & model,
|
||||
|
|
@ -51,6 +53,7 @@ llama_kv_cache_iswa::llama_kv_cache_iswa(
|
|||
|
||||
//kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
|
||||
size_swa += 128;
|
||||
size_swa += kcpp_extra_swa_padding;
|
||||
size_swa = GGML_PAD(size_swa, n_pad);
|
||||
|
||||
// when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
|
||||
|
|
|
|||
|
|
@ -160,7 +160,7 @@ llama_kv_cache::llama_kv_cache(
|
|||
|
||||
for (uint32_t il = 0; il < hparams.n_layer; il++) {
|
||||
if (!hparams.has_kv(il)) {
|
||||
LLAMA_LOG_DEBUG("%s: layer %3d: does not have KV cache\n", __func__, il);
|
||||
// LLAMA_LOG_DEBUG("%s: layer %3d: does not have KV cache\n", __func__, il);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -232,12 +232,12 @@ llama_kv_cache::llama_kv_cache(
|
|||
const int32_t il_reuse = reuse(il);
|
||||
|
||||
if (il_reuse < 0) {
|
||||
LLAMA_LOG_DEBUG("%s: - layer %3d: no reuse\n", __func__, il);
|
||||
// LLAMA_LOG_DEBUG("%s: - layer %3d: no reuse\n", __func__, il);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (filter && !filter(il)) {
|
||||
LLAMA_LOG_DEBUG("%s: - layer %3d: filtered\n", __func__, il);
|
||||
// LLAMA_LOG_DEBUG("%s: - layer %3d: filtered\n", __func__, il);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue