From 066e73d769f294c932cc7d4941d103d75e50f304 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sun, 11 Feb 2024 18:30:38 +0800 Subject: [PATCH] context shift even more lenient --- gpttype_adapter.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index eadbbc95d..32b72de9e 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -620,8 +620,8 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t //if passed, save beginning of LCQ from old ctx as p1 //remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal - const int ShortfallThreshold = 200 + (nctx/20); //dont trigger shifting if the distance between trimstart and currhead < this - const int SlackAllowance = 50 + (nctx/50); //in case the end text is slightly modified, be forgiving + const int ShortfallThreshold = 200 + (nctx/30); //dont trigger shifting if the distance between trimstart and currhead < this + const int SlackAllowance = 60 + (nctx/50); //in case the end text is slightly modified, be forgiving int trimstart = 0; int new_tokens_len = new_context_tokens.size(); @@ -650,7 +650,7 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t } //at least this many tokens need to match, otherwise don't bother trimming - const int LCSTokThreshold = std::max(std::min((new_tokens_len - trimstart) - (genamt+SlackAllowance), (int)(nctx*0.5)), ShortfallThreshold-SlackAllowance); + const int LCSTokThreshold = std::max(std::min((new_tokens_len - trimstart) - (genamt+SlackAllowance), (int)(nctx*0.45)), ShortfallThreshold-SlackAllowance); auto curr_ctx_without_memory = std::vector<int>(current_context_tokens.begin() + trimstart, current_context_tokens.end()); auto new_ctx_without_memory = std::vector<int>(new_context_tokens.begin() + trimstart, new_context_tokens.end());