diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index dacfae5d7..8a8d2ba98 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1036,8 +1036,8 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t //if passed, save beginning of LCQ from old ctx as p1 //remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal - const int ShortfallThreshold = 200 + (nctx/30); //dont trigger shifting if the distance between trimstart and currhead < this - const int SlackAllowance = 60 + (nctx/50); //in case the end text is slightly modified, be forgiving + const int ShortfallThreshold = 200 + std::min((nctx/30),140); //dont trigger shifting if the distance between trimstart and currhead < this + const int SlackAllowance = 60 + std::min((nctx/60),70); //in case the end text is slightly modified, be forgiving int trimstart = 0; int new_tokens_len = new_context_tokens.size();