From b78a637da517468fb8f2933427c72c7fd203624f Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 26 Aug 2024 23:07:31 +0800
Subject: [PATCH] try to optimize context shifting

---
 gpttype_adapter.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index dacfae5d7..8a8d2ba98 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1036,8 +1036,8 @@ void PurgeMissingTokens(llama_context * ctx, std::vector<int> &current_context_t
     //if passed, save beginning of LCQ from old ctx as p1
     //remove all tokens from old ctx between p0 and p1, updating both arrays and kv, then continue as normal
 
-    const int ShortfallThreshold = 200 + (nctx/30); //dont trigger shifting if the distance between trimstart and currhead < this
-    const int SlackAllowance = 60 + (nctx/50); //in case the end text is slightly modified, be forgiving
+    const int ShortfallThreshold = 200 + std::min((nctx/30),140); //dont trigger shifting if the distance between trimstart and currhead < this
+    const int SlackAllowance = 60 + std::min((nctx/60),70); //in case the end text is slightly modified, be forgiving
 
     int trimstart = 0;
     int new_tokens_len = new_context_tokens.size();
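
The effect of the change is to cap how far the two heuristics scale with the context size: before the patch both terms grew linearly with nctx, so very large contexts pushed the shift trigger and the end-text slack ever higher. Below is a minimal standalone sketch (not part of the patch; the nctx values are illustrative only) that prints the old and new formulas side by side to show where the std::min caps take effect:

    // sketch.cpp - compare uncapped vs capped threshold formulas from the patch
    #include <algorithm>
    #include <cstdio>

    int main()
    {
        for (int nctx : {2048, 8192, 32768, 131072})
        {
            int old_shortfall = 200 + (nctx / 30);
            int old_slack     = 60 + (nctx / 50);
            int new_shortfall = 200 + std::min((nctx / 30), 140); // capped at 340
            int new_slack     = 60 + std::min((nctx / 60), 70);   // capped at 130
            printf("nctx=%6d  shortfall %5d -> %3d   slack %5d -> %3d\n",
                   nctx, old_shortfall, new_shortfall, old_slack, new_slack);
        }
        return 0;
    }

For small contexts the capped and uncapped values are close, while at 32k or 128k contexts the caps keep ShortfallThreshold and SlackAllowance bounded instead of growing into the thousands.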