diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 00f73df42..0f4486aa0 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -314,7 +314,7 @@ static std::string get_tok_vec_str(std::vector<int> &embd)
 }
 static void print_tok_vec_str(std::vector<int> &vec)
 {
-    printf("\n%s", get_tok_vec_str(vec).c_str());
+    printf("\n[%s]\n", get_tok_vec_str(vec).c_str());
 }
 
 bool allExtendedUnicode(const std::string& str) {
@@ -401,6 +401,64 @@ static void GetOverlappingTokenSequences(const std::string& str, std::unordered_
     }
 }
 
+void ContextRewind(std::vector<int> &embd, std::vector<int> &current_context_tokens, int &n_past, std::vector<int> &last_n_tokens, const int amount_rewind)
+{
+    if(amount_rewind<=0 || current_context_tokens.size()==0)
+    {
+        return; //do nothing
+    }
+    if(embd.size()>1)
+    {
+        printf("\nWARNING: Don't use context rewind when in batch processing phase!\n");
+        return;
+    }
+    bool is_mamba = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture==GGUFArch::ARCH_MAMBA);
+    bool is_rwkv_new = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture==GGUFArch::ARCH_RWKV);
+    if(file_format == FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2 || is_mamba || is_rwkv_new)
+    {
+        printf("\nWARNING: RNN models do not support context rewind!\n");
+        return;
+    }
+
+    if (amount_rewind >= last_n_tokens.size())
+    {
+        last_n_tokens.clear();
+    }
+    else
+    {
+        last_n_tokens.resize(last_n_tokens.size() - amount_rewind);
+    }
+
+    if (amount_rewind >= current_context_tokens.size())
+    {
+        current_context_tokens.clear();
+    }
+    else
+    {
+        current_context_tokens.resize(current_context_tokens.size() - amount_rewind);
+    }
+
+    if (amount_rewind >= n_past)
+    {
+        n_past = 0;
+    }
+    else
+    {
+        n_past -= amount_rewind;
+    }
+
+    if (file_format == FileFormat::GGUF_GENERIC)
+    {
+        llama_kv_cache_seq_rm(llama_ctx_v4, 0, n_past, -1);
+    }
+
+    embd.clear();
+    if(current_context_tokens.size()>0)
+    {
+        embd.push_back(current_context_tokens[current_context_tokens.size()-1]);
+    }
+}
+
 // KCPP SAMPLING FUNCTIONS
 void sample_softmax(llama_token_data_array * cur_p) {
     GGML_ASSERT(cur_p->size > 0);
diff --git a/koboldcpp.py b/koboldcpp.py
index 108121530..806773606 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -3285,10 +3285,10 @@ def show_gui():
     def load_config_gui(): #this is used to populate the GUI with a config file, whereas load_config_cli simply overwrites cli args
         file_type = [("KoboldCpp Settings", "*.kcpps *.kcppt")]
         global runmode_untouched
-        runmode_untouched = False
        filename = askopenfilename(filetypes=file_type, defaultextension=file_type, initialdir=None)
         if not filename or filename=="":
             return
+        runmode_untouched = False
         with open(filename, 'r') as f:
             dict = json.load(f)
         import_vars(dict)
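
For reviewers, a minimal standalone sketch of the state contract the new ContextRewind helper maintains. This is an illustration, not part of the patch: ContextRewindSketch below is a hypothetical simplified copy that drops the patch's file-format guards and the llama_kv_cache_seq_rm eviction, and the driver in main and its token values are invented for demonstration. Only the signature shape and the trim/floor bookkeeping mirror the diff above.

// Hypothetical sketch (not from the patch): simplified ContextRewind plus a
// tiny driver showing the before/after state contract.
#include <cstdio>
#include <vector>

static void ContextRewindSketch(std::vector<int> &embd,
                                std::vector<int> &current_context_tokens,
                                int &n_past,
                                std::vector<int> &last_n_tokens,
                                const int amount_rewind)
{
    if (amount_rewind <= 0 || current_context_tokens.empty()) {
        return; // nothing to rewind
    }
    // Trim the repetition-penalty window and the evaluated-token history,
    // clearing them outright if the rewind exceeds what is stored.
    if ((size_t)amount_rewind >= last_n_tokens.size()) { last_n_tokens.clear(); }
    else { last_n_tokens.resize(last_n_tokens.size() - amount_rewind); }
    if ((size_t)amount_rewind >= current_context_tokens.size()) { current_context_tokens.clear(); }
    else { current_context_tokens.resize(current_context_tokens.size() - amount_rewind); }
    // Move the KV-cache cursor back, floored at zero. The real patch also
    // evicts cache entries past n_past for GGUF models via llama_kv_cache_seq_rm.
    n_past = (amount_rewind >= n_past) ? 0 : (n_past - amount_rewind);
    // Reseed embd with the new final token so the next eval step
    // regenerates from the rewound position.
    embd.clear();
    if (!current_context_tokens.empty()) {
        embd.push_back(current_context_tokens.back());
    }
}

int main()
{
    std::vector<int> current_context_tokens = {11, 22, 33, 44, 55};
    std::vector<int> last_n_tokens = {33, 44, 55};
    std::vector<int> embd = {55}; // single-token decode phase
    int n_past = 5;

    ContextRewindSketch(embd, current_context_tokens, n_past, last_n_tokens, 2);

    // Expected: n_past=3, tokens=3 ({11,22,33}), last_n=1 ({33}), embd[0]=33
    printf("n_past=%d, tokens=%zu, last_n=%zu, embd[0]=%d\n",
           n_past, current_context_tokens.size(), last_n_tokens.size(), embd[0]);
    return 0;
}

The invariant worth noting: all four pieces of state must describe the same point in the sequence after the call, which is why the patch trims last_n_tokens, current_context_tokens, and n_past together and then reseeds embd with the new final token.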