fix off-by-one error in n_past during some instances of fast forwarding

2025-09-09 08:34:37 +00:00 · 2025-05-22 19:51:21 +08:00 · 2025-05-22 19:51:21 +08:00 · f125e724eb
commit f125e724eb
parent f10574e598
3 changed files with 11 additions and 3 deletions
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@ -3468,7 +3468,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
    if (debugmode==1 && !is_quiet)
    {
        std::string outstr = "";
-        printf("\n\n[Debug: Dump Raw Input Tokens]\n");
+        printf("\n\n[Debug: Dump %d Raw Input Tokens]\n",embd_inp.size());
        outstr += get_tok_vec_str(embd_inp);
        printf("%s\n", RemoveBell(outstr).c_str());
    }
@ -3615,7 +3615,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
    if (debugmode==1 && !is_quiet)
    {
        std::string outstr = "";
-        // printf("\n[Debug: Dump Forwarded Input Tokens, format: %d]\n", file_format);
+        // printf("\n[Debug: Dump Forwarded Input Tokens]\n");
        // outstr += get_tok_vec_str(embd_inp);
        outstr += "\n\n[Debug: n_past="+std::to_string(n_past)+" Context Size = " + std::to_string(current_context_tokens.size()) + "]\n";
        outstr += get_tok_vec_str(current_context_tokens);
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -4367,6 +4367,9 @@ def show_gui():
    def on_picked_model_file(filepath):
        if filepath and (filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt')):
            #load it as a config file instead
+            if filepath.lower().endswith('.kcpps'):
+                global runmode_untouched
+                runmode_untouched = False
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                dict = json.load(f)
                import_vars(dict)
--- a/model_adapter.cpp
+++ b/model_adapter.cpp
@ -464,9 +464,10 @@ void print_tok_vec(std::vector<float> &embd)

    //fast forward the past based on identical tokens, stop once a divergence is noted
    int embd_inp_len = embd_inp.size();
+    int cur_ctx_len = current_context_tokens.size();
    bool fastforwardok = true;

-    for (int i = 0; i < current_context_tokens.size(); ++i)
+    for (int i = 0; i < cur_ctx_len; ++i)
    {
        if (current_context_tokens[i] == embd_inp[i])
        {
@ -500,6 +501,10 @@ void print_tok_vec(std::vector<float> &embd)
            {
                break;
            }
+            if ((i + 2) >= cur_ctx_len)
+            {
+                break;
+            }
        }
    }