diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 00f73df42..0f4486aa0 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -314,7 +314,7 @@ static std::string get_tok_vec_str(std::vector<int> &embd)
 }
 static void print_tok_vec_str(std::vector<int> &vec)
 {
-    printf("\n%s", get_tok_vec_str(vec).c_str());
+    printf("\n[%s]\n", get_tok_vec_str(vec).c_str());
 }
 
 bool allExtendedUnicode(const std::string& str) {
@@ -401,6 +401,64 @@ static void GetOverlappingTokenSequences(const std::string& str, std::unordered_
     }
 }
 
+void ContextRewind(std::vector<int> &embd, std::vector<int> &current_context_tokens, int &n_past, std::vector<int> &last_n_tokens, const int amount_rewind)
+{
+    if(amount_rewind<=0 || current_context_tokens.size()==0)
+    {
+        return; //do nothing
+    }
+    if(embd.size()>1)
+    {
+        printf("\nWARNING: Don't use context rewind when in batch processing phase!\n");
+        return;
+    }
+    bool is_mamba = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture==GGUFArch::ARCH_MAMBA);
+    bool is_rwkv_new = (file_format == FileFormat::GGUF_GENERIC && file_format_meta.model_architecture==GGUFArch::ARCH_RWKV);
+    if(file_format == FileFormat::RWKV_1 || file_format==FileFormat::RWKV_2 || is_mamba || is_rwkv_new)
+    {
+        printf("\nWARNING: RNN models do not support context rewind!\n");
+        return;
+    }
+
+    if (amount_rewind >= last_n_tokens.size())
+    {
+        last_n_tokens.clear();
+    }
+    else
+    {
+        last_n_tokens.resize(last_n_tokens.size() - amount_rewind);
+    }
+
+    if (amount_rewind >= current_context_tokens.size())
+    {
+        current_context_tokens.clear();
+    }
+    else
+    {
+        current_context_tokens.resize(current_context_tokens.size() - amount_rewind);
+    }
+
+    if (amount_rewind >= n_past)
+    {
+        n_past = 0;
+    }
+    else
+    {
+        n_past -= amount_rewind;
+    }
+
+    if (file_format == FileFormat::GGUF_GENERIC)
+    {
+        llama_kv_cache_seq_rm(llama_ctx_v4, 0, n_past, -1);
+    }
+
+    embd.clear();
+    if(current_context_tokens.size()>0)
+    {
+        embd.push_back(current_context_tokens[current_context_tokens.size()-1]);
+    }
+}
+
 // KCPP SAMPLING FUNCTIONS
 void sample_softmax(llama_token_data_array * cur_p) {
     GGML_ASSERT(cur_p->size > 0);
diff --git a/koboldcpp.py b/koboldcpp.py
index 108121530..806773606 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -3285,10 +3285,10 @@ def show_gui():
     def load_config_gui(): #this is used to populate the GUI with a config file, whereas load_config_cli simply overwrites cli args
         file_type = [("KoboldCpp Settings", "*.kcpps *.kcppt")]
         global runmode_untouched
-        runmode_untouched = False
        filename = askopenfilename(filetypes=file_type, defaultextension=file_type, initialdir=None)
         if not filename or filename=="":
             return
+        runmode_untouched = False
         with open(filename, 'r') as f:
             dict = json.load(f)
         import_vars(dict)
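
For reviewers, a minimal standalone sketch of the state contract the new ContextRewind helper maintains. This is an illustration, not part of the patch: ContextRewindSketch below is a hypothetical simplified copy that drops the patch's file-format guards and the llama_kv_cache_seq_rm eviction, and the driver in main and its token values are invented for demonstration. Only the signature shape and the trim/floor bookkeeping mirror the diff above.

// Hypothetical sketch (not from the patch): simplified ContextRewind plus a
// tiny driver showing the before/after state contract.
#include <cstdio>
#include <vector>

static void ContextRewindSketch(std::vector<int> &embd,
                                std::vector<int> &current_context_tokens,
                                int &n_past,
                                std::vector<int> &last_n_tokens,
                                const int amount_rewind)
{
    if (amount_rewind <= 0 || current_context_tokens.empty()) {
        return; // nothing to rewind
    }
    // Trim the repetition-penalty window and the evaluated-token history,
    // clearing them outright if the rewind exceeds what is stored.
    if ((size_t)amount_rewind >= last_n_tokens.size()) { last_n_tokens.clear(); }
    else { last_n_tokens.resize(last_n_tokens.size() - amount_rewind); }
    if ((size_t)amount_rewind >= current_context_tokens.size()) { current_context_tokens.clear(); }
    else { current_context_tokens.resize(current_context_tokens.size() - amount_rewind); }
    // Move the KV-cache cursor back, floored at zero. The real patch also
    // evicts cache entries past n_past for GGUF models via llama_kv_cache_seq_rm.
    n_past = (amount_rewind >= n_past) ? 0 : (n_past - amount_rewind);
    // Reseed embd with the new final token so the next eval step
    // regenerates from the rewound position.
    embd.clear();
    if (!current_context_tokens.empty()) {
        embd.push_back(current_context_tokens.back());
    }
}

int main()
{
    std::vector<int> current_context_tokens = {11, 22, 33, 44, 55};
    std::vector<int> last_n_tokens = {33, 44, 55};
    std::vector<int> embd = {55}; // single-token decode phase
    int n_past = 5;

    ContextRewindSketch(embd, current_context_tokens, n_past, last_n_tokens, 2);

    // Expected: n_past=3, tokens=3 ({11,22,33}), last_n=1 ({33}), embd[0]=33
    printf("n_past=%d, tokens=%zu, last_n=%zu, embd[0]=%d\n",
           n_past, current_context_tokens.size(), last_n_tokens.size(), embd[0]);
    return 0;
}

The invariant worth noting: all four pieces of state must describe the same point in the sequence after the call, which is why the patch trims last_n_tokens, current_context_tokens, and n_past together and then reseeds embd with the new final token.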