From fe5479f286543fb8a314f56bffb9d79aaf053c4c Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Thu, 10 Oct 2024 18:21:07 +0800 Subject: [PATCH] unify antislop and token bans --- expose.h | 4 +-- gpttype_adapter.cpp | 50 +++++++++++++++-------------- klite.embd | 78 +++++---------------------------------------- koboldcpp.py | 13 ++------ 4 files changed, 37 insertions(+), 108 deletions(-) diff --git a/expose.h b/expose.h index 04d5b42e5..4c3d8da39 100644 --- a/expose.h +++ b/expose.h @@ -2,8 +2,7 @@ #include const int stop_token_max = 24; -const int ban_token_max = 16; -const int ban_phrase_max = 16; +const int ban_token_max = 24; const int tensor_split_max = 16; const int logit_bias_max = 24; const int dry_seq_break_max = 24; @@ -107,7 +106,6 @@ struct generation_inputs const float smoothing_factor = 0.0f; const logit_bias logit_biases[logit_bias_max] = {}; const char * banned_tokens[ban_token_max] = {}; - const char * banned_phrases[ban_phrase_max] = {}; }; struct generation_outputs { diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 3f4b1e27f..ecaa9dbab 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -2511,26 +2511,48 @@ generation_outputs gpttype_generate(const generation_inputs inputs) } } - //handle custom token bans + //handle custom token bans and antislop phrase banning + banned_phrases.clear(); + delayed_generated_tokens_limit = 0; + antislop_banned_token_ids.clear(); banned_tokens.clear(); for(int x=0;x toks; + TokenizeString(word, toks, file_format, false); + int tokcount = toks.size(); + if(tokcount==0) + { + continue; + } + if(tokcount==1 && word.length()<2) //only use banned tokens for single characters + { + banned_tokens.push_back(word); + } + else + { + tokcount += 3; //add some extra buffer + delayed_generated_tokens_limit = (tokcount > delayed_generated_tokens_limit ? tokcount : delayed_generated_tokens_limit); + banned_phrases.push_back(word); + } } } + banned_token_ids.clear(); if(banned_tokens.size()>0) { if(debugmode==1) { - printf("\nBanning %zu token sequences...",banned_tokens.size()); + printf("\nBanning %zu single character sequences...",banned_tokens.size()); } for(int v=0;v toks; - TokenizeString(word, toks, file_format, false); - int tokcount = toks.size(); - if(tokcount>0) - { - tokcount += 3; //add some extra buffer - } - delayed_generated_tokens_limit = (tokcount>delayed_generated_tokens_limit?tokcount:delayed_generated_tokens_limit); - banned_phrases.push_back(word); - } - } if(debugmode==1 && banned_phrases.size()>0) { printf("\nBanned a total of %zu phrases, with max token count of %d.\n",banned_phrases.size(),delayed_generated_tokens_limit); diff --git a/klite.embd b/klite.embd index 7614f02b1..12103784d 100644 --- a/klite.embd +++ b/klite.embd @@ -12,7 +12,7 @@ Current version indicated by LITEVER below. -->