Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-10 17:14:36 +00:00)

Commit fe5479f286: unify antislop and token bans
Parent: a6bf568fda

4 changed files with 37 additions and 108 deletions
expose.h (4 changes)

@@ -2,8 +2,7 @@
 #include <cstdint>
 
 const int stop_token_max = 24;
-const int ban_token_max = 16;
-const int ban_phrase_max = 16;
+const int ban_token_max = 24;
 const int tensor_split_max = 16;
 const int logit_bias_max = 24;
 const int dry_seq_break_max = 24;

@@ -107,7 +106,6 @@ struct generation_inputs
     const float smoothing_factor = 0.0f;
     const logit_bias logit_biases[logit_bias_max] = {};
     const char * banned_tokens[ban_token_max] = {};
-    const char * banned_phrases[ban_phrase_max] = {};
 };
 struct generation_outputs
 {
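After this change, single-character token bans and longer anti-slop phrases share the one fixed-size banned_tokens array (widened from 16 to 24 slots), and ban_phrase_max is gone. A minimal sketch of filling that array from the caller's side, using the field name from expose.h above; the helper, the simplified struct stand-in, and the example strings are illustrative, not the shipped binding code:

#include <string>
#include <vector>

const int ban_token_max = 24; // mirrors expose.h after this commit

struct generation_inputs_sketch {
    // simplified stand-in for the real generation_inputs struct
    const char * banned_tokens[ban_token_max] = {};
};

// Copy up to ban_token_max entries; unused slots stay empty strings,
// matching how the Python binding pads the array.
static void fill_banned_tokens(generation_inputs_sketch &in,
                               const std::vector<std::string> &bans,
                               std::vector<std::string> &storage)
{
    storage.assign(ban_token_max, "");
    for (size_t i = 0; i < bans.size() && i < (size_t)ban_token_max; ++i) {
        storage[i] = bans[i];
    }
    for (int i = 0; i < ban_token_max; ++i) {
        in.banned_tokens[i] = storage[i].c_str();
    }
}

int main()
{
    generation_inputs_sketch in;
    std::vector<std::string> storage;
    // A one-character entry (vocab-level ban) and a phrase (anti-slop ban)
    // now travel through the same array.
    fill_banned_tokens(in, {"\n", "shivers down her spine"}, storage);
    return 0;
}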
gpttype_adapter.cpp

@@ -2511,26 +2511,48 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
     }
 
-    //handle custom token bans
+    //handle custom token bans and antislop phrase banning
+    banned_phrases.clear();
+    delayed_generated_tokens_limit = 0;
+    antislop_banned_token_ids.clear();
     banned_tokens.clear();
     for(int x=0;x<ban_token_max;++x)
     {
         std::string word = inputs.banned_tokens[x];
+        word = toLowerCase(word);
         if(word!="")
         {
-            banned_tokens.push_back(word);
+            std::vector<int> toks;
+            TokenizeString(word, toks, file_format, false);
+            int tokcount = toks.size();
+            if(tokcount==0)
+            {
+                continue;
+            }
+            if(tokcount==1 && word.length()<2) //only use banned tokens for single characters
+            {
+                banned_tokens.push_back(word);
+            }
+            else
+            {
+                tokcount += 3; //add some extra buffer
+                delayed_generated_tokens_limit = (tokcount > delayed_generated_tokens_limit ? tokcount : delayed_generated_tokens_limit);
+                banned_phrases.push_back(word);
+            }
         }
     }
 
     banned_token_ids.clear();
     if(banned_tokens.size()>0)
     {
         if(debugmode==1)
         {
-            printf("\nBanning %zu token sequences...",banned_tokens.size());
+            printf("\nBanning %zu single character sequences...",banned_tokens.size());
         }
         for(int v=0;v<n_vocab;++v)
         {
             std::string word = FileFormatTokenizeID(v,file_format, true);
+            word = toLowerCase(word);
             for(int i=0;i<banned_tokens.size();++i)
             {
                 if (word.find(banned_tokens[i]) != std::string::npos)
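The rewritten loop above classifies each entry of banned_tokens: anything that tokenizes to a single token and is shorter than two characters stays a vocabulary-level ban, while everything else becomes an anti-slop phrase and raises the delayed-token window by its token count plus a small buffer. A self-contained sketch of that rule, with a pluggable tokenizer standing in for TokenizeString; the names and toy tokenizer here are illustrative, not koboldcpp's own:

#include <functional>
#include <string>
#include <vector>

enum class BanKind { VocabToken, AntislopPhrase, Ignored };

struct BanDecision {
    BanKind kind;
    int delayed_tokens; // extra tokens to withhold for backtracking (phrases only)
};

// Mirrors the rule in the diff: single-token, single-character entries are
// vocab-level bans; anything longer is an anti-slop phrase with a small
// buffer added on top of its token count.
BanDecision classify_ban(const std::string &word,
                         const std::function<std::vector<int>(const std::string &)> &tokenize)
{
    if (word.empty()) return {BanKind::Ignored, 0};
    std::vector<int> toks = tokenize(word);
    int tokcount = static_cast<int>(toks.size());
    if (tokcount == 0) return {BanKind::Ignored, 0};
    if (tokcount == 1 && word.length() < 2) {
        return {BanKind::VocabToken, 0};
    }
    return {BanKind::AntislopPhrase, tokcount + 3}; // +3 buffer, as in the diff
}

int main()
{
    // Toy tokenizer: one token per whitespace-separated word (illustrative only).
    auto toy_tokenize = [](const std::string &s) {
        std::vector<int> ids;
        bool in_word = false;
        for (char c : s) {
            if (c == ' ') { in_word = false; }
            else if (!in_word) { ids.push_back(0); in_word = true; }
        }
        return ids;
    };
    BanDecision a = classify_ban("\n", toy_tokenize);                     // vocab token ban
    BanDecision b = classify_ban("shivers down her spine", toy_tokenize); // anti-slop phrase
    (void)a; (void)b;
    return 0;
}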
@@ -2542,30 +2564,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
         }
         if(debugmode==1)
         {
-            printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
+            printf("\nBanned a total of %zu individual tokens.\n",banned_token_ids.size());
         }
     }
 
-    //antislop phrase banning
-    banned_phrases.clear();
-    delayed_generated_tokens_limit = 0;
-    antislop_banned_token_ids.clear();
-    for(int x=0;x<ban_phrase_max;++x)
-    {
-        std::string word = inputs.banned_phrases[x];
-        if(word!="")
-        {
-            std::vector<int> toks;
-            TokenizeString(word, toks, file_format, false);
-            int tokcount = toks.size();
-            if(tokcount>0)
-            {
-                tokcount += 3; //add some extra buffer
-            }
-            delayed_generated_tokens_limit = (tokcount>delayed_generated_tokens_limit?tokcount:delayed_generated_tokens_limit);
-            banned_phrases.push_back(word);
-        }
-    }
     if(debugmode==1 && banned_phrases.size()>0)
     {
         printf("\nBanned a total of %zu phrases, with max token count of %d.\n",banned_phrases.size(),delayed_generated_tokens_limit);
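delayed_generated_tokens_limit, computed above from the longest banned phrase plus a three-token buffer, sizes how much recent output has to be held back so that a phrase can still be cut out and regenerated once it appears (the backtracking behaviour described by the updated Lite help text further down). A conceptual sketch of such a hold-back buffer, written under that assumption; it illustrates the idea only and is not koboldcpp's actual anti-slop implementation:

#include <string>
#include <vector>

// Holds back up to 'limit' recently generated pieces from the output stream so
// that a banned phrase can be removed and generation retried from that point.
struct DelayedOutput {
    size_t limit;                  // e.g. max banned-phrase token count + 3
    std::vector<std::string> held; // detokenized pieces not yet released

    explicit DelayedOutput(size_t limit) : limit(limit) {}

    // Returns the index of the held piece where a banned phrase starts, or -1.
    int push(const std::string &piece, const std::vector<std::string> &banned_phrases)
    {
        held.push_back(piece);
        std::string window;
        for (const auto &p : held) window += p;
        for (const auto &phrase : banned_phrases) {
            size_t pos = window.find(phrase);
            if (pos != std::string::npos) {
                // Walk back to the piece containing the match start; a caller
                // would truncate generation there and resample.
                size_t acc = 0;
                for (size_t i = 0; i < held.size(); ++i) {
                    acc += held[i].size();
                    if (acc > pos) { held.resize(i); return static_cast<int>(i); }
                }
            }
        }
        // Drop the oldest piece from the hold-back window once it is too far
        // back to start a phrase (a real stream would emit it to the client here).
        while (held.size() > limit) held.erase(held.begin());
        return -1;
    }
};

int main()
{
    DelayedOutput out(8); // window sized from the longest banned phrase + buffer
    std::vector<std::string> banned = {"shivers down her spine"};
    const char *pieces[] = {"A ", "chill ", "sent ", "shivers ", "down ", "her ", "spine", "."};
    for (const char *p : pieces) {
        if (out.push(p, banned) >= 0) {
            // A real generator would rewind to this piece and resample here.
            break;
        }
    }
    return 0;
}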
klite.embd (78 changes)

@@ -12,7 +12,7 @@ Current version indicated by LITEVER below.
 -->
 
 <script>
-const LITEVER = 179;
+const LITEVER = 180;
 const urlParams = new URLSearchParams(window.location.search);
 var localflag = true;
 const STORAGE_PREFIX = (localflag?"e_":"")+"kaihordewebui_";

@@ -4258,7 +4258,6 @@ Current version indicated by LITEVER below.
 var current_anotetemplate = "[Author\'s note: <|>]";
 var extrastopseq = "";
 var tokenbans = "";
-var phrasebans = "";
 var anote_strength = 320; //distance from end
 var newlineaftermemory = true;
 var current_wi = []; //each item stores a wi object.
@@ -6093,7 +6092,6 @@ Current version indicated by LITEVER below.
 //extra unofficial fields for the story
 new_save_storyobj.extrastopseq = extrastopseq;
 new_save_storyobj.tokenbans = tokenbans;
-new_save_storyobj.phrasebans = phrasebans;
 new_save_storyobj.anotestr = anote_strength;
 new_save_storyobj.wisearchdepth = wi_searchdepth;
 new_save_storyobj.wiinsertlocation = wi_insertlocation;

@@ -6285,7 +6283,6 @@ Current version indicated by LITEVER below.
 let old_current_wi = current_wi;
 let old_extrastopseq = extrastopseq;
 let old_tokenbans = tokenbans;
-let old_phrasebans = phrasebans;
 let old_notes = personal_notes;
 let old_regexreplace_data = regexreplace_data;
 let old_placeholder_tags_data = placeholder_tags_data;

@@ -6345,10 +6342,6 @@ Current version indicated by LITEVER below.
 {
     tokenbans = storyobj.tokenbans;
 }
-if(storyobj.phrasebans)
-{
-    phrasebans = storyobj.phrasebans;
-}
 if (storyobj.anotestr) {
     anote_strength = storyobj.anotestr;
 }

@@ -6446,7 +6439,6 @@ Current version indicated by LITEVER below.
 extrastopseq = old_extrastopseq;
 regexreplace_data = old_regexreplace_data;
 tokenbans = old_tokenbans;
-phrasebans = old_phrasebans;
 placeholder_tags_data = old_placeholder_tags_data;
 }
 
@@ -8063,7 +8055,7 @@ Current version indicated by LITEVER below.
 
 function expand_tokens_section(targetid)
 {
-    let tablist = ["expandregexreplace","expandtokenbans","expandphrasebans","expandlogitbias","expandplaceholdertags"];
+    let tablist = ["expandregexreplace","expandtokenbans","expandlogitbias","expandplaceholdertags"];
 
     for(let i=0;i<tablist.length;++i)
     {

@@ -8159,7 +8151,7 @@ Current version indicated by LITEVER below.
 
 function add_token_ban()
 {
-    inputBox("Enter a token substring to be banned. ALL matching tokens will be removed.\nFor example adding 'ice' will also ban 'nice' and 'rice', assuming they are individual tokens.","Add Banned Token Substring","","Enter a Token Substring",()=>{
+    inputBox("Enter a string to be banned (e.g. Slop text to remove). If it's generated, the AI will try something else. Works for both individual words and long phrases; not case-sensitive. All substring matches will be prevented.\nFor example adding 'ice bag' will also ban 'nice bag' and 'rice bag'.","Add Banned String","","Enter String To Ban",()=>{
     let userinput = getInputBoxValue();
     if(userinput.trim()!="")
     {

@@ -8174,23 +8166,6 @@ Current version indicated by LITEVER below.
     },false);
 }
 
-function add_phrase_ban()
-{
-    inputBox("Enter a string to be banned (e.g. Slop text to remove). If it's generated, the AI backtracks and tries something else.","Add Banned Phrase String","","Enter String To Ban",()=>{
-    let userinput = getInputBoxValue();
-    if(userinput.trim()!="")
-    {
-        let ov = document.getElementById("phrasebans").value;
-        if(ov!="")
-        {
-            ov += "||$||";
-        }
-        ov += userinput.trim();
-        document.getElementById("phrasebans").value = ov;
-    }
-    },false);
-}
-
 var msgboxOnDone = hide_msgbox;
 function hide_msgbox() {
     //hide msgbox ONLY
@@ -10726,7 +10701,6 @@ Current version indicated by LITEVER below.
 anote_strength = document.getElementById("anote_strength").value;
 extrastopseq = document.getElementById("extrastopseq").value;
 tokenbans = document.getElementById("tokenbans").value;
-phrasebans = document.getElementById("phrasebans").value;
 newlineaftermemory = (document.getElementById("newlineaftermemory").checked?true:false);
 try
 {

@@ -10995,7 +10969,6 @@ Current version indicated by LITEVER below.
 current_wi = [];
 extrastopseq = "";
 tokenbans = "";
-phrasebans = "";
 anote_strength = 320;
 logitbiasdict = {};
 wi_searchdepth = 0;

@@ -12561,24 +12534,6 @@ Current version indicated by LITEVER below.
 return seqs;
 }
 
-function get_phrase_bans()
-{
-    let seqs = [];
-    if (phrasebans != "") {
-        let rep = replaceAll(phrasebans, "\\n", "\n");
-        let srep = rep.split("||$||");
-        if (srep.length > 0 && !seqs) {
-            seqs = [];
-        }
-        for (let i = 0; i < srep.length; ++i) {
-            if (srep[i] && srep[i] != "") {
-                seqs.push(srep[i]);
-            }
-        }
-    }
-    return seqs;
-}
-
 function cleanup_story_completion(resp)
 {
 if(gametext_arr.length>0)
@@ -12636,7 +12591,6 @@ Current version indicated by LITEVER below.
 submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
 submit_payload.params.smoothing_factor = localsettings.smoothing_factor;
 submit_payload.params.banned_tokens = get_token_bans();
-submit_payload.params.banned_phrases = get_phrase_bans();
 submit_payload.params.render_special = localsettings.render_special_tags;
 }
 if(custom_kobold_endpoint != "" && is_using_kcpp_with_dry() && localsettings.dry_multiplier > 0)

@@ -16541,7 +16495,6 @@ Current version indicated by LITEVER below.
 document.getElementById("anote_strength").value = anote_strength;
 document.getElementById("extrastopseq").value = extrastopseq;
 document.getElementById("tokenbans").value = tokenbans;
-document.getElementById("phrasebans").value = phrasebans;
 document.getElementById("newlineaftermemory").checked = (newlineaftermemory?true:false);
 document.getElementById("logitbiastxtarea").value = JSON.stringify(logitbiasdict,null,2);
 

@@ -16565,7 +16518,6 @@ Current version indicated by LITEVER below.
 {
 document.getElementById("nologitbias").classList.add("hidden");
 document.getElementById("notokenbans").classList.add("hidden");
-document.getElementById("nophrasebans").classList.add("hidden");
 if(is_using_kcpp_with_added_memory())
 {
 document.getElementById("newlogitbiasstringtogglesection").classList.remove("hidden");

@@ -16578,7 +16530,6 @@ Current version indicated by LITEVER below.
 {
 document.getElementById("nologitbias").classList.remove("hidden");
 document.getElementById("notokenbans").classList.remove("hidden");
-document.getElementById("nophrasebans").classList.remove("hidden");
 document.getElementById("newlogitbiasstringtogglesection").classList.add("hidden");
 document.getElementById("newlogitbiasstringtoggle").checked = false;
 }
@@ -19305,32 +19256,19 @@ Current version indicated by LITEVER below.
 </div>
 </div>
 
-<div style="padding:3px;" class="justifyleft settinglabel">Token Filter <span class="helpicon">?<span
-class="helptext">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span>
-<button type="button" title="Token Filter" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandtokenbans')">Expand Section</button>
+<div style="padding:3px;" class="justifyleft settinglabel">Phrase / Word Ban (Anti-Slop) <span class="helpicon">?<span
+class="helptext">Prevents specific words or phrases from being generated, either by modifying the model vocab or by backtracking and regenerating when they appear. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span>
+<button type="button" title="Phrase / Token Ban (Anti-Slop)" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandtokenbans')">Expand Section</button>
 </div>
 <div id="expandtokenbans" class="hidden">
-<div class="color_red hidden" id="notokenbans">Token filter may be unavailable.</div>
-<div style="color:#ffffff;">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||<br><em>Note: If you're trying to ban a specific token ID, you should use Logit Bias instead!</em><br></div>
+<div class="color_red hidden" id="notokenbans">Phrase banning may be unavailable.</div>
+<div style="color:#ffffff;">Prevents specific words or phrases from being generated, either by modifying the model vocab or by backtracking and regenerating when they appear. If you want multiple sequences, separate them with the following delimiter: ||$||<br><em>Note: If you're trying to ban a specific token by ID, you should use Logit Bias instead!</em><br></div>
 <div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
 <input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="tokenbans">
 <button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_token_ban()">Add New</button>
 </div>
 </div>
 
-<div style="padding:3px;" class="justifyleft settinglabel">Phrase Ban (Anti-Slop) <span class="helpicon">?<span
-class="helptext">Prevents specific phrases from being generated by backtracking and regenerating when they appear. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span>
-<button type="button" title="Phrase Ban (Anti-Slop)" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandphrasebans')">Expand Section</button>
-</div>
-<div id="expandphrasebans" class="hidden">
-<div class="color_red hidden" id="nophrasebans">Phrase Ban (Anti-Slop) may be unavailable.</div>
-<div style="color:#ffffff;">Prevents specific phrases from being generated by backtracking and regenerating when they appear. If you want multiple sequences, separate them with the following delimiter: ||$||<br></div>
-<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
-<input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="phrasebans">
-<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_phrase_ban()">Add New</button>
-</div>
-</div>
-
 <div style="padding:3px;" class="justifyleft settinglabel">Regex Replace <span class="helpicon">?<span
 class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span>
 <button type="button" title="Regex Replace" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandregexreplace')">Expand Section</button>
koboldcpp.py (13 changes)

@@ -20,8 +20,7 @@ from datetime import datetime, timezone
 # constants
 sampler_order_max = 7
 stop_token_max = 24
-ban_token_max = 16
-ban_phrase_max = 16
+ban_token_max = 24
 tensor_split_max = 16
 logit_bias_max = 24
 dry_seq_break_max = 24

@@ -172,8 +171,7 @@ class generation_inputs(ctypes.Structure):
                 ("dynatemp_exponent", ctypes.c_float),
                 ("smoothing_factor", ctypes.c_float),
                 ("logit_biases", logit_bias * logit_bias_max),
-                ("banned_tokens", ctypes.c_char_p * ban_token_max),
-                ("banned_phrases", ctypes.c_char_p * ban_phrase_max)]
+                ("banned_tokens", ctypes.c_char_p * ban_token_max)]
 
 class generation_outputs(ctypes.Structure):
     _fields_ = [("status", ctypes.c_int),

@@ -912,7 +910,6 @@ def generate(genparams, is_quiet=False, stream_flag=False):
     logit_biases = genparams.get('logit_bias', {})
     render_special = genparams.get('render_special', False)
     banned_tokens = genparams.get('banned_tokens', [])
-    banned_phrases = genparams.get('banned_phrases', [])
     bypass_eos_token = genparams.get('bypass_eos', False)
 
     inputs = generation_inputs()

@@ -1031,12 +1028,6 @@ def generate(genparams, is_quiet=False, stream_flag=False):
         else:
             inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
 
-    for n in range(ban_phrase_max):
-        if not banned_phrases or n >= len(banned_phrases):
-            inputs.banned_phrases[n] = "".encode("UTF-8")
-        else:
-            inputs.banned_phrases[n] = banned_phrases[n].encode("UTF-8")
-
     currentusergenkey = genkey
     totalgens += 1
     #early exit if aborted