refactored a lot of code, remove bantokens, move it to api

2025-09-10 17:14:36 +00:00 · 2024-04-27 17:57:13 +08:00 · 2024-04-27 17:57:13 +08:00 · c230b78906
commit c230b78906
parent 4ec8a9c57b
6 changed files with 214 additions and 76 deletions
--- a/class.py
+++ b/class.py
@ -270,7 +270,7 @@ class model_backend(InferenceModel):
        port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads,
        psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize,
        blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
-        unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
+        usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
        usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
        useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
        onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False, benchmark=None, nocertify=False, sdconfig=None, mmproj=None,
--- a/expose.h
+++ b/expose.h
@ -55,7 +55,6 @@ struct load_model_inputs
    const int gpulayers = 0;
    const float rope_freq_scale = 1.0f;
    const float rope_freq_base = 10000.0f;
    const char * banned_tokens[ban_token_max];
    const float tensor_split[tensor_split_max];
 };
 struct generation_inputs
@ -92,7 +91,7 @@ struct generation_inputs
    const float dynatemp_exponent = 1.0f;
    const float smoothing_factor = 0.0f;
    const logit_bias logit_biases[logit_bias_max];
-
+    const char * banned_tokens[ban_token_max];
 };
 struct generation_outputs
 {
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@ -837,17 +837,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
    gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
    gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
    //handle custom token bans
    banned_tokens.clear();
    for(int x=0;x<ban_token_max;++x)
    {
        std::string word = inputs.banned_tokens[x];
        if(word!="")
        {
            banned_tokens.push_back(word);
        }
    }
    //this is used for the mem_per_token eval, openblas needs more RAM
    bool v3_use_scratch = ggml_v3_cpu_has_gpublas();
@ -1624,6 +1613,41 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
        }
    }
    //handle custom token bans
    banned_tokens.clear();
    for(int x=0;x<ban_token_max;++x)
    {
        std::string word = inputs.banned_tokens[x];
        if(word!="")
        {
            banned_tokens.push_back(word);
        }
    }
    banned_token_ids.clear();
    if(banned_tokens.size()>0)
    {
        if(debugmode==1)
        {
            printf("\nBanning %zu token sequences...",banned_tokens.size());
        }
        for(int v=0;v<n_vocab;++v)
        {
            std::string word = FileFormatTokenizeID(v,file_format, true);
            for(int i=0;i<banned_tokens.size();++i)
            {
                if (word.find(banned_tokens[i]) != std::string::npos)
                {
                    banned_token_ids.push_back(v);
                    break;
                }
            }
        }
        if(debugmode==1)
        {
            printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
        }
    }
    logit_biases.clear();
    for(int x=0;x<logit_bias_max;++x)
    {
@ -1993,25 +2017,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
        printf("\nWarning! n_vocab is invalid, maybe bad format!");
    }
    //prepare banned tokens
    if(banned_token_ids.size()==0 && banned_tokens.size()>0)
    {
        printf("\n[First Run] Banning %zu token sequences...",banned_tokens.size());
        for(int v=0;v<n_vocab;++v)
        {
            std::string word = FileFormatTokenizeID(v,file_format, true);
            for(int i=0;i<banned_tokens.size();++i)
            {
                if (word.find(banned_tokens[i]) != std::string::npos)
                {
                    banned_token_ids.push_back(v);
                    break;
                }
            }
        }
        printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
    }
    if(allow_regular_prints)
    {
        printf("\n");
--- a/kcpp_docs.embd
+++ b/kcpp_docs.embd
@ -136,7 +136,7 @@
                         },
                         "use_default_badwordsids": {
                            "default": false,
-                            "description": "If true, prevents the EOS token from being generated (Ban EOS). For unbantokens, set this to false.",
+                            "description": "If true, prevents the EOS token from being generated (Ban EOS).",
                            "type": "boolean"
                         },
                         "dynatemp_range": {
--- a/klite.embd
+++ b/klite.embd
@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
 Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 135
+Current version: 136
 -Concedo
 -->
@ -299,7 +299,7 @@ Current version: 135
 			padding-right: 10px;
 		}
-		#extrastopseq, #anotetemplate {
+		.inlineinput {
 			background-color: #404040;
 			color: #ffffff;
 			resize: none;
@ -3603,6 +3603,7 @@ Current version: 135
 	var current_anote = ""; //stored author note
 	var current_anotetemplate = "[Author\'s note: <|>]";
 	var extrastopseq = "";
 	var tokenbans = "";
 	var anote_strength = 320; //distance from end
 	var newlineaftermemory = true;
 	var current_wi = []; //each item stores a wi object.
@ -3738,8 +3739,8 @@ Current version: 135
 		passed_ai_warning: false, //used to store AI safety panel acknowledgement state
 		entersubmit: true, //enter sends the prompt
-		max_context_length: 1600,
+		max_context_length: 1800,
-		max_length: 120,
+		max_length: 140,
 		auto_ctxlen: true,
 		auto_genamt: true,
 		rep_pen: 1.1,
@ -5095,6 +5096,7 @@ Current version: 135
 		//extra unofficial fields for the story
 		new_save_storyobj.extrastopseq = extrastopseq;
 		new_save_storyobj.tokenbans = tokenbans;
 		new_save_storyobj.anotestr = anote_strength;
 		new_save_storyobj.wisearchdepth = wi_searchdepth;
 		new_save_storyobj.wiinsertlocation = wi_insertlocation;
@ -5271,6 +5273,7 @@ Current version: 135
 			let old_current_memory = current_memory;
 			let old_current_wi = current_wi;
 			let old_extrastopseq = extrastopseq;
 			let old_tokenbans = tokenbans;
 			let old_notes = personal_notes;
 			let old_regexreplace_data = regexreplace_data;
@ -5325,6 +5328,10 @@ Current version: 135
 				if (storyobj.extrastopseq) {
 					extrastopseq = storyobj.extrastopseq;
 				}
 				if(storyobj.tokenbans)
 				{
 					tokenbans = storyobj.tokenbans;
 				}
 				if (storyobj.anotestr) {
 					anote_strength = storyobj.anotestr;
 				}
@ -5416,6 +5423,7 @@ Current version: 135
 				{
 					extrastopseq = old_extrastopseq;
 					regexreplace_data = old_regexreplace_data;
 					tokenbans = old_tokenbans;
 				}
 				if (storyobj.savedsettings && storyobj.savedsettings != "")
@ -6746,25 +6754,53 @@ Current version: 135
 		},false,true);
 	}
-	var pendinglogitbias = {};
+	function expand_tokens_section(targetid)
 	function set_logit_bias()
 	{
-		inputBox("Enter OpenAI-formatted logit bias dictionary. Each key is the integer token IDs and their values are the biases (-100.0 to 100.0)<br><a href='https://platform.openai.com/docs/api-reference/chat/create#chat-create-logit_bias' class='color_blueurl'>Input is a JSON object, reference here.</a><br>Leave blank to disable.<br>","Set Logit Biases",JSON.stringify(pendinglogitbias),"Enter JSON Object",()=>{
+		let tablist = ["expandregexreplace","expandtokenbans","expandlogitbias"];
-			let userinput = getInputBoxValue().trim();
+
-			if(userinput=="")
+		for(let i=0;i<tablist.length;++i)
 		{
-				pendinglogitbias = {};
+			if(tablist[i]!=targetid)
 			}
 			else
 			{
-				try {
+				document.getElementById(tablist[i]).classList.add("hidden");
 					pendinglogitbias = JSON.parse(userinput);
 				} catch (e) {
 					msgbox("Your logit bias JSON dictionary was not correctly formatted!");
 			}
 		}
-		},true,true);
+		if(targetid!="")
 		{
 			if(document.getElementById(targetid).classList.contains("hidden"))
 			{
 				document.getElementById(targetid).classList.remove("hidden");
 			}
 			else
 			{
 				document.getElementById(targetid).classList.add("hidden");
 			}
 		}
 	}
 	function add_logit_bias()
 	{
 		//merges the token id / bias pair typed into the two input boxes into the JSON dictionary shown in the logit bias textarea.
 		//both input boxes are cleared afterwards, whether or not the pair was accepted.
 		let keybox = document.getElementById("newlogitbiasid");
 		let valbox = document.getElementById("newlogitbiasval");
 		let txtarea = document.getElementById("logitbiastxtarea");
 		let key = keybox.value;
 		let val = valbox.value;
 		if(key && val && key.trim()!="" && val.trim()!="")
 		{
 			try {
 				//a blank textarea means no biases are set, so start from an empty dictionary instead of failing to parse
 				let old = txtarea.value.trim();
 				let dict = (old=="" ? {} : JSON.parse(old));
 				if(dict===null || typeof dict!=="object" || Array.isArray(dict))
 				{
 					//valid JSON that is not an object (number, array, null) cannot hold id:bias pairs
 					throw new Error("logit bias must be a JSON object");
 				}
 				let tokenid = parseInt(key,10);
 				//biases may be fractional (-100.0 to 100.0), so keep the decimal part rather than truncating it
 				let bias = parseFloat(val);
 				if(!isNaN(tokenid) && !isNaN(bias))
 				{
 					dict[tokenid] = bias;
 					txtarea.value = JSON.stringify(dict,null,2);
 				}
 			} catch (e) {
 				msgbox("Your inputs or logit bias JSON dictionary was not correctly formatted!");
 			}
 			keybox.value = "";
 			valbox.value = "";
 		}
 	}
 	function add_stop_seq()
@ -6784,6 +6820,23 @@ Current version: 135
 		},false);
 	}
 	function add_token_ban()
 	{
 		//asks for a single substring and appends it to the token ban field, using ||$|| as the separator between entries
 		inputBox("Enter a token substring to be banned. ALL matching tokens will be removed.\nFor example adding 'ice' will also ban 'nice' and 'rice', assuming they are individual tokens.","Add Banned Token Substring","","Enter a Token Substring",()=>{
 			let newban = getInputBoxValue().trim();
 			if(newban=="")
 			{
 				//nothing entered, leave the existing list untouched
 				return;
 			}
 			let banfield = document.getElementById("tokenbans");
 			let existing = banfield.value;
 			banfield.value = (existing!="" ? existing + "||$||" : "") + newban;
 		},false);
 	}
 	var msgboxOnDone = hide_msgbox;
 	function hide_msgbox() {
 		//hide msgbox ONLY
@ -8919,8 +8972,20 @@ Current version: 135
 		current_anotetemplate = document.getElementById("anotetemplate").value;
 		anote_strength = document.getElementById("anote_strength").value;
 		extrastopseq = document.getElementById("extrastopseq").value;
 		tokenbans = document.getElementById("tokenbans").value;
 		newlineaftermemory = (document.getElementById("newlineaftermemory").checked?true:false);
-		logitbiasdict = pendinglogitbias;
+		try
 		{
 			let lb = document.getElementById("logitbiastxtarea").value;
 			let dict = {};
 			if(lb!="")
 			{
 				dict = JSON.parse(lb);
 			}
 			logitbiasdict = dict;
 		} catch (e) {
 			console.log("Your logit bias JSON dictionary was not correctly formatted!");
 		}
 		regexreplace_data = [];
 		for(let i=0;i<num_regex_rows;++i)
 		{
@ -9144,6 +9209,7 @@ Current version: 135
 			current_anote = "";
 			current_wi = [];
 			extrastopseq = "";
 			tokenbans = "";
 			anote_strength = 320;
 			logitbiasdict = {};
 			wi_searchdepth = 0;
@ -10348,6 +10414,24 @@ Current version: 135
 		return seqs;
 	}
 	function get_token_bans()
 	{
 		//returns the banned token substrings from the tokenbans setting as an array (empty array if none are set).
 		//entries are separated by ||$||, and a literal \n typed by the user is converted into a real newline first.
 		if (tokenbans == "") {
 			return [];
 		}
 		let unescaped = replaceAll(tokenbans, "\\n", "\n");
 		//drop empty pieces produced by leading, trailing or doubled delimiters
 		return unescaped.split("||$||").filter(part => part != "");
 	}
 	function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos
 	{
 		console.log(submit_payload);
@ -10367,6 +10451,7 @@ Current version: 135
 			submit_payload.params.dynatemp_range = localsettings.dynatemp_range;
 			submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
 			submit_payload.params.smoothing_factor = localsettings.smoothing_factor;
 			submit_payload.params.banned_tokens = get_token_bans();
 		}
 		//presence pen and logit bias for OAI and newer kcpp
 		if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="")
@ -13165,8 +13250,10 @@ Current version: 135
 		document.getElementById("anotetemplate").value = current_anotetemplate;
 		document.getElementById("anote_strength").value = anote_strength;
 		document.getElementById("extrastopseq").value = extrastopseq;
 		document.getElementById("tokenbans").value = tokenbans;
 		document.getElementById("newlineaftermemory").checked = (newlineaftermemory?true:false);
-		pendinglogitbias = logitbiasdict;
+		document.getElementById("logitbiastxtarea").value = JSON.stringify(logitbiasdict,null,2);
 		if(custom_kobold_endpoint!="" || !is_using_custom_ep() )
 		{
 			document.getElementById("noextrastopseq").classList.add("hidden");
@ -13183,7 +13270,16 @@ Current version: 135
 		//setup regex replacers
 		populate_regex_replacers();
-		document.getElementById("btnlogitbias").disabled = !is_using_custom_ep();
+		if(is_using_custom_ep())
 		{
 			document.getElementById("nologitbias").classList.add("hidden");
 			document.getElementById("notokenbans").classList.add("hidden");
 		}
 		else
 		{
 			document.getElementById("nologitbias").classList.remove("hidden");
 			document.getElementById("notokenbans").classList.remove("hidden");
 		}
 	}
@ -15291,7 +15387,7 @@ Current version: 135
 					</span>
 				</div>
 				<div style="display: flex; column-gap: 4px;">
-				<input class="form-control anotetempbox" type="text"
+				<input class="form-control anotetempbox inlineinput" type="text"
 					placeholder="(the &lt;|&gt; will be replaced with the Author's Note text)" value="" id="anotetemplate">
 					<select style="padding:4px;" class="anotetempscale form-control" id="anote_strength">
 						<option value="480">Weak</option>
@ -15341,23 +15437,57 @@ Current version: 135
 					class="helptext">Triggers the text generator to stop generating early if this sequence appears, in addition to default stop sequences. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span></div>
 					<div class="color_red hidden" id="noextrastopseq">Stop Sequences may be unavailable.</div>
 					<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
-					<input class="form-control stopseqbox" type="text" placeholder="None" value="" id="extrastopseq">
+					<input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="extrastopseq">
 					<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_stop_seq()">Add New</button>
 				</div>
 				<div class="settinglabel">
 					<div class="justifyleft"><br>Logit Biases <span class="helpicon">?<span
 						class="helptext">Specify a dictionary of token IDs to modify the probability of occuring.</span></span></div>
 				</div>
 				<div><button type="button" class="btn btn-primary" style="width:134px;padding:6px 6px;" id="btnlogitbias" onclick="set_logit_bias()">Edit Logit Biases</button></div>
 				<div class="settinglabel">
 					<div class="justifyleft"><br>Custom Regex Replace <span class="helpicon">?<span
 						class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span></div>
 				</div>
 				<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px;	border-collapse: separate;">
 				<div style="padding:3px;" class="justifyleft settinglabel">Logit Biases <span class="helpicon">?<span
 					class="helptext">Specify a dictionary of token IDs to modify the probability of occuring.</span></span>
 					<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandlogitbias')">Expand Section</button>
 				</div>
 				<div id="expandlogitbias" class="hidden">
 					<div class="color_red hidden" id="nologitbias">Logit bias may be unavailable.</div>
 					<div style="color:#ffffff;">Enter OpenAI-formatted logit bias dictionary. Each key is the integer token IDs and their values are the biases (-100.0 to 100.0). Leave blank to disable.<br><a href='https://platform.openai.com/docs/api-reference/chat/create#chat-create-logit_bias' target='_blank' class='color_blueurl'>Input is a JSON object, reference here.</a><br></div>
 					<textarea class="form-control" style="line-height:1.1;margin-bottom: 4px;padding:3px" id="logitbiastxtarea" placeholder="" rows="5"></textarea>
 					<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
 					<input style="padding:2px" class="form-control stopseqbox inlineinput" inputmode="decimal" type="text" placeholder="Token ID" value="" id="newlogitbiasid">
 					<input style="padding:2px" class="form-control stopseqbox inlineinput" inputmode="decimal" type="text" placeholder="Bias Value" value="" id="newlogitbiasval">
 					<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_logit_bias()">Add New</button>
 					</div>
 				</div>
 				<div style="padding:3px;" class="justifyleft settinglabel">Token Bans <span class="helpicon">?<span
 					class="helptext">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span>
 					<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandtokenbans')">Expand Section</button>
 				</div>
 				<div id="expandtokenbans" class="hidden">
 					<div class="color_red hidden" id="notokenbans">Token bans may be unavailable.</div>
 					<div style="color:#ffffff;">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||<br></div>
 					<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
 					<input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="tokenbans">
 					<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_token_ban()">Add New</button>
 					</div>
 				</div>
 				<div style="padding:3px;" class="justifyleft settinglabel">Regex Replace <span class="helpicon">?<span
 					class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span>
 					<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandregexreplace')">Expand Section</button>
 				</div>
 				<div id="expandregexreplace" class="hidden">
 					<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
 					</table>
 				</div>
 				<!-- <div style="padding:3px;" class="justifyleft settinglabel">Repetition Exclusions <span class="helpicon">?<span
 					class="helptext">Configure specific tokens that will be excluded from repetition and presence penalties.</span></span>
 					<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('')">Expand Section</button>
 				</div>
 				<div style="padding:3px;" class="justifyleft settinglabel">Placeholder Tags <span class="helpicon">?<span
 					class="helptext">Configure automatic substitutions for placeholders in text.</span></span>
 					<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('')">Expand Section</button>
 				</div> -->
 			</div>
 			<div class="popupfooter">
 				<button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();render_gametext();hide_popups()">OK</button>
 				<button type="button" class="btn btn-primary" onclick="revert_wi();hide_popups()">Cancel</button>
--- a/koboldcpp.py
+++ b/koboldcpp.py
@ -56,7 +56,6 @@ class load_model_inputs(ctypes.Structure):
                ("gpulayers", ctypes.c_int),
                ("rope_freq_scale", ctypes.c_float),
                ("rope_freq_base", ctypes.c_float),
                ("banned_tokens", ctypes.c_char_p * ban_token_max),
                ("tensor_split", ctypes.c_float * tensor_split_max)]
 class generation_inputs(ctypes.Structure):
@ -91,7 +90,8 @@ class generation_inputs(ctypes.Structure):
                ("dynatemp_range", ctypes.c_float),
                ("dynatemp_exponent", ctypes.c_float),
                ("smoothing_factor", ctypes.c_float),
-                ("logit_biases", logit_bias * logit_bias_max)]
+                ("logit_biases", logit_bias * logit_bias_max),
                ("banned_tokens", ctypes.c_char_p * ban_token_max)]
 class generation_outputs(ctypes.Structure):
    _fields_ = [("status", ctypes.c_int),
@ -391,16 +391,10 @@ def load_model(model_filename):
    inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
    inputs.debugmode = args.debugmode
    banned_tokens = args.bantokens
    for n in range(ban_token_max):
        if not banned_tokens or n >= len(banned_tokens):
            inputs.banned_tokens[n] = "".encode("UTF-8")
        else:
            inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
    ret = handle.load_model(inputs)
    return ret
-def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False):
+def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[]):
    global maxctx, args, currentusergenkey, totalgens, pendingabortkey
    inputs = generation_inputs()
    inputs.prompt = prompt.encode("UTF-8")
@ -487,6 +481,12 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
                inputs.logit_biases[n] = logit_bias(-1, 0.0)
                print(f"Skipped unparsable logit bias:{ex}")
    for n in range(ban_token_max):
        if not banned_tokens or n >= len(banned_tokens):
            inputs.banned_tokens[n] = "".encode("UTF-8")
        else:
            inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
    currentusergenkey = genkey
    totalgens += 1
    #early exit if aborted
@ -672,6 +672,10 @@ def transform_genparams(genparams, api_format):
        genparams["top_k"] = int(genparams.get('top_k', 120))
        genparams["max_length"] = genparams.get('max', 100)
    elif api_format==2:
        if "ignore_eos" in genparams and not ("use_default_badwordsids" in genparams):
            genparams["use_default_badwordsids"] = genparams.get('ignore_eos', False)
    elif api_format==3 or api_format==4:
        genparams["max_length"] = genparams.get('max_tokens', 100)
        presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0))
@ -813,6 +817,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                smoothing_factor=genparams.get('smoothing_factor', 0.0),
                logit_biases=genparams.get('logit_bias', {}),
                render_special=genparams.get('render_special', False),
                banned_tokens=genparams.get('banned_tokens', []),
                )
        genout = {"text":"","status":-1,"stopreason":-1}
@ -3281,7 +3286,6 @@ if __name__ == '__main__':
    parser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
    parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
    parser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
    parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
    parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
    parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
    parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')