Refactored a lot of code: removed the --bantokens launch argument and moved token banning to the generation API (banned_tokens)

This commit is contained in:
Concedo 2024-04-27 17:57:13 +08:00
parent 4ec8a9c57b
commit c230b78906
6 changed files with 214 additions and 76 deletions

View file

@ -270,7 +270,7 @@ class model_backend(InferenceModel):
port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads, port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads,
psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize, psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize,
blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext, blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas, usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None, useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False, benchmark=None, nocertify=False, sdconfig=None, mmproj=None, onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False, benchmark=None, nocertify=False, sdconfig=None, mmproj=None,

View file

@ -55,7 +55,6 @@ struct load_model_inputs
const int gpulayers = 0; const int gpulayers = 0;
const float rope_freq_scale = 1.0f; const float rope_freq_scale = 1.0f;
const float rope_freq_base = 10000.0f; const float rope_freq_base = 10000.0f;
const char * banned_tokens[ban_token_max];
const float tensor_split[tensor_split_max]; const float tensor_split[tensor_split_max];
}; };
struct generation_inputs struct generation_inputs
@ -92,7 +91,7 @@ struct generation_inputs
const float dynatemp_exponent = 1.0f; const float dynatemp_exponent = 1.0f;
const float smoothing_factor = 0.0f; const float smoothing_factor = 0.0f;
const logit_bias logit_biases[logit_bias_max]; const logit_bias logit_biases[logit_bias_max];
const char * banned_tokens[ban_token_max];
}; };
struct generation_outputs struct generation_outputs
{ {

View file

@ -837,17 +837,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale; gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base; gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
//handle custom token bans
banned_tokens.clear();
for(int x=0;x<ban_token_max;++x)
{
std::string word = inputs.banned_tokens[x];
if(word!="")
{
banned_tokens.push_back(word);
}
}
//this is used for the mem_per_token eval, openblas needs more RAM //this is used for the mem_per_token eval, openblas needs more RAM
bool v3_use_scratch = ggml_v3_cpu_has_gpublas(); bool v3_use_scratch = ggml_v3_cpu_has_gpublas();
@ -1624,6 +1613,41 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
} }
} }
//handle custom token bans
banned_tokens.clear();
for(int x=0;x<ban_token_max;++x)
{
std::string word = inputs.banned_tokens[x];
if(word!="")
{
banned_tokens.push_back(word);
}
}
banned_token_ids.clear();
if(banned_tokens.size()>0)
{
if(debugmode==1)
{
printf("\nBanning %zu token sequences...",banned_tokens.size());
}
for(int v=0;v<n_vocab;++v)
{
std::string word = FileFormatTokenizeID(v,file_format, true);
for(int i=0;i<banned_tokens.size();++i)
{
if (word.find(banned_tokens[i]) != std::string::npos)
{
banned_token_ids.push_back(v);
break;
}
}
}
if(debugmode==1)
{
printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
}
}
logit_biases.clear(); logit_biases.clear();
for(int x=0;x<logit_bias_max;++x) for(int x=0;x<logit_bias_max;++x)
{ {
@ -1993,25 +2017,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
printf("\nWarning! n_vocab is invalid, maybe bad format!"); printf("\nWarning! n_vocab is invalid, maybe bad format!");
} }
//prepare banned tokens
if(banned_token_ids.size()==0 && banned_tokens.size()>0)
{
printf("\n[First Run] Banning %zu token sequences...",banned_tokens.size());
for(int v=0;v<n_vocab;++v)
{
std::string word = FileFormatTokenizeID(v,file_format, true);
for(int i=0;i<banned_tokens.size();++i)
{
if (word.find(banned_tokens[i]) != std::string::npos)
{
banned_token_ids.push_back(v);
break;
}
}
}
printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
}
if(allow_regular_prints) if(allow_regular_prints)
{ {
printf("\n"); printf("\n");

View file

@ -136,7 +136,7 @@
}, },
"use_default_badwordsids": { "use_default_badwordsids": {
"default": false, "default": false,
"description": "If true, prevents the EOS token from being generated (Ban EOS). For unbantokens, set this to false.", "description": "If true, prevents the EOS token from being generated (Ban EOS).",
"type": "boolean" "type": "boolean"
}, },
"dynatemp_range": { "dynatemp_range": {

View file

@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite. Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one. If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line. Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
Current version: 135 Current version: 136
-Concedo -Concedo
--> -->
@ -299,7 +299,7 @@ Current version: 135
padding-right: 10px; padding-right: 10px;
} }
#extrastopseq, #anotetemplate { .inlineinput {
background-color: #404040; background-color: #404040;
color: #ffffff; color: #ffffff;
resize: none; resize: none;
@ -3603,6 +3603,7 @@ Current version: 135
var current_anote = ""; //stored author note var current_anote = ""; //stored author note
var current_anotetemplate = "[Author\'s note: <|>]"; var current_anotetemplate = "[Author\'s note: <|>]";
var extrastopseq = ""; var extrastopseq = "";
var tokenbans = "";
var anote_strength = 320; //distance from end var anote_strength = 320; //distance from end
var newlineaftermemory = true; var newlineaftermemory = true;
var current_wi = []; //each item stores a wi object. var current_wi = []; //each item stores a wi object.
@ -3738,8 +3739,8 @@ Current version: 135
passed_ai_warning: false, //used to store AI safety panel acknowledgement state passed_ai_warning: false, //used to store AI safety panel acknowledgement state
entersubmit: true, //enter sends the prompt entersubmit: true, //enter sends the prompt
max_context_length: 1600, max_context_length: 1800,
max_length: 120, max_length: 140,
auto_ctxlen: true, auto_ctxlen: true,
auto_genamt: true, auto_genamt: true,
rep_pen: 1.1, rep_pen: 1.1,
@ -5095,6 +5096,7 @@ Current version: 135
//extra unofficial fields for the story //extra unofficial fields for the story
new_save_storyobj.extrastopseq = extrastopseq; new_save_storyobj.extrastopseq = extrastopseq;
new_save_storyobj.tokenbans = tokenbans;
new_save_storyobj.anotestr = anote_strength; new_save_storyobj.anotestr = anote_strength;
new_save_storyobj.wisearchdepth = wi_searchdepth; new_save_storyobj.wisearchdepth = wi_searchdepth;
new_save_storyobj.wiinsertlocation = wi_insertlocation; new_save_storyobj.wiinsertlocation = wi_insertlocation;
@ -5271,6 +5273,7 @@ Current version: 135
let old_current_memory = current_memory; let old_current_memory = current_memory;
let old_current_wi = current_wi; let old_current_wi = current_wi;
let old_extrastopseq = extrastopseq; let old_extrastopseq = extrastopseq;
let old_tokenbans = tokenbans;
let old_notes = personal_notes; let old_notes = personal_notes;
let old_regexreplace_data = regexreplace_data; let old_regexreplace_data = regexreplace_data;
@ -5325,6 +5328,10 @@ Current version: 135
if (storyobj.extrastopseq) { if (storyobj.extrastopseq) {
extrastopseq = storyobj.extrastopseq; extrastopseq = storyobj.extrastopseq;
} }
if(storyobj.tokenbans)
{
tokenbans = storyobj.tokenbans;
}
if (storyobj.anotestr) { if (storyobj.anotestr) {
anote_strength = storyobj.anotestr; anote_strength = storyobj.anotestr;
} }
@ -5416,6 +5423,7 @@ Current version: 135
{ {
extrastopseq = old_extrastopseq; extrastopseq = old_extrastopseq;
regexreplace_data = old_regexreplace_data; regexreplace_data = old_regexreplace_data;
tokenbans = old_tokenbans;
} }
if (storyobj.savedsettings && storyobj.savedsettings != "") if (storyobj.savedsettings && storyobj.savedsettings != "")
@ -6746,25 +6754,53 @@ Current version: 135
},false,true); },false,true);
} }
var pendinglogitbias = {}; function expand_tokens_section(targetid)
function set_logit_bias()
{ {
inputBox("Enter OpenAI-formatted logit bias dictionary. Each key is the integer token IDs and their values are the biases (-100.0 to 100.0)<br><a href='https://platform.openai.com/docs/api-reference/chat/create#chat-create-logit_bias' class='color_blueurl'>Input is a JSON object, reference here.</a><br>Leave blank to disable.<br>","Set Logit Biases",JSON.stringify(pendinglogitbias),"Enter JSON Object",()=>{ let tablist = ["expandregexreplace","expandtokenbans","expandlogitbias"];
let userinput = getInputBoxValue().trim();
if(userinput=="") for(let i=0;i<tablist.length;++i)
{ {
pendinglogitbias = {}; if(tablist[i]!=targetid)
}
else
{ {
try { document.getElementById(tablist[i]).classList.add("hidden");
pendinglogitbias = JSON.parse(userinput);
} catch (e) {
msgbox("Your logit bias JSON dictionary was not correctly formatted!");
} }
} }
},true,true); if(targetid!="")
{
if(document.getElementById(targetid).classList.contains("hidden"))
{
document.getElementById(targetid).classList.remove("hidden");
}
else
{
document.getElementById(targetid).classList.add("hidden");
}
}
}
function add_logit_bias()
{
let key = document.getElementById("newlogitbiasid").value;
let val = document.getElementById("newlogitbiasval").value;
if(key && val && key.trim()!="" && val.trim()!="")
{
let old = document.getElementById("logitbiastxtarea").value;
try {
let dict = JSON.parse(old);
key = parseInt(key);
val = parseInt(val);
if(!isNaN(key) && !isNaN(val))
{
dict[key] = parseInt(val);
document.getElementById("logitbiastxtarea").value = JSON.stringify(dict,null,2);
}
} catch (e) {
msgbox("Your inputs or logit bias JSON dictionary was not correctly formatted!");
}
document.getElementById("newlogitbiasid").value = "";
document.getElementById("newlogitbiasval").value = "";
}
} }
function add_stop_seq() function add_stop_seq()
@ -6784,6 +6820,23 @@ Current version: 135
},false); },false);
} }
//Asks the user for a substring to ban and appends it to the "tokenbans" textbox.
//Multiple entries in the textbox are separated by the ||$|| delimiter.
function add_token_ban()
{
	const prompt_text = "Enter a token substring to be banned. ALL matching tokens will be removed.\nFor example adding 'ice' will also ban 'nice' and 'rice', assuming they are individual tokens.";
	inputBox(prompt_text,"Add Banned Token Substring","","Enter a Token Substring",()=>{
		let newban = getInputBoxValue().trim();
		if(newban=="")
		{
			return; //nothing useful was entered, leave the textbox untouched
		}
		let bansbox = document.getElementById("tokenbans");
		let existing = bansbox.value;
		//only insert the delimiter when there is already at least one entry
		bansbox.value = (existing!="" ? existing + "||$||" : existing) + newban;
	},false);
}
var msgboxOnDone = hide_msgbox; var msgboxOnDone = hide_msgbox;
function hide_msgbox() { function hide_msgbox() {
//hide msgbox ONLY //hide msgbox ONLY
@ -8919,8 +8972,20 @@ Current version: 135
current_anotetemplate = document.getElementById("anotetemplate").value; current_anotetemplate = document.getElementById("anotetemplate").value;
anote_strength = document.getElementById("anote_strength").value; anote_strength = document.getElementById("anote_strength").value;
extrastopseq = document.getElementById("extrastopseq").value; extrastopseq = document.getElementById("extrastopseq").value;
tokenbans = document.getElementById("tokenbans").value;
newlineaftermemory = (document.getElementById("newlineaftermemory").checked?true:false); newlineaftermemory = (document.getElementById("newlineaftermemory").checked?true:false);
logitbiasdict = pendinglogitbias; try
{
let lb = document.getElementById("logitbiastxtarea").value;
let dict = {};
if(lb!="")
{
dict = JSON.parse(lb);
}
logitbiasdict = dict;
} catch (e) {
console.log("Your logit bias JSON dictionary was not correctly formatted!");
}
regexreplace_data = []; regexreplace_data = [];
for(let i=0;i<num_regex_rows;++i) for(let i=0;i<num_regex_rows;++i)
{ {
@ -9144,6 +9209,7 @@ Current version: 135
current_anote = ""; current_anote = "";
current_wi = []; current_wi = [];
extrastopseq = ""; extrastopseq = "";
tokenbans = "";
anote_strength = 320; anote_strength = 320;
logitbiasdict = {}; logitbiasdict = {};
wi_searchdepth = 0; wi_searchdepth = 0;
@ -10348,6 +10414,24 @@ Current version: 135
return seqs; return seqs;
} }
//Converts the stored token ban string into an array of banned substrings.
//Reads the file-level variable `tokenbans`, in which entries are separated by ||$||.
//Returns an array of the non-empty entries, or an empty array when no bans are configured.
function get_token_bans()
{
	if (tokenbans == "") {
		return [];
	}
	//turn a typed-out \n (backslash followed by n) into an actual newline character
	let rep = replaceAll(tokenbans, "\\n", "\n");
	//leading, trailing or doubled delimiters produce empty fragments, which are dropped
	return rep.split("||$||").filter((seq) => seq != "");
}
function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos
{ {
console.log(submit_payload); console.log(submit_payload);
@ -10367,6 +10451,7 @@ Current version: 135
submit_payload.params.dynatemp_range = localsettings.dynatemp_range; submit_payload.params.dynatemp_range = localsettings.dynatemp_range;
submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent; submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
submit_payload.params.smoothing_factor = localsettings.smoothing_factor; submit_payload.params.smoothing_factor = localsettings.smoothing_factor;
submit_payload.params.banned_tokens = get_token_bans();
} }
//presence pen and logit bias for OAI and newer kcpp //presence pen and logit bias for OAI and newer kcpp
if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="") if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="")
@ -13165,8 +13250,10 @@ Current version: 135
document.getElementById("anotetemplate").value = current_anotetemplate; document.getElementById("anotetemplate").value = current_anotetemplate;
document.getElementById("anote_strength").value = anote_strength; document.getElementById("anote_strength").value = anote_strength;
document.getElementById("extrastopseq").value = extrastopseq; document.getElementById("extrastopseq").value = extrastopseq;
document.getElementById("tokenbans").value = tokenbans;
document.getElementById("newlineaftermemory").checked = (newlineaftermemory?true:false); document.getElementById("newlineaftermemory").checked = (newlineaftermemory?true:false);
pendinglogitbias = logitbiasdict; document.getElementById("logitbiastxtarea").value = JSON.stringify(logitbiasdict,null,2);
if(custom_kobold_endpoint!="" || !is_using_custom_ep() ) if(custom_kobold_endpoint!="" || !is_using_custom_ep() )
{ {
document.getElementById("noextrastopseq").classList.add("hidden"); document.getElementById("noextrastopseq").classList.add("hidden");
@ -13183,7 +13270,16 @@ Current version: 135
//setup regex replacers //setup regex replacers
populate_regex_replacers(); populate_regex_replacers();
document.getElementById("btnlogitbias").disabled = !is_using_custom_ep(); if(is_using_custom_ep())
{
document.getElementById("nologitbias").classList.add("hidden");
document.getElementById("notokenbans").classList.add("hidden");
}
else
{
document.getElementById("nologitbias").classList.remove("hidden");
document.getElementById("notokenbans").classList.remove("hidden");
}
} }
@ -15291,7 +15387,7 @@ Current version: 135
</span> </span>
</div> </div>
<div style="display: flex; column-gap: 4px;"> <div style="display: flex; column-gap: 4px;">
<input class="form-control anotetempbox" type="text" <input class="form-control anotetempbox inlineinput" type="text"
placeholder="(the &lt;|&gt; will be replaced with the Author's Note text)" value="" id="anotetemplate"> placeholder="(the &lt;|&gt; will be replaced with the Author's Note text)" value="" id="anotetemplate">
<select style="padding:4px;" class="anotetempscale form-control" id="anote_strength"> <select style="padding:4px;" class="anotetempscale form-control" id="anote_strength">
<option value="480">Weak</option> <option value="480">Weak</option>
@ -15341,23 +15437,57 @@ Current version: 135
class="helptext">Triggers the text generator to stop generating early if this sequence appears, in addition to default stop sequences. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span></div> class="helptext">Triggers the text generator to stop generating early if this sequence appears, in addition to default stop sequences. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span></div>
<div class="color_red hidden" id="noextrastopseq">Stop Sequences may be unavailable.</div> <div class="color_red hidden" id="noextrastopseq">Stop Sequences may be unavailable.</div>
<div style="display: flex; column-gap: 4px; margin-bottom: 4px;"> <div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
<input class="form-control stopseqbox" type="text" placeholder="None" value="" id="extrastopseq"> <input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="extrastopseq">
<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_stop_seq()">Add New</button> <button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_stop_seq()">Add New</button>
</div> </div>
<div class="settinglabel">
<div class="justifyleft"><br>Logit Biases <span class="helpicon">?<span
class="helptext">Specify a dictionary of token IDs to modify the probability of occuring.</span></span></div>
</div>
<div><button type="button" class="btn btn-primary" style="width:134px;padding:6px 6px;" id="btnlogitbias" onclick="set_logit_bias()">Edit Logit Biases</button></div>
<div class="settinglabel">
<div class="justifyleft"><br>Custom Regex Replace <span class="helpicon">?<span
class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span></div>
</div>
<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
<div style="padding:3px;" class="justifyleft settinglabel">Logit Biases <span class="helpicon">?<span
class="helptext">Specify a dictionary of token IDs to modify the probability of occuring.</span></span>
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandlogitbias')">Expand Section</button>
</div>
<div id="expandlogitbias" class="hidden">
<div class="color_red hidden" id="nologitbias">Logit bias may be unavailable.</div>
<div style="color:#ffffff;">Enter OpenAI-formatted logit bias dictionary. Each key is the integer token IDs and their values are the biases (-100.0 to 100.0). Leave blank to disable.<br><a href='https://platform.openai.com/docs/api-reference/chat/create#chat-create-logit_bias' target='_blank' class='color_blueurl'>Input is a JSON object, reference here.</a><br></div>
<textarea class="form-control" style="line-height:1.1;margin-bottom: 4px;padding:3px" id="logitbiastxtarea" placeholder="" rows="5"></textarea>
<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
<input style="padding:2px" class="form-control stopseqbox inlineinput" inputmode="decimal" type="text" placeholder="Token ID" value="" id="newlogitbiasid">
<input style="padding:2px" class="form-control stopseqbox inlineinput" inputmode="decimal" type="text" placeholder="Bias Value" value="" id="newlogitbiasval">
<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_logit_bias()">Add New</button>
</div>
</div>
<div style="padding:3px;" class="justifyleft settinglabel">Token Bans <span class="helpicon">?<span
class="helptext">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span>
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandtokenbans')">Expand Section</button>
</div>
<div id="expandtokenbans" class="hidden">
<div class="color_red hidden" id="notokenbans">Token bans may be unavailable.</div>
<div style="color:#ffffff;">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||<br></div>
<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
<input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="tokenbans">
<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_token_ban()">Add New</button>
</div>
</div>
<div style="padding:3px;" class="justifyleft settinglabel">Regex Replace <span class="helpicon">?<span
class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span>
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandregexreplace')">Expand Section</button>
</div>
<div id="expandregexreplace" class="hidden">
<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
</table> </table>
</div> </div>
<!-- <div style="padding:3px;" class="justifyleft settinglabel">Repetition Exclusions <span class="helpicon">?<span
class="helptext">Configure specific tokens that will be excluded from repetition and presence penalties.</span></span>
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('')">Expand Section</button>
</div>
<div style="padding:3px;" class="justifyleft settinglabel">Placeholder Tags <span class="helpicon">?<span
class="helptext">Configure automatic substitutions for placeholders in text.</span></span>
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('')">Expand Section</button>
</div> -->
</div>
<div class="popupfooter"> <div class="popupfooter">
<button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();render_gametext();hide_popups()">OK</button> <button type="button" class="btn btn-primary" onclick="confirm_memory();save_wi();render_gametext();hide_popups()">OK</button>
<button type="button" class="btn btn-primary" onclick="revert_wi();hide_popups()">Cancel</button> <button type="button" class="btn btn-primary" onclick="revert_wi();hide_popups()">Cancel</button>

View file

@ -56,7 +56,6 @@ class load_model_inputs(ctypes.Structure):
("gpulayers", ctypes.c_int), ("gpulayers", ctypes.c_int),
("rope_freq_scale", ctypes.c_float), ("rope_freq_scale", ctypes.c_float),
("rope_freq_base", ctypes.c_float), ("rope_freq_base", ctypes.c_float),
("banned_tokens", ctypes.c_char_p * ban_token_max),
("tensor_split", ctypes.c_float * tensor_split_max)] ("tensor_split", ctypes.c_float * tensor_split_max)]
class generation_inputs(ctypes.Structure): class generation_inputs(ctypes.Structure):
@ -91,7 +90,8 @@ class generation_inputs(ctypes.Structure):
("dynatemp_range", ctypes.c_float), ("dynatemp_range", ctypes.c_float),
("dynatemp_exponent", ctypes.c_float), ("dynatemp_exponent", ctypes.c_float),
("smoothing_factor", ctypes.c_float), ("smoothing_factor", ctypes.c_float),
("logit_biases", logit_bias * logit_bias_max)] ("logit_biases", logit_bias * logit_bias_max),
("banned_tokens", ctypes.c_char_p * ban_token_max)]
class generation_outputs(ctypes.Structure): class generation_outputs(ctypes.Structure):
_fields_ = [("status", ctypes.c_int), _fields_ = [("status", ctypes.c_int),
@ -391,16 +391,10 @@ def load_model(model_filename):
inputs.executable_path = (getdirpath()+"/").encode("UTF-8") inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
inputs.debugmode = args.debugmode inputs.debugmode = args.debugmode
banned_tokens = args.bantokens
for n in range(ban_token_max):
if not banned_tokens or n >= len(banned_tokens):
inputs.banned_tokens[n] = "".encode("UTF-8")
else:
inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
ret = handle.load_model(inputs) ret = handle.load_model(inputs)
return ret return ret
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False): def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[]):
global maxctx, args, currentusergenkey, totalgens, pendingabortkey global maxctx, args, currentusergenkey, totalgens, pendingabortkey
inputs = generation_inputs() inputs = generation_inputs()
inputs.prompt = prompt.encode("UTF-8") inputs.prompt = prompt.encode("UTF-8")
@ -487,6 +481,12 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
inputs.logit_biases[n] = logit_bias(-1, 0.0) inputs.logit_biases[n] = logit_bias(-1, 0.0)
print(f"Skipped unparsable logit bias:{ex}") print(f"Skipped unparsable logit bias:{ex}")
for n in range(ban_token_max):
if not banned_tokens or n >= len(banned_tokens):
inputs.banned_tokens[n] = "".encode("UTF-8")
else:
inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
currentusergenkey = genkey currentusergenkey = genkey
totalgens += 1 totalgens += 1
#early exit if aborted #early exit if aborted
@ -672,6 +672,10 @@ def transform_genparams(genparams, api_format):
genparams["top_k"] = int(genparams.get('top_k', 120)) genparams["top_k"] = int(genparams.get('top_k', 120))
genparams["max_length"] = genparams.get('max', 100) genparams["max_length"] = genparams.get('max', 100)
elif api_format==2:
if "ignore_eos" in genparams and not ("use_default_badwordsids" in genparams):
genparams["use_default_badwordsids"] = genparams.get('ignore_eos', False)
elif api_format==3 or api_format==4: elif api_format==3 or api_format==4:
genparams["max_length"] = genparams.get('max_tokens', 100) genparams["max_length"] = genparams.get('max_tokens', 100)
presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0)) presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0))
@ -813,6 +817,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
smoothing_factor=genparams.get('smoothing_factor', 0.0), smoothing_factor=genparams.get('smoothing_factor', 0.0),
logit_biases=genparams.get('logit_bias', {}), logit_biases=genparams.get('logit_bias', {}),
render_special=genparams.get('render_special', False), render_special=genparams.get('render_special', False),
banned_tokens=genparams.get('banned_tokens', []),
) )
genout = {"text":"","status":-1,"stopreason":-1} genout = {"text":"","status":-1,"stopreason":-1}
@ -3281,7 +3286,6 @@ if __name__ == '__main__':
parser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+') parser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true') parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
parser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true') parser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0) parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true') parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true') parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')