mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
refactored a lot of code, remove bantokens, move it to api
This commit is contained in:
parent
4ec8a9c57b
commit
c230b78906
6 changed files with 214 additions and 76 deletions
2
class.py
2
class.py
|
@ -270,7 +270,7 @@ class model_backend(InferenceModel):
|
|||
port=5001, port_param=5001, host='', launch=False, lora=None, threads=self.kcpp_threads, blasthreads=self.kcpp_threads,
|
||||
psutil_set_threads=False, highpriority=False, contextsize=self.kcpp_ctxsize,
|
||||
blasbatchsize=self.kcpp_blasbatchsize, ropeconfig=[self.kcpp_ropescale, self.kcpp_ropebase], stream=False, smartcontext=self.kcpp_smartcontext,
|
||||
unbantokens=False, bantokens=None, usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
|
||||
usemirostat=None, forceversion=0, nommap=self.kcpp_nommap,
|
||||
usemlock=False, noavx2=self.kcpp_noavx2, debugmode=self.kcpp_debugmode, skiplauncher=True, hordeconfig=None, noblas=self.kcpp_noblas,
|
||||
useclblast=self.kcpp_useclblast, usecublas=self.kcpp_usecublas, usevulkan=self.kcpp_usevulkan, gpulayers=self.kcpp_gpulayers, tensor_split=self.kcpp_tensor_split, config=None,
|
||||
onready='', multiuser=False, foreground=False, preloadstory=None, noshift=False, remotetunnel=False, ssl=False, benchmark=None, nocertify=False, sdconfig=None, mmproj=None,
|
||||
|
|
3
expose.h
3
expose.h
|
@ -55,7 +55,6 @@ struct load_model_inputs
|
|||
const int gpulayers = 0;
|
||||
const float rope_freq_scale = 1.0f;
|
||||
const float rope_freq_base = 10000.0f;
|
||||
const char * banned_tokens[ban_token_max];
|
||||
const float tensor_split[tensor_split_max];
|
||||
};
|
||||
struct generation_inputs
|
||||
|
@ -92,7 +91,7 @@ struct generation_inputs
|
|||
const float dynatemp_exponent = 1.0f;
|
||||
const float smoothing_factor = 0.0f;
|
||||
const logit_bias logit_biases[logit_bias_max];
|
||||
|
||||
const char * banned_tokens[ban_token_max];
|
||||
};
|
||||
struct generation_outputs
|
||||
{
|
||||
|
|
|
@ -837,17 +837,6 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
|
|||
gptj_ctx_v3.hparams.rope_freq_scale = neox_ctx_v3.hparams.rope_freq_scale = rope_freq_scale;
|
||||
gptj_ctx_v3.hparams.rope_freq_base = neox_ctx_v3.hparams.rope_freq_base = rope_freq_base;
|
||||
|
||||
//handle custom token bans
|
||||
banned_tokens.clear();
|
||||
for(int x=0;x<ban_token_max;++x)
|
||||
{
|
||||
std::string word = inputs.banned_tokens[x];
|
||||
if(word!="")
|
||||
{
|
||||
banned_tokens.push_back(word);
|
||||
}
|
||||
}
|
||||
|
||||
//this is used for the mem_per_token eval, openblas needs more RAM
|
||||
bool v3_use_scratch = ggml_v3_cpu_has_gpublas();
|
||||
|
||||
|
@ -1624,6 +1613,41 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
}
|
||||
}
|
||||
|
||||
//handle custom token bans
|
||||
banned_tokens.clear();
|
||||
for(int x=0;x<ban_token_max;++x)
|
||||
{
|
||||
std::string word = inputs.banned_tokens[x];
|
||||
if(word!="")
|
||||
{
|
||||
banned_tokens.push_back(word);
|
||||
}
|
||||
}
|
||||
banned_token_ids.clear();
|
||||
if(banned_tokens.size()>0)
|
||||
{
|
||||
if(debugmode==1)
|
||||
{
|
||||
printf("\nBanning %zu token sequences...",banned_tokens.size());
|
||||
}
|
||||
for(int v=0;v<n_vocab;++v)
|
||||
{
|
||||
std::string word = FileFormatTokenizeID(v,file_format, true);
|
||||
for(int i=0;i<banned_tokens.size();++i)
|
||||
{
|
||||
if (word.find(banned_tokens[i]) != std::string::npos)
|
||||
{
|
||||
banned_token_ids.push_back(v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(debugmode==1)
|
||||
{
|
||||
printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
|
||||
}
|
||||
}
|
||||
|
||||
logit_biases.clear();
|
||||
for(int x=0;x<logit_bias_max;++x)
|
||||
{
|
||||
|
@ -1993,25 +2017,6 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
printf("\nWarning! n_vocab is invalid, maybe bad format!");
|
||||
}
|
||||
|
||||
//prepare banned tokens
|
||||
if(banned_token_ids.size()==0 && banned_tokens.size()>0)
|
||||
{
|
||||
printf("\n[First Run] Banning %zu token sequences...",banned_tokens.size());
|
||||
for(int v=0;v<n_vocab;++v)
|
||||
{
|
||||
std::string word = FileFormatTokenizeID(v,file_format, true);
|
||||
for(int i=0;i<banned_tokens.size();++i)
|
||||
{
|
||||
if (word.find(banned_tokens[i]) != std::string::npos)
|
||||
{
|
||||
banned_token_ids.push_back(v);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\nBanned a total of %zu tokens.\n",banned_token_ids.size());
|
||||
}
|
||||
|
||||
if(allow_regular_prints)
|
||||
{
|
||||
printf("\n");
|
||||
|
|
|
@ -136,7 +136,7 @@
|
|||
},
|
||||
"use_default_badwordsids": {
|
||||
"default": false,
|
||||
"description": "If true, prevents the EOS token from being generated (Ban EOS). For unbantokens, set this to false.",
|
||||
"description": "If true, prevents the EOS token from being generated (Ban EOS).",
|
||||
"type": "boolean"
|
||||
},
|
||||
"dynatemp_range": {
|
||||
|
|
194
klite.embd
194
klite.embd
|
@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
|
|||
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
|
||||
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
|
||||
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
|
||||
Current version: 135
|
||||
Current version: 136
|
||||
-Concedo
|
||||
-->
|
||||
|
||||
|
@ -299,7 +299,7 @@ Current version: 135
|
|||
padding-right: 10px;
|
||||
}
|
||||
|
||||
#extrastopseq, #anotetemplate {
|
||||
.inlineinput {
|
||||
background-color: #404040;
|
||||
color: #ffffff;
|
||||
resize: none;
|
||||
|
@ -3603,6 +3603,7 @@ Current version: 135
|
|||
var current_anote = ""; //stored author note
|
||||
var current_anotetemplate = "[Author\'s note: <|>]";
|
||||
var extrastopseq = "";
|
||||
var tokenbans = "";
|
||||
var anote_strength = 320; //distance from end
|
||||
var newlineaftermemory = true;
|
||||
var current_wi = []; //each item stores a wi object.
|
||||
|
@ -3738,8 +3739,8 @@ Current version: 135
|
|||
passed_ai_warning: false, //used to store AI safety panel acknowledgement state
|
||||
entersubmit: true, //enter sends the prompt
|
||||
|
||||
max_context_length: 1600,
|
||||
max_length: 120,
|
||||
max_context_length: 1800,
|
||||
max_length: 140,
|
||||
auto_ctxlen: true,
|
||||
auto_genamt: true,
|
||||
rep_pen: 1.1,
|
||||
|
@ -5095,6 +5096,7 @@ Current version: 135
|
|||
|
||||
//extra unofficial fields for the story
|
||||
new_save_storyobj.extrastopseq = extrastopseq;
|
||||
new_save_storyobj.tokenbans = tokenbans;
|
||||
new_save_storyobj.anotestr = anote_strength;
|
||||
new_save_storyobj.wisearchdepth = wi_searchdepth;
|
||||
new_save_storyobj.wiinsertlocation = wi_insertlocation;
|
||||
|
@ -5271,6 +5273,7 @@ Current version: 135
|
|||
let old_current_memory = current_memory;
|
||||
let old_current_wi = current_wi;
|
||||
let old_extrastopseq = extrastopseq;
|
||||
let old_tokenbans = tokenbans;
|
||||
let old_notes = personal_notes;
|
||||
let old_regexreplace_data = regexreplace_data;
|
||||
|
||||
|
@ -5325,6 +5328,10 @@ Current version: 135
|
|||
if (storyobj.extrastopseq) {
|
||||
extrastopseq = storyobj.extrastopseq;
|
||||
}
|
||||
if(storyobj.tokenbans)
|
||||
{
|
||||
tokenbans = storyobj.tokenbans;
|
||||
}
|
||||
if (storyobj.anotestr) {
|
||||
anote_strength = storyobj.anotestr;
|
||||
}
|
||||
|
@ -5416,6 +5423,7 @@ Current version: 135
|
|||
{
|
||||
extrastopseq = old_extrastopseq;
|
||||
regexreplace_data = old_regexreplace_data;
|
||||
tokenbans = old_tokenbans;
|
||||
}
|
||||
|
||||
if (storyobj.savedsettings && storyobj.savedsettings != "")
|
||||
|
@ -6746,25 +6754,53 @@ Current version: 135
|
|||
},false,true);
|
||||
}
|
||||
|
||||
var pendinglogitbias = {};
|
||||
function set_logit_bias()
|
||||
function expand_tokens_section(targetid)
|
||||
{
|
||||
inputBox("Enter OpenAI-formatted logit bias dictionary. Each key is the integer token IDs and their values are the biases (-100.0 to 100.0)<br><a href='https://platform.openai.com/docs/api-reference/chat/create#chat-create-logit_bias' class='color_blueurl'>Input is a JSON object, reference here.</a><br>Leave blank to disable.<br>","Set Logit Biases",JSON.stringify(pendinglogitbias),"Enter JSON Object",()=>{
|
||||
let userinput = getInputBoxValue().trim();
|
||||
if(userinput=="")
|
||||
let tablist = ["expandregexreplace","expandtokenbans","expandlogitbias"];
|
||||
|
||||
for(let i=0;i<tablist.length;++i)
|
||||
{
|
||||
if(tablist[i]!=targetid)
|
||||
{
|
||||
pendinglogitbias = {};
|
||||
document.getElementById(tablist[i]).classList.add("hidden");
|
||||
}
|
||||
}
|
||||
|
||||
if(targetid!="")
|
||||
{
|
||||
if(document.getElementById(targetid).classList.contains("hidden"))
|
||||
{
|
||||
document.getElementById(targetid).classList.remove("hidden");
|
||||
}
|
||||
else
|
||||
{
|
||||
try {
|
||||
pendinglogitbias = JSON.parse(userinput);
|
||||
} catch (e) {
|
||||
msgbox("Your logit bias JSON dictionary was not correctly formatted!");
|
||||
}
|
||||
document.getElementById(targetid).classList.add("hidden");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
},true,true);
|
||||
//Adds (or overwrites) a single token-id -> bias entry in the logit bias JSON
//held by the "logitbiastxtarea" textarea, using the values typed into the
//"newlogitbiasid" and "newlogitbiasval" inputs. Does nothing when either input
//is blank. On any invalid input or invalid existing JSON, a message box is
//shown and the textarea is left untouched. Both inputs are cleared afterwards.
function add_logit_bias()
{
    const idbox = document.getElementById("newlogitbiasid");
    const valbox = document.getElementById("newlogitbiasval");
    const txtarea = document.getElementById("logitbiastxtarea");
    let key = idbox.value;
    let val = valbox.value;
    if(key && val && key.trim()!="" && val.trim()!="")
    {
        try {
            //a blank textarea means "no biases yet", so start from an empty dictionary
            let old = txtarea.value.trim();
            let dict = (old=="" ? {} : JSON.parse(old));
            //only a plain JSON object can hold token-id keys
            if(dict===null || typeof dict!=="object" || Array.isArray(dict))
            {
                throw new Error("logit bias is not a JSON object");
            }
            //token ids are integers; bias values may be fractional, so do not truncate them
            let tokenid = parseInt(key,10);
            let bias = parseFloat(val);
            if(isNaN(tokenid) || isNaN(bias))
            {
                throw new Error("token id or bias is not a number");
            }
            dict[tokenid] = bias;
            txtarea.value = JSON.stringify(dict,null,2);
        } catch (e) {
            msgbox("Your inputs or logit bias JSON dictionary was not correctly formatted!");
        }
        idbox.value = "";
        valbox.value = "";
    }
}
|
||||
|
||||
function add_stop_seq()
|
||||
|
@ -6784,6 +6820,23 @@ Current version: 135
|
|||
},false);
|
||||
}
|
||||
|
||||
//Asks the user for one token substring and appends it to the "tokenbans"
//input box. Existing entries are kept and separated from the new one with
//the ||$|| delimiter. Blank (whitespace-only) input is ignored.
function add_token_ban()
{
    inputBox("Enter a token substring to be banned. ALL matching tokens will be removed.\nFor example adding 'ice' will also ban 'nice' and 'rice', assuming they are individual tokens.","Add Banned Token Substring","","Enter a Token Substring",()=>{
        const entered = getInputBoxValue();
        if(entered.trim()=="")
        {
            return;
        }
        const banbox = document.getElementById("tokenbans");
        const existing = banbox.value;
        //only prepend the delimiter when there is already something in the box
        const prefix = (existing!="" ? existing + "||$||" : "");
        banbox.value = prefix + entered.trim();
    },false);
}
|
||||
|
||||
var msgboxOnDone = hide_msgbox;
|
||||
function hide_msgbox() {
|
||||
//hide msgbox ONLY
|
||||
|
@ -8919,8 +8972,20 @@ Current version: 135
|
|||
current_anotetemplate = document.getElementById("anotetemplate").value;
|
||||
anote_strength = document.getElementById("anote_strength").value;
|
||||
extrastopseq = document.getElementById("extrastopseq").value;
|
||||
tokenbans = document.getElementById("tokenbans").value;
|
||||
newlineaftermemory = (document.getElementById("newlineaftermemory").checked?true:false);
|
||||
logitbiasdict = pendinglogitbias;
|
||||
try
|
||||
{
|
||||
let lb = document.getElementById("logitbiastxtarea").value;
|
||||
let dict = {};
|
||||
if(lb!="")
|
||||
{
|
||||
dict = JSON.parse(lb);
|
||||
}
|
||||
logitbiasdict = dict;
|
||||
} catch (e) {
|
||||
console.log("Your logit bias JSON dictionary was not correctly formatted!");
|
||||
}
|
||||
regexreplace_data = [];
|
||||
for(let i=0;i<num_regex_rows;++i)
|
||||
{
|
||||
|
@ -9144,6 +9209,7 @@ Current version: 135
|
|||
current_anote = "";
|
||||
current_wi = [];
|
||||
extrastopseq = "";
|
||||
tokenbans = "";
|
||||
anote_strength = 320;
|
||||
logitbiasdict = {};
|
||||
wi_searchdepth = 0;
|
||||
|
@ -10348,6 +10414,24 @@ Current version: 135
|
|||
return seqs;
|
||||
}
|
||||
|
||||
//Converts the stored tokenbans string into an array of banned substrings.
//Returns an empty array when no bans are configured.
function get_token_bans()
{
    if (tokenbans == "") {
        return [];
    }
    //a typed backslash-n pair is turned into a real newline character
    let rep = replaceAll(tokenbans, "\\n", "\n");
    //entries are delimited by ||$||; empty entries are discarded
    return rep.split("||$||").filter((entry) => entry && entry != "");
}
|
||||
|
||||
function dispatch_submit_generation(submit_payload, input_was_empty) //if input is not empty, always unban eos
|
||||
{
|
||||
console.log(submit_payload);
|
||||
|
@ -10367,6 +10451,7 @@ Current version: 135
|
|||
submit_payload.params.dynatemp_range = localsettings.dynatemp_range;
|
||||
submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
|
||||
submit_payload.params.smoothing_factor = localsettings.smoothing_factor;
|
||||
submit_payload.params.banned_tokens = get_token_bans();
|
||||
}
|
||||
//presence pen and logit bias for OAI and newer kcpp
|
||||
if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="")
|
||||
|
@ -13165,8 +13250,10 @@ Current version: 135
|
|||
document.getElementById("anotetemplate").value = current_anotetemplate;
|
||||
document.getElementById("anote_strength").value = anote_strength;
|
||||
document.getElementById("extrastopseq").value = extrastopseq;
|
||||
document.getElementById("tokenbans").value = tokenbans;
|
||||
document.getElementById("newlineaftermemory").checked = (newlineaftermemory?true:false);
|
||||
pendinglogitbias = logitbiasdict;
|
||||
document.getElementById("logitbiastxtarea").value = JSON.stringify(logitbiasdict,null,2);
|
||||
|
||||
if(custom_kobold_endpoint!="" || !is_using_custom_ep() )
|
||||
{
|
||||
document.getElementById("noextrastopseq").classList.add("hidden");
|
||||
|
@ -13183,7 +13270,16 @@ Current version: 135
|
|||
//setup regex replacers
|
||||
populate_regex_replacers();
|
||||
|
||||
document.getElementById("btnlogitbias").disabled = !is_using_custom_ep();
|
||||
if(is_using_custom_ep())
|
||||
{
|
||||
document.getElementById("nologitbias").classList.add("hidden");
|
||||
document.getElementById("notokenbans").classList.add("hidden");
|
||||
}
|
||||
else
|
||||
{
|
||||
document.getElementById("nologitbias").classList.remove("hidden");
|
||||
document.getElementById("notokenbans").classList.remove("hidden");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
@ -15291,7 +15387,7 @@ Current version: 135
|
|||
</span>
|
||||
</div>
|
||||
<div style="display: flex; column-gap: 4px;">
|
||||
<input class="form-control anotetempbox" type="text"
|
||||
<input class="form-control anotetempbox inlineinput" type="text"
|
||||
placeholder="(the <|> will be replaced with the Author's Note text)" value="" id="anotetemplate">
|
||||
<select style="padding:4px;" class="anotetempscale form-control" id="anote_strength">
|
||||
<option value="480">Weak</option>
|
||||
|
@ -15341,21 +15437,55 @@ Current version: 135
|
|||
class="helptext">Triggers the text generator to stop generating early if this sequence appears, in addition to default stop sequences. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span></div>
|
||||
<div class="color_red hidden" id="noextrastopseq">Stop Sequences may be unavailable.</div>
|
||||
<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
|
||||
<input class="form-control stopseqbox" type="text" placeholder="None" value="" id="extrastopseq">
|
||||
<input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="extrastopseq">
|
||||
<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_stop_seq()">Add New</button>
|
||||
</div>
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft"><br>Logit Biases <span class="helpicon">?<span
|
||||
class="helptext">Specify a dictionary of token IDs to modify the probability of occuring.</span></span></div>
|
||||
</div>
|
||||
<div><button type="button" class="btn btn-primary" style="width:134px;padding:6px 6px;" id="btnlogitbias" onclick="set_logit_bias()">Edit Logit Biases</button></div>
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft"><br>Custom Regex Replace <span class="helpicon">?<span
|
||||
class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span></div>
|
||||
</div>
|
||||
<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
|
||||
|
||||
</table>
|
||||
<div style="padding:3px;" class="justifyleft settinglabel">Logit Biases <span class="helpicon">?<span
|
||||
class="helptext">Specify a dictionary of token IDs to modify the probability of occuring.</span></span>
|
||||
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandlogitbias')">Expand Section</button>
|
||||
</div>
|
||||
<div id="expandlogitbias" class="hidden">
|
||||
<div class="color_red hidden" id="nologitbias">Logit bias may be unavailable.</div>
|
||||
<div style="color:#ffffff;">Enter OpenAI-formatted logit bias dictionary. Each key is the integer token IDs and their values are the biases (-100.0 to 100.0). Leave blank to disable.<br><a href='https://platform.openai.com/docs/api-reference/chat/create#chat-create-logit_bias' target='_blank' class='color_blueurl'>Input is a JSON object, reference here.</a><br></div>
|
||||
<textarea class="form-control" style="line-height:1.1;margin-bottom: 4px;padding:3px" id="logitbiastxtarea" placeholder="" rows="5"></textarea>
|
||||
<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
|
||||
<input style="padding:2px" class="form-control stopseqbox inlineinput" inputmode="decimal" type="text" placeholder="Token ID" value="" id="newlogitbiasid">
|
||||
<input style="padding:2px" class="form-control stopseqbox inlineinput" inputmode="decimal" type="text" placeholder="Bias Value" value="" id="newlogitbiasval">
|
||||
<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_logit_bias()">Add New</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="padding:3px;" class="justifyleft settinglabel">Token Bans <span class="helpicon">?<span
|
||||
class="helptext">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||</span></span>
|
||||
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandtokenbans')">Expand Section</button>
|
||||
</div>
|
||||
<div id="expandtokenbans" class="hidden">
|
||||
<div class="color_red hidden" id="notokenbans">Token bans may be unavailable.</div>
|
||||
<div style="color:#ffffff;">Outright removal for ANY tokens containing a specific substring from model vocab. If you want multiple sequences, separate them with the following delimiter: ||$||<br></div>
|
||||
<div style="display: flex; column-gap: 4px; margin-bottom: 4px;">
|
||||
<input class="form-control stopseqbox inlineinput" type="text" placeholder="None" value="" id="tokenbans">
|
||||
<button type="button" class="btn btn-primary" style="width:90px;padding:6px 6px;" onclick="add_token_ban()">Add New</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div style="padding:3px;" class="justifyleft settinglabel">Regex Replace <span class="helpicon">?<span
|
||||
class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span>
|
||||
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('expandregexreplace')">Expand Section</button>
|
||||
</div>
|
||||
<div id="expandregexreplace" class="hidden">
|
||||
<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
|
||||
</table>
|
||||
</div>
|
||||
|
||||
<!-- <div style="padding:3px;" class="justifyleft settinglabel">Repetition Exclusions <span class="helpicon">?<span
|
||||
class="helptext">Configure specific tokens that will be excluded from repetition and presence penalties.</span></span>
|
||||
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('')">Expand Section</button>
|
||||
</div>
|
||||
<div style="padding:3px;" class="justifyleft settinglabel">Placeholder Tags <span class="helpicon">?<span
|
||||
class="helptext">Configure automatic substitutions for placeholders in text.</span></span>
|
||||
<button type="button" class="btn btn-primary" style="font-size:12px;padding:2px 2px;" onclick="expand_tokens_section('')">Expand Section</button>
|
||||
</div> -->
|
||||
</div>
|
||||
|
||||
<div class="popupfooter">
|
||||
|
|
24
koboldcpp.py
24
koboldcpp.py
|
@ -56,7 +56,6 @@ class load_model_inputs(ctypes.Structure):
|
|||
("gpulayers", ctypes.c_int),
|
||||
("rope_freq_scale", ctypes.c_float),
|
||||
("rope_freq_base", ctypes.c_float),
|
||||
("banned_tokens", ctypes.c_char_p * ban_token_max),
|
||||
("tensor_split", ctypes.c_float * tensor_split_max)]
|
||||
|
||||
class generation_inputs(ctypes.Structure):
|
||||
|
@ -91,7 +90,8 @@ class generation_inputs(ctypes.Structure):
|
|||
("dynatemp_range", ctypes.c_float),
|
||||
("dynatemp_exponent", ctypes.c_float),
|
||||
("smoothing_factor", ctypes.c_float),
|
||||
("logit_biases", logit_bias * logit_bias_max)]
|
||||
("logit_biases", logit_bias * logit_bias_max),
|
||||
("banned_tokens", ctypes.c_char_p * ban_token_max)]
|
||||
|
||||
class generation_outputs(ctypes.Structure):
|
||||
_fields_ = [("status", ctypes.c_int),
|
||||
|
@ -391,16 +391,10 @@ def load_model(model_filename):
|
|||
|
||||
inputs.executable_path = (getdirpath()+"/").encode("UTF-8")
|
||||
inputs.debugmode = args.debugmode
|
||||
banned_tokens = args.bantokens
|
||||
for n in range(ban_token_max):
|
||||
if not banned_tokens or n >= len(banned_tokens):
|
||||
inputs.banned_tokens[n] = "".encode("UTF-8")
|
||||
else:
|
||||
inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
|
||||
ret = handle.load_model(inputs)
|
||||
return ret
|
||||
|
||||
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False):
|
||||
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[]):
|
||||
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
|
||||
inputs = generation_inputs()
|
||||
inputs.prompt = prompt.encode("UTF-8")
|
||||
|
@ -487,6 +481,12 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
|
|||
inputs.logit_biases[n] = logit_bias(-1, 0.0)
|
||||
print(f"Skipped unparsable logit bias:{ex}")
|
||||
|
||||
for n in range(ban_token_max):
|
||||
if not banned_tokens or n >= len(banned_tokens):
|
||||
inputs.banned_tokens[n] = "".encode("UTF-8")
|
||||
else:
|
||||
inputs.banned_tokens[n] = banned_tokens[n].encode("UTF-8")
|
||||
|
||||
currentusergenkey = genkey
|
||||
totalgens += 1
|
||||
#early exit if aborted
|
||||
|
@ -672,6 +672,10 @@ def transform_genparams(genparams, api_format):
|
|||
genparams["top_k"] = int(genparams.get('top_k', 120))
|
||||
genparams["max_length"] = genparams.get('max', 100)
|
||||
|
||||
elif api_format==2:
|
||||
if "ignore_eos" in genparams and not ("use_default_badwordsids" in genparams):
|
||||
genparams["use_default_badwordsids"] = genparams.get('ignore_eos', False)
|
||||
|
||||
elif api_format==3 or api_format==4:
|
||||
genparams["max_length"] = genparams.get('max_tokens', 100)
|
||||
presence_penalty = genparams.get('presence_penalty', genparams.get('frequency_penalty', 0.0))
|
||||
|
@ -813,6 +817,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
smoothing_factor=genparams.get('smoothing_factor', 0.0),
|
||||
logit_biases=genparams.get('logit_bias', {}),
|
||||
render_special=genparams.get('render_special', False),
|
||||
banned_tokens=genparams.get('banned_tokens', []),
|
||||
)
|
||||
|
||||
genout = {"text":"","status":-1,"stopreason":-1}
|
||||
|
@ -3281,7 +3286,6 @@ if __name__ == '__main__':
|
|||
parser.add_argument("--lora", help="LLAMA models only, applies a lora file on top of model. Experimental.", metavar=('[lora_filename]', '[lora_base]'), nargs='+')
|
||||
parser.add_argument("--smartcontext", help="Reserving a portion of context to try processing less frequently.", action='store_true')
|
||||
parser.add_argument("--noshift", help="If set, do not attempt to Trim and Shift the GGUF context.", action='store_true')
|
||||
parser.add_argument("--bantokens", help="You can manually specify a list of token SUBSTRINGS that the AI cannot use. This bans ALL instances of that substring.", metavar=('[token_substrings]'), nargs='+')
|
||||
parser.add_argument("--forceversion", help="If the model file format detection fails (e.g. rogue modified model) you can set this to override the detected format (enter desired version, e.g. 401 for GPTNeoX-Type2).",metavar=('[version]'), type=int, default=0)
|
||||
parser.add_argument("--nommap", help="If set, do not use mmap to load newer models", action='store_true')
|
||||
parser.add_argument("--usemlock", help="For Apple Systems. Force system to keep model in RAM rather than swapping or compressing", action='store_true')
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue