mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
EOS bypass mode added
This commit is contained in:
parent
3667cc0113
commit
173c7272d5
4 changed files with 112 additions and 49 deletions
1
expose.h
1
expose.h
|
@ -82,6 +82,7 @@ struct generation_inputs
|
|||
const samplers sampler_order[KCPP_SAMPLER_MAX];
|
||||
const int sampler_len;
|
||||
const bool allow_eos_token;
|
||||
const bool bypass_eos_token = false;
|
||||
const bool render_special;
|
||||
const char * stop_sequence[stop_token_max];
|
||||
const bool stream_sse;
|
||||
|
|
|
@ -2205,7 +2205,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
lowestLogit = LowestLogit(logits);
|
||||
}
|
||||
|
||||
if (!inputs.allow_eos_token)
|
||||
if (!inputs.allow_eos_token && !inputs.bypass_eos_token)
|
||||
{
|
||||
// set the logit of the eos token to very low to avoid sampling it
|
||||
logitsPtr[eosID] = lowestLogit;
|
||||
|
@ -2274,7 +2274,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
printf("]\n");
|
||||
}
|
||||
|
||||
if(inputs.allow_eos_token && (id==eosID || (id==eotID && id!=-1)))
|
||||
if(!inputs.bypass_eos_token && inputs.allow_eos_token && (id==eosID || (id==eotID && id!=-1)))
|
||||
{
|
||||
stopper_unused_tokens = remaining_tokens;
|
||||
if(allow_regular_prints)
|
||||
|
|
95
klite.embd
95
klite.embd
|
@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
|
|||
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
|
||||
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
|
||||
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
|
||||
Current version: 137
|
||||
Current version: 138
|
||||
-Concedo
|
||||
-->
|
||||
|
||||
|
@ -2081,6 +2081,7 @@ Current version: 137
|
|||
const instructmodels1 = ["gpt4all","supercot","hermes","airoboros","chrono","wizard","mantis","vicuna","manticore","alpaca","myth","xwin","spicyboros","mlewd","mxlewd","mistral","maid","mixtral","estopia","fighter","fimbul"];
|
||||
const instructmodels2 = ["erebus","nerys","nerybus","janeway","opt","llama"];
|
||||
const defaultmodels = ["gpt4all","supercot","hermes","airoboros","chrono","wizard","mantis","vicuna","manticore","alpaca","myth","xwin","spicyboros","mlewd","mxlewd","llama","mistral","maid","mixtral","estopia","fighter","fimbul"];
|
||||
const ignoredmodels = ["tinyllama"];
|
||||
|
||||
const instructstartplaceholder = "\n{{[INPUT]}}\n";
|
||||
const instructendplaceholder = "\n{{[OUTPUT]}}\n";
|
||||
|
@ -3705,7 +3706,7 @@ Current version: 137
|
|||
trimsentences: true, //trim to last punctuation
|
||||
trimwhitespace: false, //trim trailing whitespace
|
||||
compressnewlines: false, //compress multiple newlines
|
||||
eos_ban_mode: 0, //allow the EOS token when using locally 0=auto,1=unban,2=ban
|
||||
eos_ban_mode: 0, //allow the EOS token when using locally 0=auto,1=unban,2=ban,3=bypass
|
||||
opmode: 4, //what mode are we in? 1=story, 2=adventure, 3=chat, 4=instruct
|
||||
adventure_is_action: false, //in adventure mode, determine story or action
|
||||
adventure_context_mod: true, //extra injection for adventure mode
|
||||
|
@ -3995,6 +3996,16 @@ Current version: 137
|
|||
if (mdls.length > 0)
|
||||
{
|
||||
for (var i = 0; i < mdls.length; ++i) {
|
||||
let skipignored = false;
|
||||
for(let k=0;k<ignoredmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(ignoredmodels[k].trim().toLowerCase()))
|
||||
{
|
||||
skipignored = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skipignored) {
|
||||
for (var j = 0; j < defaultmodels.length; ++j) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(defaultmodels[j].trim().toLowerCase()) ||
|
||||
defaultmodels[j].trim().toLowerCase().includes(mdls[i].name.trim().toLowerCase())) {
|
||||
|
@ -4002,6 +4013,7 @@ Current version: 137
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (selected_models.length == 0) //no matching models, just assign one
|
||||
{
|
||||
selected_models.push(mdls[0]);
|
||||
|
@ -4665,6 +4677,11 @@ Current version: 137
|
|||
}
|
||||
|
||||
function determine_if_ban_eos(input_was_empty) {
|
||||
if(localsettings.eos_ban_mode == 3)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
if (localsettings.eos_ban_mode == 0) {
|
||||
if (localsettings.opmode == 1) {
|
||||
return true; //story mode always ban
|
||||
|
@ -4826,6 +4843,16 @@ Current version: 137
|
|||
selected_models = [];
|
||||
|
||||
for (var i = 0; i < mdls.length; ++i) {
|
||||
let skipignored = false;
|
||||
for(let k=0;k<ignoredmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(ignoredmodels[k].trim().toLowerCase()))
|
||||
{
|
||||
skipignored = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skipignored) {
|
||||
for (var j = 0; j < defaultmodels.length; ++j) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(defaultmodels[j].trim().toLowerCase()) ||
|
||||
defaultmodels[j].trim().toLowerCase().includes(mdls[i].name.trim().toLowerCase())) {
|
||||
|
@ -4833,6 +4860,7 @@ Current version: 137
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (selected_models.length == 0) //no matching models, just assign one
|
||||
{
|
||||
|
@ -4887,6 +4915,17 @@ Current version: 137
|
|||
if(selected_models.length==0)
|
||||
{
|
||||
for (var i = 0; i < mdls.length; ++i) {
|
||||
let skipignored = false;
|
||||
for(let k=0;k<ignoredmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(ignoredmodels[k].trim().toLowerCase()))
|
||||
{
|
||||
skipignored = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(!skipignored)
|
||||
{
|
||||
for (var j = 0; j < defaultmodels.length; ++j) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(defaultmodels[j].trim().toLowerCase()) ||
|
||||
defaultmodels[j].trim().toLowerCase().includes(mdls[i].name.trim().toLowerCase())) {
|
||||
|
@ -4895,6 +4934,7 @@ Current version: 137
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (selected_models.length == 0) //no matching models, just assign one
|
||||
{
|
||||
|
@ -6291,58 +6331,72 @@ Current version: 137
|
|||
let nsfwmodels = ["erebus","shinen","horni","litv2","lit-6b","spicyboros","mlewd","mxlewd"];
|
||||
selected_models = [];
|
||||
for (var i = 0; i < mdls.length; ++i) {
|
||||
let skipignored = false;
|
||||
for(let k=0;k<ignoredmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(ignoredmodels[k].trim().toLowerCase()))
|
||||
{
|
||||
skipignored = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skipignored) {
|
||||
for (var j = 0; j < temp_scenario.prefmodel1.length; ++j) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(temp_scenario.prefmodel1[j].trim().toLowerCase()) ||
|
||||
temp_scenario.prefmodel1[j].trim().toLowerCase().includes(mdls[i].name.trim().toLowerCase())) {
|
||||
|
||||
let allow = true;
|
||||
if (!scenarioallownsfw)
|
||||
{
|
||||
for(var k=0;k<nsfwmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(nsfwmodels[k]))
|
||||
{
|
||||
if (!scenarioallownsfw) {
|
||||
for (var k = 0; k < nsfwmodels.length; ++k) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(nsfwmodels[k])) {
|
||||
allow = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(allow)
|
||||
{
|
||||
if (allow) {
|
||||
selected_models.push(mdls[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (selected_models.length == 0) //no selected model, secondary options
|
||||
{
|
||||
for (var i = 0; i < mdls.length; ++i)
|
||||
{
|
||||
let skipignored = false;
|
||||
for(let k=0;k<ignoredmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(ignoredmodels[k].trim().toLowerCase()))
|
||||
{
|
||||
skipignored = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!skipignored) {
|
||||
for (var j = 0; j < temp_scenario.prefmodel2.length; ++j) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(temp_scenario.prefmodel2[j].trim().toLowerCase()) ||
|
||||
temp_scenario.prefmodel2[j].trim().toLowerCase().includes(mdls[i].name.trim().toLowerCase())) {
|
||||
let allow = true;
|
||||
if (!scenarioallownsfw)
|
||||
{
|
||||
for(var k=0;k<nsfwmodels.length;++k)
|
||||
{
|
||||
if(mdls[i].name.trim().toLowerCase().includes(nsfwmodels[k]))
|
||||
{
|
||||
if (!scenarioallownsfw) {
|
||||
for (var k = 0; k < nsfwmodels.length; ++k) {
|
||||
if (mdls[i].name.trim().toLowerCase().includes(nsfwmodels[k])) {
|
||||
allow = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if(allow)
|
||||
{
|
||||
if (allow) {
|
||||
selected_models.push(mdls[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (selected_models.length == 0) //still no selected model, pick first one
|
||||
{
|
||||
|
@ -10588,6 +10642,10 @@ Current version: 137
|
|||
if (kobold_endpoint_version && kobold_endpoint_version != "" && compare_version_str(kobold_endpoint_version, "1.2.4") >= 0)
|
||||
{
|
||||
submit_payload.use_default_badwordsids = determine_if_ban_eos(input_was_empty);
|
||||
if(is_using_kcpp_with_added_memory())
|
||||
{
|
||||
submit_payload.bypass_eos = (localsettings.eos_ban_mode == 3?true:false);
|
||||
}
|
||||
}
|
||||
|
||||
let pseudostreaming = (determine_streaming_type()==1);
|
||||
|
@ -15447,6 +15505,7 @@ Current version: 137
|
|||
<option value="0">Auto</option>
|
||||
<option value="1">Unban</option>
|
||||
<option value="2">Ban</option>
|
||||
<option value="3">Bypass</option>
|
||||
</select>
|
||||
</div>
|
||||
<div class="settinglabel">
|
||||
|
|
|
@ -82,6 +82,7 @@ class generation_inputs(ctypes.Structure):
|
|||
("sampler_order", ctypes.c_int * sampler_order_max),
|
||||
("sampler_len", ctypes.c_int),
|
||||
("allow_eos_token", ctypes.c_bool),
|
||||
("bypass_eos_token", ctypes.c_bool),
|
||||
("render_special", ctypes.c_bool),
|
||||
("stop_sequence", ctypes.c_char_p * stop_token_max),
|
||||
("stream_sse", ctypes.c_bool),
|
||||
|
@ -396,7 +397,7 @@ def load_model(model_filename):
|
|||
ret = handle.load_model(inputs)
|
||||
return ret
|
||||
|
||||
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[]):
|
||||
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[], bypass_eos_token=False):
|
||||
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
|
||||
inputs = generation_inputs()
|
||||
inputs.prompt = prompt.encode("UTF-8")
|
||||
|
@ -435,6 +436,7 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
|
|||
inputs.grammar = grammar.encode("UTF-8")
|
||||
inputs.grammar_retain_state = grammar_retain_state
|
||||
inputs.allow_eos_token = not use_default_badwordsids
|
||||
inputs.bypass_eos_token = bypass_eos_token
|
||||
inputs.render_special = render_special
|
||||
if mirostat in (1, 2):
|
||||
inputs.mirostat = mirostat
|
||||
|
@ -823,6 +825,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
logit_biases=genparams.get('logit_bias', {}),
|
||||
render_special=genparams.get('render_special', False),
|
||||
banned_tokens=genparams.get('banned_tokens', []),
|
||||
bypass_eos_token=genparams.get('bypass_eos', False),
|
||||
)
|
||||
|
||||
genout = {"text":"","status":-1,"stopreason":-1}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue