mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
expose stop reason in generation
This commit is contained in:
parent
327682fb97
commit
4ec8a9c57b
4 changed files with 184 additions and 85 deletions
1
expose.h
1
expose.h
|
@ -97,6 +97,7 @@ struct generation_inputs
|
|||
struct generation_outputs
|
||||
{
|
||||
int status = -1;
|
||||
int stopreason = stop_reason::INVALID;
|
||||
const char * text; //response will now be stored in c++ allocated memory
|
||||
};
|
||||
struct token_count_outputs
|
||||
|
|
|
@ -1584,6 +1584,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
printf("\nWarning: KCPP text generation not initialized!\n");
|
||||
output.text = nullptr;
|
||||
output.status = 0;
|
||||
output.stopreason = stop_reason::INVALID;
|
||||
generation_finished = true;
|
||||
return output;
|
||||
}
|
||||
|
@ -2125,6 +2126,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
fprintf(stderr, "\nFailed to predict at %d! Check your context buffer sizes!\n",n_past);
|
||||
output.text = nullptr;
|
||||
output.status = 0;
|
||||
output.stopreason = stop_reason::INVALID;
|
||||
generation_finished = true;
|
||||
return output;
|
||||
}
|
||||
|
@ -2334,6 +2336,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
fprintf(stderr, "\nFailed to eval llava image at %d!\n",n_past);
|
||||
output.text = nullptr;
|
||||
output.status = 0;
|
||||
output.stopreason = stop_reason::INVALID;
|
||||
generation_finished = true;
|
||||
return output;
|
||||
}
|
||||
|
@ -2344,6 +2347,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
fprintf(stderr, "\nLLAVA image tokens mismatch at %d! (%d vs %d tokens)\n",n_past,llavatokenscounted,llavatokensevaled);
|
||||
output.text = nullptr;
|
||||
output.status = 0;
|
||||
output.stopreason = stop_reason::INVALID;
|
||||
generation_finished = true;
|
||||
return output;
|
||||
}
|
||||
|
@ -2381,6 +2385,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
|||
printf("\nCtxLimit: %d/%d, Process:%.2fs (%.1fms/T = %.2fT/s), Generate:%.2fs (%.1fms/T = %.2fT/s), Total:%.2fs (%.2fT/s)",(int)current_context_tokens.size(),(int)nctx, time1, pt1, ts1, time2, pt2, ts2, (time1 + time2), tokens_per_second);
|
||||
fflush(stdout);
|
||||
output.status = 1;
|
||||
output.stopreason = last_stop_reason;
|
||||
generation_finished = true;
|
||||
last_eval_time = pt2;
|
||||
last_process_time = pt1;
|
||||
|
|
188
klite.embd
188
klite.embd
|
@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
|
|||
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
|
||||
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
|
||||
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
|
||||
Current version: 134
|
||||
Current version: 135
|
||||
-Concedo
|
||||
-->
|
||||
|
||||
|
@ -1402,6 +1402,14 @@ Current version: 134
|
|||
margin: 2px;
|
||||
font-weight: bolder;
|
||||
}
|
||||
.wiarrowbtn
|
||||
{
|
||||
font-size: 12px;
|
||||
height: 18px;
|
||||
padding: 2px;
|
||||
margin: 0px 1px 0px 1px;
|
||||
font-weight: bolder;
|
||||
}
|
||||
.wiinputkeycol
|
||||
{
|
||||
min-width: 70px;
|
||||
|
@ -3065,6 +3073,9 @@ Current version: 134
|
|||
if (custom_kobold_endpoint != "" && data && data.results != null && data.results.length > 0) {
|
||||
synchro_streaming_response += data.results[0].text;
|
||||
synchro_streaming_tokens_left -= tokens_per_tick;
|
||||
if (data.results[0].finish_reason == "stop") {
|
||||
last_stop_reason = "stop";
|
||||
}
|
||||
|
||||
//handle some early stopping criterias
|
||||
if (localsettings.opmode == 3) //stop on selfname found
|
||||
|
@ -3213,6 +3224,10 @@ Current version: 134
|
|||
for (let event of chunk) {
|
||||
if (event.event === 'message') {
|
||||
synchro_pending_stream += event.data.token;
|
||||
if(event.data.finish_reason=="stop")
|
||||
{
|
||||
last_stop_reason = "stop";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3614,6 +3629,7 @@ Current version: 134
|
|||
var custom_claude_model = "";
|
||||
var uses_cors_proxy = false; //we start off attempting a direct connection. switch to proxy if that fails
|
||||
var synchro_polled_response = null;
|
||||
var last_stop_reason = ""; //update stop reason if known
|
||||
var synchro_pending_stream = ""; //used for token pseduo streaming for kobold api only
|
||||
var waiting_for_autosummary = false;
|
||||
var italics_regex = new RegExp(/\*(\S[^*]+\S)\*/g); //the fallback regex
|
||||
|
@ -3633,8 +3649,8 @@ Current version: 134
|
|||
var welcome = "";
|
||||
var personal_notes = "";
|
||||
var logitbiasdict = {};
|
||||
var regexreplace_pattern = [];
|
||||
var regexreplace_replacement = [];
|
||||
var regexreplace_data = [];
|
||||
const num_regex_rows = 4;
|
||||
|
||||
var localsettings = {
|
||||
my_api_key: "0000000000", //put here so it can be saved and loaded in persistent mode
|
||||
|
@ -5084,8 +5100,7 @@ Current version: 134
|
|||
new_save_storyobj.wiinsertlocation = wi_insertlocation;
|
||||
new_save_storyobj.personal_notes = personal_notes;
|
||||
new_save_storyobj.logitbiasdict = JSON.parse(JSON.stringify(logitbiasdict));
|
||||
new_save_storyobj.regexreplace_pattern = JSON.parse(JSON.stringify(regexreplace_pattern));
|
||||
new_save_storyobj.regexreplace_replacement = JSON.parse(JSON.stringify(regexreplace_replacement));
|
||||
new_save_storyobj.regexreplace_data = JSON.parse(JSON.stringify(regexreplace_data));
|
||||
|
||||
if (export_settings) {
|
||||
new_save_storyobj.savedsettings = JSON.parse(JSON.stringify(localsettings));
|
||||
|
@ -5257,8 +5272,7 @@ Current version: 134
|
|||
let old_current_wi = current_wi;
|
||||
let old_extrastopseq = extrastopseq;
|
||||
let old_notes = personal_notes;
|
||||
let old_regexreplace_pattern = regexreplace_pattern;
|
||||
let old_regexreplace_replacement = regexreplace_replacement;
|
||||
let old_regexreplace_data = regexreplace_data;
|
||||
|
||||
//determine if oldui file or newui file format
|
||||
restart_new_game(false);
|
||||
|
@ -5329,11 +5343,18 @@ Current version: 134
|
|||
if (storyobj.personal_notes) {
|
||||
personal_notes = storyobj.personal_notes;
|
||||
}
|
||||
if (storyobj.regexreplace_pattern) {
|
||||
regexreplace_pattern = storyobj.regexreplace_pattern;
|
||||
//todo: remove temporary backwards compatibility for regex
|
||||
if (storyobj.regexreplace_pattern && storyobj.regexreplace_replacement) {
|
||||
let pat = storyobj.regexreplace_pattern;
|
||||
let rep = storyobj.regexreplace_replacement;
|
||||
let ll = Math.min(pat.length,rep.length)
|
||||
for(let i=0;i<ll;++i)
|
||||
{
|
||||
regexreplace_data.push({"p":pat[i],"r":rep[i],"b":false});
|
||||
}
|
||||
if (storyobj.regexreplace_replacement) {
|
||||
regexreplace_replacement = storyobj.regexreplace_replacement;
|
||||
}
|
||||
if (storyobj.regexreplace_data) {
|
||||
regexreplace_data = storyobj.regexreplace_data;
|
||||
}
|
||||
} else {
|
||||
//v2 load
|
||||
|
@ -5394,8 +5415,7 @@ Current version: 134
|
|||
if(!loadstopseq)
|
||||
{
|
||||
extrastopseq = old_extrastopseq;
|
||||
regexreplace_pattern = old_regexreplace_pattern;
|
||||
regexreplace_replacement = old_regexreplace_replacement;
|
||||
regexreplace_data = old_regexreplace_data;
|
||||
}
|
||||
|
||||
if (storyobj.savedsettings && storyobj.savedsettings != "")
|
||||
|
@ -8901,14 +8921,15 @@ Current version: 134
|
|||
extrastopseq = document.getElementById("extrastopseq").value;
|
||||
newlineaftermemory = (document.getElementById("newlineaftermemory").checked?true:false);
|
||||
logitbiasdict = pendinglogitbias;
|
||||
regexreplace_pattern = [];
|
||||
regexreplace_replacement = [];
|
||||
for(let i=0;i<20;++i)
|
||||
regexreplace_data = [];
|
||||
for(let i=0;i<num_regex_rows;++i)
|
||||
{
|
||||
let v1 = "";
|
||||
let v2 = "";
|
||||
let bothways = false;
|
||||
let box1 = document.getElementById("regexreplace_pattern"+i);
|
||||
let box2 = document.getElementById("regexreplace_replacement"+i);
|
||||
let bw = document.getElementById("regexreplace_bothways"+i).checked;
|
||||
if(!box1 || !box2)
|
||||
{
|
||||
break;
|
||||
|
@ -8924,8 +8945,7 @@ Current version: 134
|
|||
|
||||
if(v1)
|
||||
{
|
||||
regexreplace_pattern.push(v1);
|
||||
regexreplace_replacement.push(v2);
|
||||
regexreplace_data.push({"p":v1,"r":v2,"b":bw});
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9081,6 +9101,7 @@ Current version: 134
|
|||
pending_response_id = "";
|
||||
poll_in_progress = false;
|
||||
synchro_polled_response = null;
|
||||
last_stop_reason = "";
|
||||
synchro_pending_stream = "";
|
||||
waiting_for_autosummary = false;
|
||||
horde_poll_nearly_completed = false;
|
||||
|
@ -9097,6 +9118,7 @@ Current version: 134
|
|||
nextgeneratedimagemilestone = generateimagesinterval;
|
||||
pending_response_id = "";
|
||||
synchro_polled_response = null;
|
||||
last_stop_reason = "";
|
||||
synchro_pending_stream = "";
|
||||
waiting_for_autosummary = false;
|
||||
last_reply_was_empty = false;
|
||||
|
@ -9127,8 +9149,7 @@ Current version: 134
|
|||
wi_searchdepth = 0;
|
||||
wi_insertlocation = 0;
|
||||
current_anotetemplate = "[Author's note: <|>]";
|
||||
regexreplace_pattern = [];
|
||||
regexreplace_replacement = [];
|
||||
regexreplace_data = [];
|
||||
}
|
||||
render_gametext(save); //necessary to trigger an autosave to wipe out current story in case they exit browser after newgame.
|
||||
}
|
||||
|
@ -9718,6 +9739,20 @@ Current version: 134
|
|||
function submit_generation() {
|
||||
|
||||
let newgen = document.getElementById("input_text").value;
|
||||
|
||||
//apply regex transforms
|
||||
if(regexreplace_data && regexreplace_data.length>0)
|
||||
{
|
||||
for(let i=0;i<regexreplace_data.length;++i)
|
||||
{
|
||||
if(regexreplace_data[i].b && regexreplace_data[i].p!="")
|
||||
{
|
||||
let pat = new RegExp(regexreplace_data[i].p, "gm");
|
||||
newgen = newgen.replace(pat, regexreplace_data[i].r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const user_input_empty = (newgen.trim()=="");
|
||||
let doNotGenerate = false;
|
||||
pending_context_postinjection = "";
|
||||
|
@ -10349,6 +10384,7 @@ Current version: 134
|
|||
poll_ticks_passed = 0;
|
||||
poll_in_progress = false;
|
||||
synchro_polled_response = null;
|
||||
last_stop_reason = "";
|
||||
synchro_pending_stream = "";
|
||||
|
||||
//if this is set, we don't use horde, use the custom endpoint instead
|
||||
|
@ -11420,7 +11456,11 @@ Current version: 134
|
|||
gentxt = trim_extra_stop_seqs(gentxt,true);
|
||||
|
||||
//always trim incomplete sentences for adventure and chat (if not multiline)
|
||||
if (localsettings.opmode == 2 || (localsettings.opmode == 3 && !localsettings.allow_continue_chat) || localsettings.trimsentences == true) {
|
||||
//do not trim if instruct mode AND stop token reached
|
||||
let donottrim = ((localsettings.opmode == 4||localsettings.opmode == 3) && last_stop_reason=="stop");
|
||||
if (!donottrim && (localsettings.opmode == 2
|
||||
|| (localsettings.opmode == 3 && !localsettings.allow_continue_chat)
|
||||
|| localsettings.trimsentences == true)) {
|
||||
gentxt = end_trim_to_sentence(gentxt,true);
|
||||
}
|
||||
|
||||
|
@ -11428,14 +11468,14 @@ Current version: 134
|
|||
gentxt = trim_extra_stop_seqs(gentxt,false);
|
||||
|
||||
//apply regex transform
|
||||
if(regexreplace_pattern && regexreplace_pattern.length>0)
|
||||
if(regexreplace_data && regexreplace_data.length>0)
|
||||
{
|
||||
for(let i=0;i<regexreplace_pattern.length;++i)
|
||||
for(let i=0;i<regexreplace_data.length;++i)
|
||||
{
|
||||
if(regexreplace_pattern[i]!="")
|
||||
if(regexreplace_data[i].p!="")
|
||||
{
|
||||
let pat = new RegExp(regexreplace_pattern[i], "gm");
|
||||
gentxt = gentxt.replace(pat, regexreplace_replacement[i]);
|
||||
let pat = new RegExp(regexreplace_data[i].p, "gm");
|
||||
gentxt = gentxt.replace(pat, regexreplace_data[i].r);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -12001,6 +12041,7 @@ Current version: 134
|
|||
}
|
||||
}
|
||||
synchro_polled_response = null;
|
||||
last_stop_reason = "";
|
||||
synchro_pending_stream = "";
|
||||
show_abort_button(false);
|
||||
render_gametext();
|
||||
|
@ -13139,26 +13180,56 @@ Current version: 134
|
|||
backup_wi();
|
||||
update_wi();
|
||||
|
||||
for(let i=0;i<20;++i)
|
||||
//setup regex replacers
|
||||
populate_regex_replacers();
|
||||
|
||||
document.getElementById("btnlogitbias").disabled = !is_using_custom_ep();
|
||||
|
||||
}
|
||||
|
||||
function populate_regex_replacers()
|
||||
{
|
||||
let regextablehtml = `
|
||||
<tr>
|
||||
<th>Pattern <span class="helpicon">?<span class="helptext">The regex pattern to match against any incoming text. Leave blank to disable.</span></span></th>
|
||||
<th>Replacement <span class="helpicon">?<span class="helptext">The string to replace matches with. Capture groups are allowed (e.g. $1). To remove all matches, leave this blank.</span></span></th>
|
||||
<th>Both Ways <span class="helpicon">?<span class="helptext">If enabled, regex applies for both inputs and outputs, otherwise output only.</span></span></th>
|
||||
</tr>`;
|
||||
let regextable = document.getElementById("regex_replace_table");
|
||||
|
||||
for(let i=0;i<num_regex_rows;++i)
|
||||
{
|
||||
regextablehtml += `
|
||||
<tr>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Inactive)" value="" id="regexreplace_pattern${i}"></td>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Remove)" value="" id="regexreplace_replacement${i}"></td>
|
||||
<td><input type="checkbox" id="regexreplace_bothways${i}" style="margin:0px 0 0;"></td>
|
||||
</tr>
|
||||
`;
|
||||
}
|
||||
|
||||
regextable.innerHTML = regextablehtml;
|
||||
|
||||
for(let i=0;i<num_regex_rows;++i)
|
||||
{
|
||||
let a1 = document.getElementById("regexreplace_pattern"+i);
|
||||
let a2 = document.getElementById("regexreplace_replacement"+i);
|
||||
if(a1 && a2)
|
||||
let a3 = document.getElementById("regexreplace_bothways"+i);
|
||||
if(a1 && a2 && a3)
|
||||
{
|
||||
if(i<regexreplace_pattern.length)
|
||||
if(i<regexreplace_data.length)
|
||||
{
|
||||
a1.value = regexreplace_pattern[i];
|
||||
a2.value = regexreplace_replacement[i];
|
||||
a1.value = regexreplace_data[i].p;
|
||||
a2.value = regexreplace_data[i].r;
|
||||
a3.checked = (regexreplace_data[i].b?true:false);
|
||||
}
|
||||
else
|
||||
{
|
||||
a1.value = a2.value = "";
|
||||
a3.checked = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
document.getElementById("btnlogitbias").disabled = !is_using_custom_ep();
|
||||
|
||||
}
|
||||
|
||||
function toggle_wi_sk(idx) {
|
||||
|
@ -13197,6 +13268,28 @@ Current version: 134
|
|||
update_wi();
|
||||
}
|
||||
|
||||
/**
 * Moves the world info entry at `idx` one position earlier in `current_wi`
 * by swapping it with the entry directly before it.
 *
 * Nothing is swapped when `idx` refers to the first entry, lies outside the
 * array, or is not an integer. `save_wi()` is always called before the swap
 * attempt and `update_wi()` after it.
 * NOTE(review): presumably save_wi() commits pending editor values and
 * update_wi() redraws the list - confirm against their definitions.
 *
 * @param {number|string} idx - Index of the entry to move; numeric strings are accepted.
 */
function up_wi(idx) {
	save_wi();
	// Normalise once so the bounds check and the element accesses agree on the same integer.
	const pos = Number(idx);
	if (Number.isInteger(pos) && pos > 0 && pos < current_wi.length) {
		const above = current_wi[pos - 1];
		current_wi[pos - 1] = current_wi[pos];
		current_wi[pos] = above;
	}
	update_wi();
}
|
||||
|
||||
/**
 * Moves the world info entry at `idx` one position later in `current_wi`
 * by swapping it with the entry directly after it.
 *
 * Nothing is swapped when `idx` refers to the last entry, lies outside the
 * array, or is not an integer. `save_wi()` is always called before the swap
 * attempt and `update_wi()` after it.
 * NOTE(review): presumably save_wi() commits pending editor values and
 * update_wi() redraws the list - confirm against their definitions.
 *
 * @param {number|string} idx - Index of the entry to move; numeric strings are accepted.
 */
function down_wi(idx) {
	save_wi();
	// Convert before doing arithmetic: with a string index, `idx + 1` would
	// concatenate ("1" + 1 === "11") and address the wrong slot.
	const pos = Number(idx);
	if (Number.isInteger(pos) && pos >= 0 && pos + 1 < current_wi.length) {
		const below = current_wi[pos + 1];
		current_wi[pos + 1] = current_wi[pos];
		current_wi[pos] = below;
	}
	update_wi();
}
|
||||
|
||||
function add_wi() {
|
||||
save_wi();
|
||||
var ne = {
|
||||
|
@ -13263,8 +13356,10 @@ Current version: 134
|
|||
|
||||
let probarr = [100,90,75,50,25,10,5,1];
|
||||
|
||||
selectionhtml += `<tr class='`+ (ishidden?"hidden":"") +`' id="wirow` + i + `"><td class="col-8" style="font-size: 10px;">` +
|
||||
`<button type="button" class="btn btn-danger widelbtn" id="widel` + i + `" onclick="return del_wi(` + i + `)">X</button></td>` +
|
||||
selectionhtml += `<tr class='`+ (ishidden?"hidden":"") +`' id="wirow` + i + `"><td class="col-8" style="font-size: 10px;">`
|
||||
+`<button type="button" class="btn btn-danger widelbtn" id="widel` + i + `" onclick="return del_wi(` + i + `)">X</button></td>`
|
||||
+`<td><button type="button" class="btn btn-primary wiarrowbtn" id="wiup` + i + `" onclick="return up_wi(` + i + `)">▲</button>`
|
||||
+`<button type="button" class="btn btn-primary wiarrowbtn" id="widown` + i + `" onclick="return down_wi(` + i + `)">▼</button></td>` +
|
||||
`<td class="col-6 wiinputkeycol">
|
||||
<input class="form-control wiinputkey" id="wikey`+ i + `" placeholder="Key(s)" value="` + winame + `">
|
||||
<input class="form-control wiinputkey `+ (curr.selective ? `` : `hidden`) + `" id="wikeysec` + i + `" placeholder="Sec. Key(s)" value="` + wisec + `">` + `</td>
|
||||
|
@ -15256,25 +15351,10 @@ Current version: 134
|
|||
<div><button type="button" class="btn btn-primary" style="width:134px;padding:6px 6px;" id="btnlogitbias" onclick="set_logit_bias()">Edit Logit Biases</button></div>
|
||||
<div class="settinglabel">
|
||||
<div class="justifyleft"><br>Custom Regex Replace <span class="helpicon">?<span
|
||||
class="helptext">Allows transforming incoming text with up to 3 regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span></div>
|
||||
class="helptext">Allows transforming incoming text with regex patterns, modifying all matches. Replacements will be applied in sequence.</span></span></div>
|
||||
</div>
|
||||
<table class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
|
||||
<tr>
|
||||
<th>Pattern <span class="helpicon">?<span class="helptext">The regex pattern to match against any incoming text. Leave blank to disable.</span></span></th>
|
||||
<th>Replacement <span class="helpicon">?<span class="helptext">The string to replace matches with. Capture groups are allowed (e.g. $1). To remove all matches, leave this blank.</span></span></th>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Inactive)" value="" id="regexreplace_pattern0"></td>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Remove)" value="" id="regexreplace_replacement0"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Inactive)" value="" id="regexreplace_pattern1"></td>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Remove)" value="" id="regexreplace_replacement1"></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Inactive)" value="" id="regexreplace_pattern2"></td>
|
||||
<td><input class="settinglabel miniinput" type="text" placeholder="(Remove)" value="" id="regexreplace_replacement2"></td>
|
||||
</tr>
|
||||
<table id="regex_replace_table" class="settinglabel text-center" style="border-spacing: 3px 2px; border-collapse: separate;">
|
||||
|
||||
</table>
|
||||
</div>
|
||||
|
||||
|
|
47
koboldcpp.py
47
koboldcpp.py
|
@ -95,6 +95,7 @@ class generation_inputs(ctypes.Structure):
|
|||
|
||||
class generation_outputs(ctypes.Structure):
    """ctypes view of the C++ ``generation_outputs`` struct declared in expose.h.

    The fields are listed in the same order and with the same types as the
    C++ declaration: status, stopreason, then the text pointer.
    """
    _fields_ = [
        ("status", ctypes.c_int),
        ("stopreason", ctypes.c_int),
        ("text", ctypes.c_char_p),
    ]
|
||||
|
||||
class sd_load_model_inputs(ctypes.Structure):
|
||||
|
@ -493,7 +494,7 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
|
|||
if pendingabortkey!="" and pendingabortkey==genkey:
|
||||
print(f"\nDeferred Abort for GenKey: {pendingabortkey}")
|
||||
pendingabortkey = ""
|
||||
return ""
|
||||
return {"text":"","status":-1,"stopreason":-1}
|
||||
else:
|
||||
ret = handle.generate(inputs)
|
||||
outstr = ""
|
||||
|
@ -504,7 +505,7 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
|
|||
sindex = outstr.find(trim_str)
|
||||
if sindex != -1 and trim_str!="":
|
||||
outstr = outstr[:sindex]
|
||||
return outstr
|
||||
return {"text":outstr,"status":ret.status,"stopreason":ret.stopreason}
|
||||
|
||||
|
||||
def sd_load_model(model_filename):
|
||||
|
@ -656,6 +657,7 @@ nocertify = False
|
|||
start_time = time.time()
|
||||
last_req_time = time.time()
|
||||
last_non_horde_req_time = time.time()
|
||||
currfinishreason = "null"
|
||||
|
||||
def transform_genparams(genparams, api_format):
|
||||
#alias all nonstandard alternative names for rep pen.
|
||||
|
@ -765,8 +767,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
|
||||
async def generate_text(self, genparams, api_format, stream_flag):
|
||||
from datetime import datetime
|
||||
global friendlymodelname, chatcompl_adapter
|
||||
global friendlymodelname, chatcompl_adapter, currfinishreason
|
||||
is_quiet = args.quiet
|
||||
currfinishreason = "null"
|
||||
|
||||
def run_blocking(): #api format 1=basic,2=kai,3=oai,4=oai-chat
|
||||
|
||||
|
@ -812,13 +815,16 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
render_special=genparams.get('render_special', False),
|
||||
)
|
||||
|
||||
recvtxt = ""
|
||||
genout = {"text":"","status":-1,"stopreason":-1}
|
||||
if stream_flag:
|
||||
loop = asyncio.get_event_loop()
|
||||
executor = ThreadPoolExecutor()
|
||||
recvtxt = await loop.run_in_executor(executor, run_blocking)
|
||||
genout = await loop.run_in_executor(executor, run_blocking)
|
||||
else:
|
||||
recvtxt = run_blocking()
|
||||
genout = run_blocking()
|
||||
|
||||
recvtxt = genout['text']
|
||||
currfinishreason = ("length" if (genout['stopreason']!=1) else "stop")
|
||||
|
||||
#flag instance as non-idle for a while
|
||||
washordereq = genparams.get('genkey', '').startswith('HORDEREQ_')
|
||||
|
@ -834,15 +840,15 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
elif api_format==3:
|
||||
res = {"id": "cmpl-1", "object": "text_completion", "created": 1, "model": friendlymodelname,
|
||||
"usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200},
|
||||
"choices": [{"text": recvtxt, "index": 0, "finish_reason": "length"}]}
|
||||
"choices": [{"text": recvtxt, "index": 0, "finish_reason": currfinishreason}]}
|
||||
elif api_format==4:
|
||||
res = {"id": "chatcmpl-1", "object": "chat.completion", "created": 1, "model": friendlymodelname,
|
||||
"usage": {"prompt_tokens": 100,"completion_tokens": 100,"total_tokens": 200},
|
||||
"choices": [{"index": 0, "message":{"role": "assistant", "content": recvtxt,}, "finish_reason": "length"}]}
|
||||
"choices": [{"index": 0, "message":{"role": "assistant", "content": recvtxt,}, "finish_reason": currfinishreason}]}
|
||||
elif api_format==5:
|
||||
res = {"caption": end_trim_to_sentence(recvtxt)}
|
||||
else:
|
||||
res = {"results": [{"text": recvtxt}]}
|
||||
res = {"results": [{"text": recvtxt, "finish_reason":currfinishreason}]}
|
||||
|
||||
try:
|
||||
return res
|
||||
|
@ -863,7 +869,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
self.wfile.flush()
|
||||
|
||||
async def handle_sse_stream(self, genparams, api_format):
|
||||
global friendlymodelname
|
||||
global friendlymodelname, currfinishreason
|
||||
self.send_response(200)
|
||||
self.send_header("cache-control", "no-cache")
|
||||
self.send_header("connection", "keep-alive")
|
||||
|
@ -877,6 +883,9 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
tokenReserve = "" #keeps fully formed tokens that we cannot send out yet
|
||||
while True:
|
||||
streamDone = handle.has_finished() #exit next loop on done
|
||||
if streamDone:
|
||||
sr = handle.get_last_stop_reason()
|
||||
currfinishreason = ("length" if (sr!=1) else "stop")
|
||||
tokenStr = ""
|
||||
streamcount = handle.get_stream_count()
|
||||
while current_token < streamcount:
|
||||
|
@ -893,13 +902,14 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
incomplete_token_buffer.clear()
|
||||
tokenStr += tokenSeg
|
||||
|
||||
if tokenStr!="":
|
||||
if tokenStr!="" or streamDone:
|
||||
sseq = genparams.get('stop_sequence', [])
|
||||
trimstop = genparams.get('trim_stop', False)
|
||||
if trimstop and not streamDone and string_contains_sequence_substring(tokenStr,sseq):
|
||||
tokenReserve += tokenStr
|
||||
await asyncio.sleep(async_sleep_short) #if a stop sequence could trigger soon, do not send output
|
||||
else:
|
||||
if tokenStr!="":
|
||||
tokenStr = tokenReserve + tokenStr
|
||||
tokenReserve = ""
|
||||
|
||||
|
@ -910,15 +920,15 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
|||
if sindex != -1 and trim_str!="":
|
||||
tokenStr = tokenStr[:sindex]
|
||||
|
||||
if tokenStr!="":
|
||||
if tokenStr!="" or streamDone:
|
||||
if api_format == 4: # if oai chat, set format to expected openai streaming response
|
||||
event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":"length","delta":{'role':'assistant','content':tokenStr}}]})
|
||||
event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":currfinishreason,"delta":{'role':'assistant','content':tokenStr}}]})
|
||||
await self.send_oai_sse_event(event_str)
|
||||
elif api_format == 3: # non chat completions
|
||||
event_str = json.dumps({"id":"koboldcpp","object":"text_completion","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":"length","text":tokenStr}]})
|
||||
event_str = json.dumps({"id":"koboldcpp","object":"text_completion","created":1,"model":friendlymodelname,"choices":[{"index":0,"finish_reason":currfinishreason,"text":tokenStr}]})
|
||||
await self.send_oai_sse_event(event_str)
|
||||
else:
|
||||
event_str = json.dumps({"token": tokenStr})
|
||||
event_str = json.dumps({"token": tokenStr, "finish_reason":currfinishreason})
|
||||
await self.send_kai_sse_event(event_str)
|
||||
tokenStr = ""
|
||||
else:
|
||||
|
@ -3159,7 +3169,8 @@ def main(launch_args,start_server=True):
|
|||
benchprompt = "11111111"
|
||||
for i in range(0,10): #generate massive prompt
|
||||
benchprompt += benchprompt
|
||||
result = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,use_default_badwordsids=True)
|
||||
genout = generate(benchprompt,memory="",images=[],max_length=benchlen,max_context_length=benchmaxctx,temperature=0.1,top_k=1,rep_pen=1,use_default_badwordsids=True)
|
||||
result = genout['text']
|
||||
result = (result[:5] if len(result)>5 else "")
|
||||
resultok = (result=="11111")
|
||||
t_pp = float(handle.get_last_process_time())*float(benchmaxctx-benchlen)*0.001
|
||||
|
@ -3212,7 +3223,9 @@ def run_in_queue(launch_args, input_queue, output_queue):
|
|||
data = input_queue.get()
|
||||
if data['command'] == 'generate':
|
||||
(args, kwargs) = data['data']
|
||||
output_queue.put({'command': 'generated text', 'data': generate(*args, **kwargs)})
|
||||
genout = generate(*args, **kwargs)
|
||||
result = genout['text']
|
||||
output_queue.put({'command': 'generated text', 'data': result})
|
||||
time.sleep(0.2)
|
||||
|
||||
def start_in_seperate_process(launch_args):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue