mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-04-26 10:41:25 +00:00
fixed a deadlock
This commit is contained in:
parent
ee2ecfbf81
commit
18a3bedf63
2 changed files with 109 additions and 53 deletions
|
|
@ -760,6 +760,10 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
}
|
||||
|
||||
/* Leaves 1.5rem of space below the content of the #chat_msg_body element. */
#chat_msg_body {
|
||||
padding-bottom: 1.5rem;
|
||||
}
|
||||
|
||||
|
||||
/* Viewports */
|
||||
#maineditbody
|
||||
|
|
@ -4550,6 +4554,7 @@ Current version indicated by LITEVER below.
|
|||
adaptivep_target: -1.0,
|
||||
adaptivep_decay: 0.9,
|
||||
sampler_order: [6, 0, 1, 3, 4, 2, 5],
|
||||
custom_sampler_fields: {},
|
||||
};
|
||||
|
||||
const defaultsettings = JSON.parse(JSON.stringify(localsettings));
|
||||
|
|
@ -12813,6 +12818,30 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("secondepsamplecontainer").classList.remove("hidden");
|
||||
}
|
||||
|
||||
var pendingcustomfieldssampler = {};
|
||||
/**
 * Opens an OK/Cancel input box that lets the user edit the extra custom
 * fields as JSON text. The box is pre-filled with the current value of
 * pendingcustomfieldssampler.
 *
 * On OK:
 *  - blank input clears the fields (stored as an empty object);
 *  - a valid JSON object replaces pendingcustomfieldssampler, refreshes the
 *    "custom_field_overview" label ("N items" or "OFF") and calls
 *    sampler_setting_tweaked();
 *  - anything else (malformed JSON, or JSON that is not a plain object such
 *    as an array, string, number or null) is logged to the console and the
 *    previously stored fields are left untouched.
 *
 * On Cancel nothing changes.
 */
function show_customfields_sampler()
{
	// The trailing false,true arguments are forwarded as-is; their meaning is defined by inputBoxOkCancel.
	inputBoxOkCancel("Insert JSON object containing custom fields to send. These values will be added to every generation payload.","Extra Custom Fields",JSON.stringify(pendingcustomfieldssampler),"Paste JSON Here",()=>{
		let userinput = getInputBoxValue().trim();
		try
		{
			// Parse into a local first, so that a malformed entry cannot wipe the fields that were stored before.
			let parsedfields = {};
			if(userinput!="")
			{
				parsedfields = JSON.parse(userinput);
			}
			// The stored value is merged key-by-key into request payloads elsewhere,
			// so only a plain object is acceptable: arrays and strings would inject index keys.
			if(parsedfields===null || typeof parsedfields!=="object" || Array.isArray(parsedfields))
			{
				throw new Error("expected a JSON object");
			}
			pendingcustomfieldssampler = parsedfields;
			let fieldcount = Object.keys(parsedfields).length;
			document.getElementById("custom_field_overview").innerText = (fieldcount>0)?(`${fieldcount} items`):"OFF";
			sampler_setting_tweaked();
		} catch (e) {
			console.log("Custom fields incorrectly formatted: "+e);
		}
	},
	()=>{
		//do nothing on cancel
	},false,true);
}
|
||||
|
||||
function explain_horde()
|
||||
{
|
||||
msgbox("The AI Horde generates text using crowdsourced GPUs by volunteer workers. By default your inputs are not logged, but as Horde workers are open source, they can be modified to do so. <br><br>In all cases, the sender will *always be anonymous*, however you are still advised to avoid sending privacy sensitive information.<br>","Disclaimer",true);
|
||||
|
|
@ -15797,6 +15826,7 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("nsigma").value = localsettings.nsigma;
|
||||
document.getElementById("dynatemp_overview").innerText = (localsettings.dynatemp_range!=0?"ON":"OFF");
|
||||
document.getElementById("second_ep_overview").innerText = (localsettings.second_ep_qty>0 && localsettings.second_ep_url?"ON":"OFF");
|
||||
document.getElementById("custom_field_overview").innerText = (localsettings.custom_sampler_fields && Object.keys(localsettings.custom_sampler_fields).length>0)?(`${Object.keys(localsettings.custom_sampler_fields).length} items`):"OFF";
|
||||
document.getElementById("presence_penalty").value = localsettings.presence_penalty;
|
||||
document.getElementById("sampler_seed").value = localsettings.sampler_seed;
|
||||
document.getElementById("top_k").value = document.getElementById("top_k_slide").value = localsettings.top_k;
|
||||
|
|
@ -15934,6 +15964,7 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("imagestyleinput").value = localsettings.image_styles;
|
||||
document.getElementById("negpromptinput").value = localsettings.image_negprompt;
|
||||
pendinggrammar = localsettings.grammar;
|
||||
pendingcustomfieldssampler = JSON.parse(JSON.stringify(localsettings.custom_sampler_fields));
|
||||
|
||||
//prepare the input for sampler order
|
||||
let samplerstr = localsettings.sampler_order.toString();
|
||||
|
|
@ -16138,6 +16169,7 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("miro_tau").value = found.miro_tau == null? defaultsettings.miro_tau : found.miro_tau;
|
||||
document.getElementById("miro_eta").value = found.miro_eta == null? defaultsettings.miro_eta : found.miro_eta;
|
||||
pendinggrammar = found.grammar == null? defaultsettings.grammar : found.grammar;
|
||||
pendingcustomfieldssampler = found.custom_sampler_fields == null? defaultsettings.custom_sampler_fields : found.custom_sampler_fields;
|
||||
document.getElementById("dynatemp_range").value = found.dynatemp_range;
|
||||
document.getElementById("dynatemp_exponent").value = found.dynatemp_exponent;
|
||||
document.getElementById("dynatemp_overview").innerText = (document.getElementById("dynatemp_range").value!=0?"ON":"OFF");
|
||||
|
|
@ -16145,6 +16177,7 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("second_ep_model").value = found.second_ep_model == null? defaultsettings.second_ep_model : found.second_ep_model;
|
||||
document.getElementById("second_ep_url").value = found.second_ep_url == null? defaultsettings.second_ep_url : found.second_ep_url;
|
||||
document.getElementById("second_ep_overview").innerText = (document.getElementById("second_ep_qty").value>0 && document.getElementById("second_ep_url").value!=""?"ON":"OFF");
|
||||
document.getElementById("custom_field_overview").innerText = (pendingcustomfieldssampler && Object.keys(pendingcustomfieldssampler).length>0)?(`${Object.keys(pendingcustomfieldssampler).length} items`):"OFF";
|
||||
} else {
|
||||
document.getElementById("presetsdesc").innerText = "";
|
||||
}
|
||||
|
|
@ -16281,6 +16314,7 @@ Current version indicated by LITEVER below.
|
|||
miro_tau: parseFloat(document.getElementById("miro_tau").value),
|
||||
miro_eta: parseFloat(document.getElementById("miro_eta").value),
|
||||
grammar: pendinggrammar,
|
||||
custom_sampler_fields: pendingcustomfieldssampler,
|
||||
dynatemp_range: parseFloat(document.getElementById("dynatemp_range").value),
|
||||
dynatemp_exponent: parseFloat(document.getElementById("dynatemp_exponent").value),
|
||||
second_ep_qty: document.getElementById("second_ep_qty").value,
|
||||
|
|
@ -16427,6 +16461,7 @@ Current version indicated by LITEVER below.
|
|||
document.getElementById("adaptivep_decay").value != (found.adaptivep_decay != null ? found.adaptivep_decay : defaultsettings.adaptivep_decay) ||
|
||||
document.getElementById("nsigma").value != found.nsigma ||
|
||||
pendinggrammar != (found.grammar ? found.grammar : defaultsettings.grammar) ||
|
||||
JSON.stringify(pendingcustomfieldssampler) != (found.custom_sampler_fields ? JSON.stringify(found.custom_sampler_fields) : JSON.stringify(defaultsettings.custom_sampler_fields)) ||
|
||||
document.getElementById("dynatemp_range").value != found.dynatemp_range ||
|
||||
document.getElementById("dynatemp_exponent").value != found.dynatemp_exponent ||
|
||||
document.getElementById("second_ep_qty").value != (found.second_ep_qty != null ? found.second_ep_qty : defaultsettings.second_ep_qty) ||
|
||||
|
|
@ -16847,6 +16882,7 @@ Current version indicated by LITEVER below.
|
|||
localsettings.image_styles = document.getElementById("imagestyleinput").value;
|
||||
localsettings.image_negprompt = document.getElementById("negpromptinput").value;
|
||||
localsettings.grammar = pendinggrammar;
|
||||
localsettings.custom_sampler_fields = pendingcustomfieldssampler;
|
||||
localsettings.tokenstreammode = document.getElementById("tokenstreammode").value;
|
||||
localsettings.img_autogen_type = document.getElementById("img_autogen_type").value;
|
||||
localsettings.img_crop = (document.getElementById("img_crop").checked ? true : false);
|
||||
|
|
@ -17446,6 +17482,8 @@ Current version indicated by LITEVER below.
|
|||
"models": selected_models.map((m) => { return m.name }),
|
||||
};
|
||||
|
||||
Object.assign(submit_payload.params, localsettings.custom_sampler_fields);
|
||||
|
||||
if (localsettings.sampler_seed >= 1) {
|
||||
submit_payload.params.sampler_seed = localsettings.sampler_seed;
|
||||
}
|
||||
|
|
@ -17522,6 +17560,8 @@ Current version indicated by LITEVER below.
|
|||
"models": selected_models.map((m) => { return m.name }),
|
||||
};
|
||||
|
||||
Object.assign(submit_payload.params, localsettings.custom_sampler_fields);
|
||||
|
||||
if(localsettings.sampler_seed>=1)
|
||||
{
|
||||
submit_payload.params.sampler_seed = localsettings.sampler_seed;
|
||||
|
|
@ -20515,6 +20555,8 @@ Current version indicated by LITEVER below.
|
|||
"models": selected_models.map((m) => { return m.name }),
|
||||
};
|
||||
|
||||
Object.assign(submit_payload.params, localsettings.custom_sampler_fields);
|
||||
|
||||
if(is_using_kcpp_with_added_memory())
|
||||
{
|
||||
submit_payload.params.memory = truncated_memory;
|
||||
|
|
@ -21008,6 +21050,16 @@ Current version indicated by LITEVER below.
|
|||
"temperature": submit_payload.params.temperature,
|
||||
"top_p": submit_payload.params.top_p
|
||||
}
|
||||
if(document.getElementById("useoainonstandard").checked)
|
||||
{
|
||||
//send ALL non standard params
|
||||
if(submit_payload.params.sampler_seed>=1)
|
||||
{
|
||||
oai_payload.seed = submit_payload.params.sampler_seed;
|
||||
}
|
||||
Object.assign(oai_payload, submit_payload.params);
|
||||
}
|
||||
Object.assign(oai_payload, localsettings.custom_sampler_fields);
|
||||
if(localsettings.request_logprobs && !targetep.toLowerCase().includes("api.x.ai") && !targetep.toLowerCase().includes("api.mistral.ai"))
|
||||
{
|
||||
if(document.getElementById("useoaichatcompl").checked || targetep.toLowerCase().includes("api.x.ai"))
|
||||
|
|
@ -21034,18 +21086,10 @@ Current version indicated by LITEVER below.
|
|||
if(!targetep.toLowerCase().includes("pollinations.ai") && !targetep.toLowerCase().includes("api.mistral.ai") && !targetep.toLowerCase().includes("api.x.ai"))
|
||||
{
|
||||
//mistral api does not support presence pen
|
||||
oai_payload.presence_penalty = scaled_rep_pen;
|
||||
}
|
||||
if(document.getElementById("useoainonstandard").checked)
|
||||
{
|
||||
//featherless api supports additional fields, include them
|
||||
oai_payload.top_k = (submit_payload.params.top_k<1?300:submit_payload.params.top_k);
|
||||
oai_payload.min_p = localsettings.min_p;
|
||||
if(submit_payload.params.sampler_seed>=1)
|
||||
if(scaled_rep_pen>0)
|
||||
{
|
||||
oai_payload.seed = submit_payload.params.sampler_seed;
|
||||
oai_payload.presence_penalty = scaled_rep_pen;
|
||||
}
|
||||
oai_payload.top_a = localsettings.top_a;
|
||||
}
|
||||
if(submit_payload.params.logit_bias && JSON.stringify(submit_payload.params.logit_bias) != '{}')
|
||||
{
|
||||
|
|
@ -21312,6 +21356,7 @@ Current version indicated by LITEVER below.
|
|||
"max_tokens": submit_payload.params.max_length,
|
||||
"temperature": submit_payload.params.temperature,
|
||||
};
|
||||
Object.assign(claude_payload, localsettings.custom_sampler_fields);
|
||||
claude_payload.messages.push({"role": "user", "content": submit_payload.prompt})
|
||||
if(sysprompt)
|
||||
{
|
||||
|
|
@ -21528,6 +21573,8 @@ Current version indicated by LITEVER below.
|
|||
}
|
||||
};
|
||||
|
||||
Object.assign(payload, localsettings.custom_sampler_fields);
|
||||
|
||||
if(document.getElementById("usegeminiweb").checked)
|
||||
{
|
||||
payload["tools"] = [{"google_search": {}}];
|
||||
|
|
@ -21710,6 +21757,8 @@ Current version indicated by LITEVER below.
|
|||
submit_payload.params.smoothing_factor = localsettings.smoothing_factor;
|
||||
// submit_payload.params.smoothing_curve = localsettings.smoothing_curve; //no idea if horde supports this, dont care to check
|
||||
// submit_payload.params.nsigma = localsettings.nsigma;
|
||||
|
||||
Object.assign(submit_payload.params, localsettings.custom_sampler_fields);
|
||||
}
|
||||
|
||||
last_request_str = JSON.stringify(submit_payload);
|
||||
|
|
@ -30130,6 +30179,15 @@ Current version indicated by LITEVER below.
|
|||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Settings row "Extra Custom Fields": the button calls show_customfields_sampler() on click; its inner span #custom_field_overview holds the status text and starts as "OFF". -->
<div style="display:flex;width:100%;">
|
||||
<div class="settinglabel settingcell">
|
||||
<div class="justifyleft" style="width:100%">Extra Custom Fields <span class="helpicon">?<span class="helptext">
|
||||
Define custom parameters to send along with all generation payloads. Advanced users only.</span></span></div>
|
||||
<div class="justifyleft" style="width:100%;">
|
||||
<button title="Extra Custom Fields" type="button" class="btn btn-primary" style="padding:2px 4px;font-size:12px;" onclick="show_customfields_sampler()"><span id="custom_field_overview">OFF</span></button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -31440,7 +31498,7 @@ Current version indicated by LITEVER below.
|
|||
<div><input type="checkbox" id="useoaichatcompl" title="Use ChatCompletions API" onchange="toggleoaichatcompl()">
|
||||
<div class="box-label">Chat-Completions API</div></div>
|
||||
<div><input type="checkbox" id="useoainonstandard" title="Send Non-Standard Fields">
|
||||
<div class="box-label">Non-Standard Fields <span class="helpicon">?<span class="helptext">Send extra non-standard samplers like Min-P, Top-K and Top-A.</span></span></div></div>
|
||||
<div class="box-label">Non-Standard Fields <span class="helpicon">?<span class="helptext">Send all extra non-standard samplers (e.g Min-P, Top-K, Top-A, TFS). May be rejected by many endpoints.</span></span></div></div>
|
||||
</div>
|
||||
<div id="useoaichatcomplbox" class="hidden" onload="toggleoaichatcompl();">
|
||||
<div style="display: inline-block;">
|
||||
|
|
|
|||
82
koboldcpp.py
82
koboldcpp.py
|
|
@ -73,7 +73,7 @@ dry_seq_break_max = 128
|
|||
extra_images_max = 4 # for kontext/qwen img
|
||||
|
||||
# global vars
|
||||
KcppVersion = "1.112.1"
|
||||
KcppVersion = "1.112.2"
|
||||
showdebug = True
|
||||
kcpp_instance = None #global running instance
|
||||
global_memory = {"tunnel_url": "", "restart_target":"", "input_to_exit":False, "load_complete":False, "restart_override_base_config":"", "last_active_timestamp":datetime.now(), "triggered_sleeping":False, "current_model":"initial_model", "base_config":"", "swapReqType": None, "autoswapmode": False}
|
||||
|
|
@ -4401,30 +4401,29 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
|
|||
|
||||
if is_different_model or was_auto_unloaded:
|
||||
model_switch_pass = True
|
||||
with proxy_reload_lock:
|
||||
whitelist = get_current_admindir_list() # see if its an allowed swap
|
||||
if was_auto_unloaded and not model_name:
|
||||
model_name = "initial_model"
|
||||
if is_different_model and (model_name in whitelist):
|
||||
global_memory["last_active_timestamp"] = datetime.now()
|
||||
global_memory["triggered_sleeping"] = False
|
||||
reqbody = json.dumps({"filename":model_name})
|
||||
reqheaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': str(len(reqbody)),
|
||||
}
|
||||
if args.adminpassword:
|
||||
reqheaders["Authorization"] = f"Bearer {args.adminpassword}"
|
||||
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
|
||||
conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders)
|
||||
resp = conn.getresponse()
|
||||
time.sleep(3)
|
||||
global_memory["last_active_timestamp"] = datetime.now()
|
||||
global_memory["triggered_sleeping"] = False
|
||||
if not self.wait_for_upstream_ready(upstream_port,120,0.5):
|
||||
self.send_error(504, "KoboldCpp model swap reload timed out")
|
||||
return
|
||||
time.sleep(0.1)
|
||||
whitelist = get_current_admindir_list() # see if its an allowed swap
|
||||
if was_auto_unloaded and not model_name:
|
||||
model_name = "initial_model"
|
||||
if is_different_model and (model_name in whitelist):
|
||||
global_memory["last_active_timestamp"] = datetime.now()
|
||||
global_memory["triggered_sleeping"] = False
|
||||
reqbody = json.dumps({"filename":model_name})
|
||||
reqheaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': str(len(reqbody)),
|
||||
}
|
||||
if args.adminpassword:
|
||||
reqheaders["Authorization"] = f"Bearer {args.adminpassword}"
|
||||
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
|
||||
conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders)
|
||||
resp = conn.getresponse()
|
||||
time.sleep(3)
|
||||
global_memory["last_active_timestamp"] = datetime.now()
|
||||
global_memory["triggered_sleeping"] = False
|
||||
if not self.wait_for_upstream_ready(upstream_port,120,0.5):
|
||||
self.send_error(504, "KoboldCpp model swap reload timed out")
|
||||
return
|
||||
time.sleep(0.1)
|
||||
if autoswapEnabled and not model_switch_pass:
|
||||
textReqs = ["/api/extra/generate/stream","/api/extra/tokencount","/api/v1/generate","/sdapi/v1/interrogate","/v1/completions","/v1/chat/completions","/v1/responses","/completions","/chat/completions","/responses"]
|
||||
sttReqs = ["/api/extra/transcribe","/v1/audio/transcriptions"]
|
||||
|
|
@ -4454,23 +4453,22 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
|
|||
swapModeChanged = True
|
||||
|
||||
if (global_memory["swapReqType"] is not None and swapModeChanged):
|
||||
with proxy_reload_lock:
|
||||
reqbody = json.dumps({"filename":global_memory["current_model"], "baseconfig": global_memory["base_config"]})
|
||||
reqheaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': str(len(reqbody)),
|
||||
}
|
||||
if args.adminpassword:
|
||||
reqheaders["Authorization"] = f"Bearer {args.adminpassword}"
|
||||
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
|
||||
conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders)
|
||||
resp = conn.getresponse()
|
||||
time.sleep(3)
|
||||
global_memory["last_active_timestamp"] = datetime.now()
|
||||
if not self.wait_for_upstream_ready(upstream_port,120,0.5):
|
||||
self.send_error(504, "KoboldCpp model swap reload timed out")
|
||||
return
|
||||
time.sleep(0.1)
|
||||
reqbody = json.dumps({"filename":global_memory["current_model"], "baseconfig": global_memory["base_config"]})
|
||||
reqheaders = {
|
||||
'Content-Type': 'application/json',
|
||||
'Content-Length': str(len(reqbody)),
|
||||
}
|
||||
if args.adminpassword:
|
||||
reqheaders["Authorization"] = f"Bearer {args.adminpassword}"
|
||||
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
|
||||
conn.request("POST", "/api/admin/reload_config", body=reqbody, headers=reqheaders)
|
||||
resp = conn.getresponse()
|
||||
time.sleep(3)
|
||||
global_memory["last_active_timestamp"] = datetime.now()
|
||||
if not self.wait_for_upstream_ready(upstream_port,120,0.5):
|
||||
self.send_error(504, "KoboldCpp model swap reload timed out")
|
||||
return
|
||||
time.sleep(0.1)
|
||||
|
||||
try: # connect upstream
|
||||
conn = http.client.HTTPConnection('localhost', upstream_port, timeout=600)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue