KCPP SD: add warning and step restriction, updated lite, handle quant mode

This commit is contained in:
Concedo 2024-03-01 16:41:19 +08:00
parent 3463688a0e
commit 80011ed8aa
4 changed files with 136 additions and 45 deletions

View file

@ -106,6 +106,7 @@ struct sd_load_model_inputs
const int cublas_info = 0;
const char * vulkan_info;
const int threads;
const int quant = 0;
const int debugmode = 0;
};
struct sd_generation_inputs

View file

@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
Current version: 117
Current version: 118
-Concedo
-->
@ -3478,7 +3478,9 @@ Current version: 117
saved_palm_key: "", //do not ever share this in save files!
saved_kai_addr: "", //do not ever share this in save files!
saved_oai_jailbreak: "", //customized oai system prompt
saved_oai_jailbreak2: "", //oai assistant postfix
saved_oai_custommodel: "", //customized oai custom model
saved_oai_role: 0, //0=user,1=assistant,2=system
saved_a1111_url: default_a1111_base,
saved_xtts_url: default_xtts_base,
prev_custom_endpoint_type: 0, //show a reconnect box to custom endpoint if needed. 0 is horde, otherwise its dropdown value+1
@ -6589,6 +6591,38 @@ Current version: 117
document.getElementById("jailbreakprompttext").classList.add("hidden");
}
}
// Syncs the "Assistant Postfix" (jailbreak2) textbox with saved settings and
// shows/hides it based on the jailbreakprompt2 checkbox state.
function togglejailbreak2()
{
	const textbox = document.getElementById("jailbreakprompttext2");
	// Restore the saved postfix text (an empty saved value yields an empty box).
	textbox.value = (localsettings.saved_oai_jailbreak2=="" ? "" : localsettings.saved_oai_jailbreak2);
	// Only display the textbox while its enabling checkbox is ticked.
	const enabled = document.getElementById("jailbreakprompt2").checked;
	if(enabled)
	{
		textbox.classList.remove("hidden");
	}
	else
	{
		textbox.classList.add("hidden");
	}
}
// Shows/hides the ChatCompletions options box based on the useoaichatcompl
// checkbox, restores the saved message-role dropdown value when shown, and
// refreshes both jailbreak prefix/postfix textboxes.
function toggleoaichatcompl()
{
	const optionsbox = document.getElementById("useoaichatcomplbox");
	if(!document.getElementById("useoaichatcompl").checked)
	{
		optionsbox.classList.add("hidden");
	}
	else
	{
		optionsbox.classList.remove("hidden");
		// Restore the previously saved role (0=user,1=assistant,2=system) if any.
		if(localsettings.saved_oai_role!=null)
		{
			document.getElementById("oairoledropdown").value = localsettings.saved_oai_role;
		}
	}
	togglejailbreak();
	togglejailbreak2();
}
function select_custom_oai_model()
{
@ -6619,6 +6653,7 @@ Current version: 117
} else {
document.getElementById("useoaichatcompl").checked = !non_completions;
}
toggleoaichatcompl();
}
function oai_fetch_models()
{
@ -6728,7 +6763,7 @@ Current version: 117
}
oai_model_change();
togglejailbreak();
toggleoaichatcompl();
}
else if(epchoice==2)
{
@ -6847,28 +6882,22 @@ Current version: 117
{
//now we get the version number, however this is optional
//if it fails we can still proceed
let urls2 = [
apply_proxy_url(tmpep + kobold_custom_version_endpoint),
];
Promise.all(urls2.map(url => fetch(url)
.then(response => response.json())))
fetch(apply_proxy_url(tmpep + kobold_custom_version_endpoint))
.then(response => response.json())
.then(values2 => {
console.log(values2);
let ep_version = values2[0].result;
let ep_version = values2.result;
kobold_endpoint_version = (ep_version?ep_version:"");
}).catch(error => {
console.log("Failed to get KAI version number: " + error);
});
//also get max ctx supported
let urls3 = [
apply_proxy_url(tmpep + kobold_custom_maxctxlen_endpoint),
];
Promise.all(urls3.map(url => fetch(url)
.then(response => response.json())))
fetch(apply_proxy_url(tmpep + kobold_custom_maxctxlen_endpoint))
.then(response => response.json())
.then(values3 => {
console.log(values3);
let ep_maxctx = values3[0].value;
let ep_maxctx = values3.value;
if(ep_maxctx && ep_maxctx>document.getElementById("max_context_length_slide").max)
{
document.getElementById("max_context_length_slide").max = ep_maxctx;
@ -6883,7 +6912,7 @@ Current version: 117
//allow kcpp version check for remote endpoints too
{
//for local mode, check if we are using koboldcpp, if so we can use streaming if permitted by version
fetch(tmpep + koboldcpp_version_endpoint)
fetch(apply_proxy_url(tmpep + koboldcpp_version_endpoint))
.then(x => x.json())
.then(data => {
if(data && data!="" && data.version && data.version!="")
@ -6892,14 +6921,11 @@ Current version: 117
console.log("KoboldCpp Detected: " + koboldcpp_version);
//also check against kcpp's max true context length
let urls4 = [
apply_proxy_url(tmpep + koboldcpp_truemaxctxlen_endpoint),
];
Promise.all(urls4.map(url => fetch(url)
.then(response => response.json())))
fetch(apply_proxy_url(tmpep + koboldcpp_truemaxctxlen_endpoint))
.then(response => response.json())
.then(values4 => {
console.log(values4);
let ep_maxctx = values4[0].value;
let ep_maxctx = values4.value;
if(ep_maxctx && ep_maxctx>document.getElementById("max_context_length_slide").max)
{
document.getElementById("max_context_length_slide").max = ep_maxctx;
@ -6910,13 +6936,10 @@ Current version: 117
});
//and check if there's a kcpp savefile preloaded
let urls5 = [
apply_proxy_url(tmpep + koboldcpp_preloadstory_endpoint),
];
Promise.all(urls5.map(url => fetch(url)
.then(response => response.json())))
fetch(apply_proxy_url(tmpep + koboldcpp_preloadstory_endpoint))
.then(response => response.json())
.then(values5 => {
let tmpstory = values5[0];
let tmpstory = values5;
let is_kai = !(tmpstory.prompt==null);
if(is_kai)
{
@ -6931,6 +6954,41 @@ Current version: 117
console.log("Failed to get preloaded story: " + error);
});
//check if image gen is supported
fetch(apply_proxy_url(tmpep + a1111_models_endpoint))
.then(response => response.json())
.then(values6 => {
console.log(values6);
if(values6 && values6.length>0)
{
let firstitem = values6[0];
if(firstitem.model_name!="inactive" && firstitem.filename!=null)
{
//local image gen is available
if(localsettings.generate_images_mode==0)
{
console.log("Connect to KoboldCpp Image Gen");
localsettings.generate_images_mode = 2;
localsettings.saved_a1111_url = tmpep;
connect_to_a1111(true);
render_gametext(true);
}
}
else
{
//hide the add img if the image server is down
if(localsettings.generate_images_mode==2 && localsettings.saved_a1111_url==tmpep)
{
localsettings.generate_images_mode = 0;
localsettings.saved_a1111_url = default_a1111_base
render_gametext(true);
}
}
}
}).catch(error => {
console.log("Failed to get local image models: " + error);
});
}else{
console.log("Unknown KoboldCpp Check Response: " + data);
}
@ -7009,6 +7067,8 @@ Current version: 117
{
document.getElementById("jailbreakprompttext").value = defaultoaijailbreak;
}
localsettings.saved_oai_role = document.getElementById("oairoledropdown").value;
localsettings.saved_oai_jailbreak2 = document.getElementById("jailbreakprompttext2").value;
let isOpenrouter = (document.getElementById("customapidropdown").value==5);
let dropdown = (isOpenrouter?document.getElementById("custom_openrouter_model"):document.getElementById("custom_oai_model"));
custom_oai_model = dropdown.value.trim();
@ -7028,7 +7088,7 @@ Current version: 117
document.getElementById("connectstatus").classList.add("color_green");
}
document.getElementById("connectstatus").innerHTML = "Connected to OAI Endpoint";
render_gametext();
render_gametext(true);
}
}
else if(epchoice==2) //connect to Scale Endpoint
@ -9704,18 +9764,17 @@ Current version: 117
}
if (document.getElementById("useoaichatcompl").checked) {
let myrole = (localsettings.saved_oai_role==2)?"system":(localsettings.saved_oai_role==1?"assistant":"user");
oai_payload.messages = [];
targetep = (custom_oai_endpoint + oai_submit_endpoint_turbo);
if (document.getElementById("jailbreakprompt") && document.getElementById("jailbreakprompt").checked && document.getElementById("jailbreakprompttext").value!="") {
oai_payload.messages = [
{ "role": "system", "content": document.getElementById("jailbreakprompttext").value },
{ "role": "user", "content": submit_payload.prompt },
];
oai_payload.messages.push({ "role": "system", "content": document.getElementById("jailbreakprompttext").value });
}
else {
oai_payload.messages = [
{ "role": "user", "content": submit_payload.prompt },
];
oai_payload.messages.push({ "role": myrole, "content": submit_payload.prompt });
if (document.getElementById("jailbreakprompt2") && document.getElementById("jailbreakprompt2").checked && document.getElementById("jailbreakprompttext2").value!="") {
oai_payload.messages.push({ "role": "assistant", "content": document.getElementById("jailbreakprompttext2").value });
}
}
else {
//apply custom logit bias for official OAI only
@ -13355,12 +13414,28 @@ Current version: 117
<button type="button" class="btn btn-primary" style="display:inline;width:105px;" id="oaiusecustom" onclick="select_custom_oai_model()">Use Custom</button>
<input type="checkbox" id="oaiaddversion" onchange="" checked>
<div class="box-label" title="Add endpoint version">Add Endpoint Version</div>
<input type="checkbox" id="jailbreakprompt" onchange="togglejailbreak()">
<div class="box-label" title="Adds extra text to improve AI response">Add System Message</div>
<input type="checkbox" id="useoaichatcompl">
<input type="checkbox" id="useoaichatcompl" onchange="toggleoaichatcompl()">
<div class="box-label" id="useoaichatcompllabel" title="">Use ChatCompletions API</div>
<input class="form-control hidden" type="text" id="jailbreakprompttext" placeholder="(Enter System Message)"
value="" onload="togglejailbreak()">
<span id="useoaichatcomplbox" class="hidden" onload="toggleoaichatcompl();">
<br>
Message Role:
<select class="form-control" style="height: 25px; font-size:12px; padding:4px;display:inline;width:100px" id="oairoledropdown">
<option value="0" selected>User</option>
<option value="1">Assistant</option>
<option value="2">System</option>
</select>
<input type="checkbox" id="jailbreakprompt" onchange="togglejailbreak()">
<div class="box-label" title="Adds extra text at the start to improve AI response">Add System Prefix</div>
<input type="checkbox" id="jailbreakprompt2" onchange="togglejailbreak2()">
<div class="box-label" title="Adds extra text to the end to improve AI response">Add Assistant Postfix</div>
<input class="form-control hidden" type="text" id="jailbreakprompttext" placeholder="(Enter System Prefix)"
value="" onload="togglejailbreak();">
<input class="form-control hidden" type="text" id="jailbreakprompttext2" placeholder="(Enter Assistant Postfix)"
value="" onload="togglejailbreak2();">
</span>
</div>
<div id="scalecustom" class="aidgpopuplistheader anotelabel hidden">
Uses Spellbook by Scale. This is an experimental endpoint. It may break at any time.<br><br>

View file

@ -99,6 +99,7 @@ class sd_load_model_inputs(ctypes.Structure):
("cublas_info", ctypes.c_int),
("vulkan_info", ctypes.c_char_p),
("threads", ctypes.c_int),
("quant", ctypes.c_int),
("debugmode", ctypes.c_int)]
class sd_generation_inputs(ctypes.Structure):
@ -484,11 +485,16 @@ def sd_load_model(model_filename):
inputs.debugmode = args.debugmode
inputs.model_filename = model_filename.encode("UTF-8")
thds = args.threads
quant = 0
if len(args.sdconfig) > 2:
sdt = int(args.sdconfig[2])
if sdt > 0:
thds = sdt
if len(args.sdconfig) > 3:
quant = (1 if args.sdconfig[3]=="quant" else 0)
inputs.threads = thds
inputs.quant = quant
inputs = set_backend_props(inputs)
ret = handle.sd_load_model(inputs)
return ret
@ -502,11 +508,16 @@ def sd_generate(genparams):
seed = genparams.get("seed", -1)
sample_method = genparams.get("sampler_name", "euler a")
#clean vars
cfg_scale = (1 if cfg_scale < 1 else (20 if cfg_scale > 20 else cfg_scale))
sample_steps = (1 if sample_steps < 1 else (50 if sample_steps > 50 else sample_steps))
#quick mode
if args.sdconfig and len(args.sdconfig)>1 and args.sdconfig[1]=="quick":
cfg_scale = 1
sample_steps = 7
sample_method = "dpm++ 2m karras"
print("Image generation set to Quick Mode (Low Quality). Step counts, sampler, and cfg scale are fixed.")
inputs = sd_generation_inputs()
inputs.prompt = prompt.encode("UTF-8")
@ -1387,7 +1398,8 @@ def show_new_gui():
sd_model_var = ctk.StringVar()
sd_quick_var = ctk.IntVar(value=0)
sd_threads_var = ctk.StringVar()
sd_threads_var = ctk.StringVar(value=str(default_threads))
sd_quant_var = ctk.IntVar(value=0)
def tabbuttonaction(name):
for t in tabcontent:
@ -1866,6 +1878,7 @@ def show_new_gui():
makefileentry(images_tab, "Stable Diffusion Model (f16):", "Select Stable Diffusion Model File", sd_model_var, 1, filetypes=[("*.safetensors","*.safetensors")], tooltiptxt="Select a .safetensors Stable Diffusion model file on disk to be loaded.")
makecheckbox(images_tab, "Quick Mode (Low Quality)", sd_quick_var, 4,tooltiptxt="Force optimal generation settings for speed.")
makelabelentry(images_tab, "Image threads:" , sd_threads_var, 6, 50,"How many threads to use during image generation.\nIf left blank, uses same value as threads.")
makecheckbox(images_tab, "Compress Weights (Slight Memory Saved)", sd_quant_var, 8,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
# launch
@ -1954,7 +1967,7 @@ def show_new_gui():
else:
args.hordeconfig = None if usehorde_var.get() == 0 else [horde_name_var.get(), horde_gen_var.get(), horde_context_var.get(), horde_apikey_var.get(), horde_workername_var.get()]
args.sdconfig = None if sd_model_var.get() == "" else [sd_model_var.get(), ("quick" if sd_quick_var.get()==1 else "normal"),(int(threads_var.get()) if sd_threads_var.get()=="" else int(sd_threads_var.get()))]
args.sdconfig = None if sd_model_var.get() == "" else [sd_model_var.get(), ("quick" if sd_quick_var.get()==1 else "normal"),(int(threads_var.get()) if sd_threads_var.get()=="" else int(sd_threads_var.get())),("quant" if sd_quant_var.get()==1 else "noquant")]
def import_vars(dict):
if "threads" in dict:
@ -2089,6 +2102,8 @@ def show_new_gui():
sd_quick_var.set(1 if dict["sdconfig"][1]=="quick" else 0)
if len(dict["sdconfig"]) > 2:
sd_threads_var.set(str(dict["sdconfig"][2]))
if len(dict["sdconfig"]) > 3:
sd_quant_var.set(str(dict["sdconfig"][3]))
def save_config():
file_type = [("KoboldCpp Settings", "*.kcpps")]
@ -2865,6 +2880,6 @@ if __name__ == '__main__':
parser.add_argument("--quiet", help="Enable quiet mode, which hides generation inputs and outputs in the terminal. Quiet mode is automatically enabled when running --hordeconfig.", action='store_true')
parser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
parser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
parser.add_argument("--sdconfig", help="Specify a stable diffusion safetensors model to enable image generation. If quick is specified, force optimal generation settings for speed.",metavar=('[sd_filename]', '[normal|quick] [sd_threads]'), nargs='+')
parser.add_argument("--sdconfig", help="Specify a stable diffusion safetensors model to enable image generation. If quick is specified, force optimal generation settings for speed.",metavar=('[sd_filename]', '[normal|quick] [sd_threads] [quant|noquant]'), nargs='+')
main(parser.parse_args(),start_server=True)

View file

@ -182,7 +182,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
sd_params = new SDParams();
sd_params->model_path = inputs.model_filename;
sd_params->wtype = SD_TYPE_F16;
sd_params->wtype = (inputs.quant==0?SD_TYPE_F16:SD_TYPE_Q4_0);
sd_params->n_threads = inputs.threads; //if -1 use physical cores
sd_params->input_path = ""; //unused
sd_params->batch_count = 1;