mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
KCPP SD: add warn and step restriction, updated lite, handle quant mode
This commit is contained in:
parent
3463688a0e
commit
80011ed8aa
4 changed files with 136 additions and 45 deletions
1
expose.h
1
expose.h
|
@ -106,6 +106,7 @@ struct sd_load_model_inputs
|
|||
const int cublas_info = 0;
|
||||
const char * vulkan_info;
|
||||
const int threads;
|
||||
const int quant = 0;
|
||||
const int debugmode = 0;
|
||||
};
|
||||
struct sd_generation_inputs
|
||||
|
|
157
klite.embd
157
klite.embd
|
@ -7,7 +7,7 @@ Just copy this single static HTML file anywhere and open it in a browser, or fro
|
|||
Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
|
||||
If you are submitting a pull request for Lite, PLEASE use the above repo, not the KoboldCpp one.
|
||||
Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
|
||||
Current version: 117
|
||||
Current version: 118
|
||||
-Concedo
|
||||
-->
|
||||
|
||||
|
@ -3478,7 +3478,9 @@ Current version: 117
|
|||
saved_palm_key: "", //do not ever share this in save files!
|
||||
saved_kai_addr: "", //do not ever share this in save files!
|
||||
saved_oai_jailbreak: "", //customized oai system prompt
|
||||
saved_oai_jailbreak2: "", //oai assistant postfix
|
||||
saved_oai_custommodel: "", //customized oai custom model
|
||||
saved_oai_role: 0, //0=user,1=assistant,2=system
|
||||
saved_a1111_url: default_a1111_base,
|
||||
saved_xtts_url: default_xtts_base,
|
||||
prev_custom_endpoint_type: 0, //show a reconnect box to custom endpoint if needed. 0 is horde, otherwise its dropdown value+1
|
||||
|
@ -6589,6 +6591,38 @@ Current version: 117
|
|||
document.getElementById("jailbreakprompttext").classList.add("hidden");
|
||||
}
|
||||
}
|
||||
//Syncs the "Assistant Postfix" textbox with the saved settings.
//Reads localsettings.saved_oai_jailbreak2 into the #jailbreakprompttext2 input,
//then shows that input only while the #jailbreakprompt2 checkbox is ticked.
//Takes no parameters and returns nothing; it only updates the DOM.
function togglejailbreak2()
{
	let postfixbox = document.getElementById("jailbreakprompttext2");
	let savedpostfix = localsettings.saved_oai_jailbreak2;

	//treat a missing (null/undefined) saved value as empty, otherwise
	//assigning it to an input would display the literal text "undefined"
	postfixbox.value = (savedpostfix==null ? "" : savedpostfix);

	if(document.getElementById("jailbreakprompt2").checked)
	{
		postfixbox.classList.remove("hidden");
	}
	else
	{
		postfixbox.classList.add("hidden");
	}
}
|
||||
//Shows or hides the ChatCompletions options box (#useoaichatcomplbox)
//depending on the #useoaichatcompl checkbox. When the box is shown, the
//role dropdown is restored from localsettings.saved_oai_role if one is saved.
//Both jailbreak textboxes are refreshed afterwards in either case.
function toggleoaichatcompl()
{
	let chatenabled = document.getElementById("useoaichatcompl").checked;
	if(!chatenabled)
	{
		document.getElementById("useoaichatcomplbox").classList.add("hidden");
	}
	else
	{
		document.getElementById("useoaichatcomplbox").classList.remove("hidden");
		let savedrole = localsettings.saved_oai_role;
		if(savedrole!=null)
		{
			document.getElementById("oairoledropdown").value = savedrole;
		}
	}
	togglejailbreak();
	togglejailbreak2();
}
|
||||
|
||||
function select_custom_oai_model()
|
||||
{
|
||||
|
@ -6619,6 +6653,7 @@ Current version: 117
|
|||
} else {
|
||||
document.getElementById("useoaichatcompl").checked = !non_completions;
|
||||
}
|
||||
toggleoaichatcompl();
|
||||
}
|
||||
function oai_fetch_models()
|
||||
{
|
||||
|
@ -6728,7 +6763,7 @@ Current version: 117
|
|||
|
||||
}
|
||||
oai_model_change();
|
||||
togglejailbreak();
|
||||
toggleoaichatcompl();
|
||||
}
|
||||
else if(epchoice==2)
|
||||
{
|
||||
|
@ -6847,28 +6882,22 @@ Current version: 117
|
|||
{
|
||||
//now we get the version number, however this is optional
|
||||
//if it fails we can still proceed
|
||||
let urls2 = [
|
||||
apply_proxy_url(tmpep + kobold_custom_version_endpoint),
|
||||
];
|
||||
Promise.all(urls2.map(url => fetch(url)
|
||||
.then(response => response.json())))
|
||||
fetch(apply_proxy_url(tmpep + kobold_custom_version_endpoint))
|
||||
.then(response => response.json())
|
||||
.then(values2 => {
|
||||
console.log(values2);
|
||||
let ep_version = values2[0].result;
|
||||
let ep_version = values2.result;
|
||||
kobold_endpoint_version = (ep_version?ep_version:"");
|
||||
}).catch(error => {
|
||||
console.log("Failed to get KAI version number: " + error);
|
||||
});
|
||||
|
||||
//also get max ctx supported
|
||||
let urls3 = [
|
||||
apply_proxy_url(tmpep + kobold_custom_maxctxlen_endpoint),
|
||||
];
|
||||
Promise.all(urls3.map(url => fetch(url)
|
||||
.then(response => response.json())))
|
||||
fetch(apply_proxy_url(tmpep + kobold_custom_maxctxlen_endpoint))
|
||||
.then(response => response.json())
|
||||
.then(values3 => {
|
||||
console.log(values3);
|
||||
let ep_maxctx = values3[0].value;
|
||||
let ep_maxctx = values3.value;
|
||||
if(ep_maxctx && ep_maxctx>document.getElementById("max_context_length_slide").max)
|
||||
{
|
||||
document.getElementById("max_context_length_slide").max = ep_maxctx;
|
||||
|
@ -6883,7 +6912,7 @@ Current version: 117
|
|||
//allow kcpp version check for remote endpoints too
|
||||
{
|
||||
//for local mode, check if we are using koboldcpp, if so we can use streaming if permitted by version
|
||||
fetch(tmpep + koboldcpp_version_endpoint)
|
||||
fetch(apply_proxy_url(tmpep + koboldcpp_version_endpoint))
|
||||
.then(x => x.json())
|
||||
.then(data => {
|
||||
if(data && data!="" && data.version && data.version!="")
|
||||
|
@ -6892,14 +6921,11 @@ Current version: 117
|
|||
console.log("KoboldCpp Detected: " + koboldcpp_version);
|
||||
|
||||
//also check against kcpp's max true context length
|
||||
let urls4 = [
|
||||
apply_proxy_url(tmpep + koboldcpp_truemaxctxlen_endpoint),
|
||||
];
|
||||
Promise.all(urls4.map(url => fetch(url)
|
||||
.then(response => response.json())))
|
||||
fetch(apply_proxy_url(tmpep + koboldcpp_truemaxctxlen_endpoint))
|
||||
.then(response => response.json())
|
||||
.then(values4 => {
|
||||
console.log(values4);
|
||||
let ep_maxctx = values4[0].value;
|
||||
let ep_maxctx = values4.value;
|
||||
if(ep_maxctx && ep_maxctx>document.getElementById("max_context_length_slide").max)
|
||||
{
|
||||
document.getElementById("max_context_length_slide").max = ep_maxctx;
|
||||
|
@ -6910,13 +6936,10 @@ Current version: 117
|
|||
});
|
||||
|
||||
//and check if there's a kcpp savefile preloaded
|
||||
let urls5 = [
|
||||
apply_proxy_url(tmpep + koboldcpp_preloadstory_endpoint),
|
||||
];
|
||||
Promise.all(urls5.map(url => fetch(url)
|
||||
.then(response => response.json())))
|
||||
fetch(apply_proxy_url(tmpep + koboldcpp_preloadstory_endpoint))
|
||||
.then(response => response.json())
|
||||
.then(values5 => {
|
||||
let tmpstory = values5[0];
|
||||
let tmpstory = values5;
|
||||
let is_kai = !(tmpstory.prompt==null);
|
||||
if(is_kai)
|
||||
{
|
||||
|
@ -6931,6 +6954,41 @@ Current version: 117
|
|||
console.log("Failed to get preloaded story: " + error);
|
||||
});
|
||||
|
||||
//check if image gen is supported
|
||||
fetch(apply_proxy_url(tmpep + a1111_models_endpoint))
|
||||
.then(response => response.json())
|
||||
.then(values6 => {
|
||||
console.log(values6);
|
||||
if(values6 && values6.length>0)
|
||||
{
|
||||
let firstitem = values6[0];
|
||||
if(firstitem.model_name!="inactive" && firstitem.filename!=null)
|
||||
{
|
||||
//local image gen is available
|
||||
if(localsettings.generate_images_mode==0)
|
||||
{
|
||||
console.log("Connect to KoboldCpp Image Gen");
|
||||
localsettings.generate_images_mode = 2;
|
||||
localsettings.saved_a1111_url = tmpep;
|
||||
connect_to_a1111(true);
|
||||
render_gametext(true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//hide the add img if the image server is down
|
||||
if(localsettings.generate_images_mode==2 && localsettings.saved_a1111_url==tmpep)
|
||||
{
|
||||
localsettings.generate_images_mode = 0;
|
||||
localsettings.saved_a1111_url = default_a1111_base
|
||||
render_gametext(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}).catch(error => {
|
||||
console.log("Failed to get local image models: " + error);
|
||||
});
|
||||
|
||||
}else{
|
||||
console.log("Unknown KoboldCpp Check Response: " + data);
|
||||
}
|
||||
|
@ -7009,6 +7067,8 @@ Current version: 117
|
|||
{
|
||||
document.getElementById("jailbreakprompttext").value = defaultoaijailbreak;
|
||||
}
|
||||
localsettings.saved_oai_role = document.getElementById("oairoledropdown").value;
|
||||
localsettings.saved_oai_jailbreak2 = document.getElementById("jailbreakprompttext2").value;
|
||||
let isOpenrouter = (document.getElementById("customapidropdown").value==5);
|
||||
let dropdown = (isOpenrouter?document.getElementById("custom_openrouter_model"):document.getElementById("custom_oai_model"));
|
||||
custom_oai_model = dropdown.value.trim();
|
||||
|
@ -7028,7 +7088,7 @@ Current version: 117
|
|||
document.getElementById("connectstatus").classList.add("color_green");
|
||||
}
|
||||
document.getElementById("connectstatus").innerHTML = "Connected to OAI Endpoint";
|
||||
render_gametext();
|
||||
render_gametext(true);
|
||||
}
|
||||
}
|
||||
else if(epchoice==2) //connect to Scale Endpoint
|
||||
|
@ -9704,18 +9764,17 @@ Current version: 117
|
|||
}
|
||||
|
||||
if (document.getElementById("useoaichatcompl").checked) {
|
||||
let myrole = (localsettings.saved_oai_role==2)?"system":(localsettings.saved_oai_role==1?"assistant":"user");
|
||||
oai_payload.messages = [];
|
||||
targetep = (custom_oai_endpoint + oai_submit_endpoint_turbo);
|
||||
if (document.getElementById("jailbreakprompt") && document.getElementById("jailbreakprompt").checked && document.getElementById("jailbreakprompttext").value!="") {
|
||||
oai_payload.messages = [
|
||||
{ "role": "system", "content": document.getElementById("jailbreakprompttext").value },
|
||||
{ "role": "user", "content": submit_payload.prompt },
|
||||
];
|
||||
oai_payload.messages.push({ "role": "system", "content": document.getElementById("jailbreakprompttext").value });
|
||||
}
|
||||
else {
|
||||
oai_payload.messages = [
|
||||
{ "role": "user", "content": submit_payload.prompt },
|
||||
];
|
||||
oai_payload.messages.push({ "role": myrole, "content": submit_payload.prompt });
|
||||
if (document.getElementById("jailbreakprompt2") && document.getElementById("jailbreakprompt2").checked && document.getElementById("jailbreakprompttext2").value!="") {
|
||||
oai_payload.messages.push({ "role": "assistant", "content": document.getElementById("jailbreakprompttext2").value });
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
//apply custom logit bias for official OAI only
|
||||
|
@ -13355,12 +13414,28 @@ Current version: 117
|
|||
<button type="button" class="btn btn-primary" style="display:inline;width:105px;" id="oaiusecustom" onclick="select_custom_oai_model()">Use Custom</button>
|
||||
<input type="checkbox" id="oaiaddversion" onchange="" checked>
|
||||
<div class="box-label" title="Add endpoint version">Add Endpoint Version</div>
|
||||
<input type="checkbox" id="jailbreakprompt" onchange="togglejailbreak()">
|
||||
<div class="box-label" title="Adds extra text to improve AI response">Add System Message</div>
|
||||
<input type="checkbox" id="useoaichatcompl">
|
||||
<input type="checkbox" id="useoaichatcompl" onchange="toggleoaichatcompl()">
|
||||
<div class="box-label" id="useoaichatcompllabel" title="">Use ChatCompletions API</div>
|
||||
<input class="form-control hidden" type="text" id="jailbreakprompttext" placeholder="(Enter System Message)"
|
||||
value="" onload="togglejailbreak()">
|
||||
|
||||
<span id="useoaichatcomplbox" class="hidden" onload="toggleoaichatcompl();">
|
||||
<br>
|
||||
Message Role:
|
||||
<select class="form-control" style="height: 25px; font-size:12px; padding:4px;display:inline;width:100px" id="oairoledropdown">
|
||||
<option value="0" selected>User</option>
|
||||
<option value="1">Assistant</option>
|
||||
<option value="2">System</option>
|
||||
</select>
|
||||
<input type="checkbox" id="jailbreakprompt" onchange="togglejailbreak()">
|
||||
<div class="box-label" title="Adds extra text at the start to improve AI response">Add System Prefix</div>
|
||||
<input type="checkbox" id="jailbreakprompt2" onchange="togglejailbreak2()">
|
||||
<div class="box-label" title="Adds extra text to the end to improve AI response">Add Assistant Postfix</div>
|
||||
|
||||
<input class="form-control hidden" type="text" id="jailbreakprompttext" placeholder="(Enter System Prefix)"
|
||||
value="" onload="togglejailbreak();">
|
||||
<input class="form-control hidden" type="text" id="jailbreakprompttext2" placeholder="(Enter Assistant Postfix)"
|
||||
value="" onload="togglejailbreak2();">
|
||||
</span>
|
||||
|
||||
</div>
|
||||
<div id="scalecustom" class="aidgpopuplistheader anotelabel hidden">
|
||||
Uses Spellbook by Scale. This is an experimental endpoint. It may break at any time.<br><br>
|
||||
|
|
21
koboldcpp.py
21
koboldcpp.py
|
@ -99,6 +99,7 @@ class sd_load_model_inputs(ctypes.Structure):
|
|||
("cublas_info", ctypes.c_int),
|
||||
("vulkan_info", ctypes.c_char_p),
|
||||
("threads", ctypes.c_int),
|
||||
("quant", ctypes.c_int),
|
||||
("debugmode", ctypes.c_int)]
|
||||
|
||||
class sd_generation_inputs(ctypes.Structure):
|
||||
|
@ -484,11 +485,16 @@ def sd_load_model(model_filename):
|
|||
inputs.debugmode = args.debugmode
|
||||
inputs.model_filename = model_filename.encode("UTF-8")
|
||||
thds = args.threads
|
||||
quant = 0
|
||||
if len(args.sdconfig) > 2:
|
||||
sdt = int(args.sdconfig[2])
|
||||
if sdt > 0:
|
||||
thds = sdt
|
||||
if len(args.sdconfig) > 3:
|
||||
quant = (1 if args.sdconfig[3]=="quant" else 0)
|
||||
|
||||
inputs.threads = thds
|
||||
inputs.quant = quant
|
||||
inputs = set_backend_props(inputs)
|
||||
ret = handle.sd_load_model(inputs)
|
||||
return ret
|
||||
|
@ -502,11 +508,16 @@ def sd_generate(genparams):
|
|||
seed = genparams.get("seed", -1)
|
||||
sample_method = genparams.get("sampler_name", "euler a")
|
||||
|
||||
#clean vars
|
||||
cfg_scale = (1 if cfg_scale < 1 else (20 if cfg_scale > 20 else cfg_scale))
|
||||
sample_steps = (1 if sample_steps < 1 else (50 if sample_steps > 50 else sample_steps))
|
||||
|
||||
#quick mode
|
||||
if args.sdconfig and len(args.sdconfig)>1 and args.sdconfig[1]=="quick":
|
||||
cfg_scale = 1
|
||||
sample_steps = 7
|
||||
sample_method = "dpm++ 2m karras"
|
||||
print("Image generation set to Quick Mode (Low Quality). Step counts, sampler, and cfg scale are fixed.")
|
||||
|
||||
inputs = sd_generation_inputs()
|
||||
inputs.prompt = prompt.encode("UTF-8")
|
||||
|
@ -1387,7 +1398,8 @@ def show_new_gui():
|
|||
|
||||
sd_model_var = ctk.StringVar()
|
||||
sd_quick_var = ctk.IntVar(value=0)
|
||||
sd_threads_var = ctk.StringVar()
|
||||
sd_threads_var = ctk.StringVar(value=str(default_threads))
|
||||
sd_quant_var = ctk.IntVar(value=0)
|
||||
|
||||
def tabbuttonaction(name):
|
||||
for t in tabcontent:
|
||||
|
@ -1866,6 +1878,7 @@ def show_new_gui():
|
|||
makefileentry(images_tab, "Stable Diffusion Model (f16):", "Select Stable Diffusion Model File", sd_model_var, 1, filetypes=[("*.safetensors","*.safetensors")], tooltiptxt="Select a .safetensors Stable Diffusion model file on disk to be loaded.")
|
||||
makecheckbox(images_tab, "Quick Mode (Low Quality)", sd_quick_var, 4,tooltiptxt="Force optimal generation settings for speed.")
|
||||
makelabelentry(images_tab, "Image threads:" , sd_threads_var, 6, 50,"How many threads to use during image generation.\nIf left blank, uses same value as threads.")
|
||||
makecheckbox(images_tab, "Compress Weights (Slight Memory Saved)", sd_quant_var, 8,tooltiptxt="Quantizes the SD model weights to save memory. May degrade quality.")
|
||||
|
||||
|
||||
# launch
|
||||
|
@ -1954,7 +1967,7 @@ def show_new_gui():
|
|||
else:
|
||||
args.hordeconfig = None if usehorde_var.get() == 0 else [horde_name_var.get(), horde_gen_var.get(), horde_context_var.get(), horde_apikey_var.get(), horde_workername_var.get()]
|
||||
|
||||
args.sdconfig = None if sd_model_var.get() == "" else [sd_model_var.get(), ("quick" if sd_quick_var.get()==1 else "normal"),(int(threads_var.get()) if sd_threads_var.get()=="" else int(sd_threads_var.get()))]
|
||||
args.sdconfig = None if sd_model_var.get() == "" else [sd_model_var.get(), ("quick" if sd_quick_var.get()==1 else "normal"),(int(threads_var.get()) if sd_threads_var.get()=="" else int(sd_threads_var.get())),("quant" if sd_quant_var.get()==1 else "noquant")]
|
||||
|
||||
def import_vars(dict):
|
||||
if "threads" in dict:
|
||||
|
@ -2089,6 +2102,8 @@ def show_new_gui():
|
|||
sd_quick_var.set(1 if dict["sdconfig"][1]=="quick" else 0)
|
||||
if len(dict["sdconfig"]) > 2:
|
||||
sd_threads_var.set(str(dict["sdconfig"][2]))
|
||||
if len(dict["sdconfig"]) > 3:
|
||||
sd_quant_var.set(str(dict["sdconfig"][3]))
|
||||
|
||||
def save_config():
|
||||
file_type = [("KoboldCpp Settings", "*.kcpps")]
|
||||
|
@ -2865,6 +2880,6 @@ if __name__ == '__main__':
|
|||
parser.add_argument("--quiet", help="Enable quiet mode, which hides generation inputs and outputs in the terminal. Quiet mode is automatically enabled when running --hordeconfig.", action='store_true')
|
||||
parser.add_argument("--ssl", help="Allows all content to be served over SSL instead. A valid UNENCRYPTED SSL cert and key .pem files must be provided", metavar=('[cert_pem]', '[key_pem]'), nargs='+')
|
||||
parser.add_argument("--nocertify", help="Allows insecure SSL connections. Use this if you have cert errors and need to bypass certificate restrictions.", action='store_true')
|
||||
parser.add_argument("--sdconfig", help="Specify a stable diffusion safetensors model to enable image generation. If quick is specified, force optimal generation settings for speed.",metavar=('[sd_filename]', '[normal|quick] [sd_threads]'), nargs='+')
|
||||
parser.add_argument("--sdconfig", help="Specify a stable diffusion safetensors model to enable image generation. If quick is specified, force optimal generation settings for speed.",metavar=('[sd_filename]', '[normal|quick] [sd_threads] [quant|noquant]'), nargs='+')
|
||||
|
||||
main(parser.parse_args(),start_server=True)
|
||||
|
|
|
@ -182,7 +182,7 @@ bool sdtype_load_model(const sd_load_model_inputs inputs) {
|
|||
|
||||
sd_params = new SDParams();
|
||||
sd_params->model_path = inputs.model_filename;
|
||||
sd_params->wtype = SD_TYPE_F16;
|
||||
sd_params->wtype = (inputs.quant==0?SD_TYPE_F16:SD_TYPE_Q4_0);
|
||||
sd_params->n_threads = inputs.threads; //if -1 use physical cores
|
||||
sd_params->input_path = ""; //unused
|
||||
sd_params->batch_count = 1;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue