From 08236ccc97856ab3640f4cb554a7c1e345fcfe99 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Tue, 23 Jan 2024 16:56:12 +0800
Subject: [PATCH] better abort handling, added support for dynatemp exponent

---
 common/common.h     |   1 +
 expose.h            |   1 +
 gpttype_adapter.cpp |   9 ++--
 klite.embd          | 120 +++++++++++++++++++++++++++++++-------
 koboldcpp.py        |  22 +++---
 llama.cpp           |   4 +-
 llama.h             |   3 +-
 7 files changed, 110 insertions(+), 50 deletions(-)

diff --git a/common/common.h b/common/common.h
index aa4ddff45..5df3a04e9 100644
--- a/common/common.h
+++ b/common/common.h
@@ -89,6 +89,7 @@ struct gpt_params {

     // DynaTemp!
     float dynatemp_range = 0.0f; // enables DynaTemp if greater than 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range
+    float dynatemp_exponent = 1.0f; //

     // sampling parameters
     struct llama_sampling_params sparams;

diff --git a/expose.h b/expose.h
index 55d815e89..1d26fc9df 100644
--- a/expose.h
+++ b/expose.h
@@ -82,6 +82,7 @@ struct generation_inputs
     const bool grammar_retain_state;
     const bool quiet = false;
     const float dynatemp_range = 0.0f;
+    const float dynatemp_exponent = 1.0f;
     const logit_bias logit_biases[logit_bias_max];
 };

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index bcdcc83e3..423029ea5 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -482,7 +482,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
 }

 int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng,
-int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dynatemp_range)
+int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent)
 {
     int id = 0;
     std::vector<llama_token_data> candidates;
@@ -548,7 +548,8 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector
 [this hunk's body and the header of the following hunk were lost in extraction]
     kcpp_params->mirostat_eta = inputs.mirostat_eta;
     kcpp_params->mirostat_tau = inputs.mirostat_tau;
     kcpp_params->dynatemp_range = inputs.dynatemp_range;
+    kcpp_params->dynatemp_exponent = inputs.dynatemp_exponent;
     kcpp_params->n_ctx = inputs.max_context_length;
     kcpp_params->n_batch = n_batch;
     kcpp_params->n_threads = n_threads;
@@ -1913,6 +1915,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
     const float typical_p = kcpp_params->typical_p;
     const float tfs_z = kcpp_params->tfs_z;
     const float dynatemp_range = kcpp_params->dynatemp_range;
+    const float dynatemp_exponent = kcpp_params->dynatemp_exponent;

     if (!startedsampling)
     {
@@ -1968,7 +1971,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o

             id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, presence_penalty,
             top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng,
-            kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range);
+            kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range, dynatemp_exponent);

             if (grammar != nullptr) {
                 grammar_accept_token(file_format, n_vocab, grammar, id);
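For orientation before the UI changes: the comment in common.h states how dynatemp_range defines a window around the base temperature, and dynatemp_exponent (default 1.0) later shapes how the sampler moves inside that window. A minimal Python sketch of the window, not part of the patch (the function name is ours; clamping negative bounds to zero mirrors what the Lite UI code below does):

```python
# Sketch (ours): the [min, max] temperature window that dynatemp_range
# defines around the base temperature, per the comment in common.h.
def dynatemp_window(temperature: float, dynatemp_range: float) -> tuple[float, float]:
    dynatemp_min = max(0.0, temperature - dynatemp_range)  # never below zero
    dynatemp_max = max(0.0, temperature + dynatemp_range)
    return (dynatemp_min, dynatemp_max)

print(dynatemp_window(0.7, 0.5))  # (0.2, 1.2): sampling temperature may vary in this range
```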
diff --git a/klite.embd b/klite.embd
index e2c043047..f50e782b1 100644
--- a/klite.embd
+++ b/klite.embd
@@ -6,7 +6,7 @@
 It requires no dependencies, installation or setup.
 Just copy this single static HTML file anywhere and open it in a browser, or from a webserver.
 Please go to https://github.com/LostRuins/lite.koboldai.net for updates on Kobold Lite.
 Kobold Lite is under the AGPL v3.0 License unless otherwise exempted. Please do not remove this line.
-Current version: 105
+Current version: 106
 -Concedo
 -->
@@ -3406,6 +3406,7 @@ Current version: 105
     rep_pen_slope: 0.7,
     temperature: 0.7,
     dynatemp_range: 0.0,
+    dynatemp_exponent: 1.0,
     top_p: 0.92,
     min_p: 0.00,
     presence_penalty: 0.00,
@@ -3429,6 +3430,7 @@ Current version: 105
     description: "Known Working Settings.",
     temp: defaultsettings.temperature,
     dynatemp_range: defaultsettings.dynatemp_range,
+    dynatemp_exponent: defaultsettings.dynatemp_exponent,
     genamt: defaultsettings.max_length,
     top_k: defaultsettings.top_k,
     top_p: defaultsettings.top_p,
@@ -3447,6 +3449,7 @@ Current version: 105
     description: "Good defaults with a different sampler order.",
     temp: defaultsettings.temperature,
     dynatemp_range: defaultsettings.dynatemp_range,
+    dynatemp_exponent: defaultsettings.dynatemp_exponent,
     genamt: defaultsettings.max_length,
     top_k: defaultsettings.top_k,
     top_p: defaultsettings.top_p,
@@ -3460,7 +3463,7 @@ Current version: 105
     rep_pen_slope: defaultsettings.rep_pen_slope,
     sampler_order: [0, 1, 2, 3, 4, 5, 6]
     },
-    {"preset":"Godlike","description":"Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"Mayday","description":"Wacky plot, creativity from AI, crazy stories you want AI to weird out.","temp":1.05,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Good Winds","description":"Let AI direct the plot, but still stay logical.","temp":0.7,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Liminal Drift","description":"Drives coherent dialogue, responses, and behavior, sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]},{"preset":"TavernAI","description":"Preset used in TavernAI.","temp":0.79,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.95,"rep_pen":1.19,"rep_pen_range":1024,"rep_pen_slope":0.9,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Storywriter 6B","description":"Optimized settings for relevant output.","genamt":120,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4],"temp":0.72,"dynatemp_range":0.0,"tfs":1,"top_a":0,"top_k":0,"top_p":0.73,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Coherent Creativity 6B","description":"A good balance between coherence, creativity, and quality of prose.","genamt":120,"rep_pen":1.2,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.51,"dynatemp_range":0.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Luna Moth 6B","description":"A great degree of creativity without losing coherency.","temp":1.5,"dynatemp_range":0.0,"genamt":120,"top_k":85,"top_p":0.24,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pleasing Results 6B","description":"Expectable output with alternative context settings.","temp":0.44,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":6.8,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Genesis 13B","description":"Stable and logical, but with scattered creativity.","temp":0.63,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.98,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,2,0,3,5,1,4]},{"preset":"Basic Coherence 13B","description":"Keep things on track.","temp":0.59,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.87,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.3,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Ouroboros 13B","description":"Versatile, conforms well to poems, lists, chat, etc.","temp":1.07,"dynatemp_range":0.0,"genamt":120,"top_k":100,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.93,"rep_pen":1.05,"rep_pen_range":404,"rep_pen_slope":0.8,"sampler_order":[6,0,5,3,2,1,4]},{"preset":"Ace of Spades 13B","description":"Expressive, while still staying focused.","temp":1.15,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.8,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":7,"sampler_order":[6,3,2,0,5,1,4]},{"preset":"Low Rider 13B","description":"Reliable, aimed at story development.","temp":0.94,"dynatemp_range":0.0,"genamt":120,"top_k":12,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.94,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pro Writer 13B","description":"Optimal setting for readability, based on AI-powered mass statistical analysis of Euterpe output.","temp":1.35,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.69,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,3,2,5,0,1,4]},{"preset":"Default 20B","description":"Good starting settings for NeoX 20B.","temp":0.6,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.04,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"dynatemp_range":0.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]}
+    {"preset":"Godlike","description":"Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"Mayday","description":"Wacky plot, creativity from AI, crazy stories you want AI to weird out.","temp":1.05,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Good Winds","description":"Let AI direct the plot, but still stay logical.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Liminal Drift","description":"Drives coherent dialogue, responses, and behavior, sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]},{"preset":"TavernAI","description":"Preset used in TavernAI.","temp":0.79,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.95,"rep_pen":1.19,"rep_pen_range":1024,"rep_pen_slope":0.9,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Storywriter 6B","description":"Optimized settings for relevant output.","genamt":120,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4],"temp":0.72,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"tfs":1,"top_a":0,"top_k":0,"top_p":0.73,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Coherent Creativity 6B","description":"A good balance between coherence, creativity, and quality of prose.","genamt":120,"rep_pen":1.2,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.51,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"typical":1},{"preset":"Luna Moth 6B","description":"A great degree of creativity without losing coherency.","temp":1.5,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":85,"top_p":0.24,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pleasing Results 6B","description":"Expectable output with alternative context settings.","temp":0.44,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.9,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":6.8,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Genesis 13B","description":"Stable and logical, but with scattered creativity.","temp":0.63,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.98,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,2,0,3,5,1,4]},{"preset":"Basic Coherence 13B","description":"Keep things on track.","temp":0.59,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.87,"rep_pen":1.1,"rep_pen_range":2048,"rep_pen_slope":0.3,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Ouroboros 13B","description":"Versatile, conforms well to poems, lists, chat, etc.","temp":1.07,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":100,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.93,"rep_pen":1.05,"rep_pen_range":404,"rep_pen_slope":0.8,"sampler_order":[6,0,5,3,2,1,4]},{"preset":"Ace of Spades 13B","description":"Expressive, while still staying focused.","temp":1.15,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":0.95,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.8,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":7,"sampler_order":[6,3,2,0,5,1,4]},{"preset":"Low Rider 13B","description":"Reliable, aimed at story development.","temp":0.94,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":12,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.94,"rep_pen":1.05,"rep_pen_range":2048,"rep_pen_slope":0.2,"sampler_order":[6,5,0,2,3,1,4]},{"preset":"Pro Writer 13B","description":"Optimal setting for readability, based on AI-powered mass statistical analysis of Euterpe output.","temp":1.35,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":0.69,"rep_pen":1.15,"rep_pen_range":2048,"rep_pen_slope":0.1,"sampler_order":[6,3,2,5,0,1,4]},{"preset":"Default 20B","description":"Good starting settings for NeoX 20B.","temp":0.6,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":0.9,"min_p":0.0,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.04,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,0,1,2,3,4,5]},{"preset":"Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"genamt":120,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":320,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]}
     ];

     function polyfills()
@@ -4033,8 +4036,6 @@ Current version: 105
     "sd_model_checkpoint": desired_model,
     "eta_noise_seed_delta": 0.0,
     "CLIP_stop_at_last_layers": 1.0,
-    "eta_ddim": 0.0,
-    "eta_ancestral": 1.0,
     "ddim_discretize": "uniform",
     "img2img_fix_steps": false,
     "sd_hypernetwork": "None",
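The next hunk rewrites Lite's DynaTemp preview so that the (temperature, range) pair and the (min, max) pair stay in sync no matter which one the user edits. The conversion is symmetric; here is a Python sketch of the two directions, not part of the patch (function names are ours, the arithmetic matches the hunk below):

```python
# Sketch (ours) of the two conversions preview_dynatemp() performs.
def minmax_from_temp_range(temp: float, rng: float) -> tuple[float, float]:
    # isModifiedRange branch: the user edited the temperature/range widgets
    return (max(0.0, temp - rng), max(0.0, temp + rng))

def temp_range_from_minmax(a1: float, a2: float) -> tuple[float, float]:
    # other branch: the user edited the min/max fields directly
    avg = (a1 + a2) * 0.5      # midpoint becomes the base temperature
    diff = abs(a2 - a1) * 0.5  # half-width becomes the DynaTemp range
    return (avg, diff)
```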
@@ -5947,38 +5948,48 @@ Current version: 105
         document.getElementById("advancedloadfile").classList.add("hidden");
     }

-    function preview_dynatemp()
+    function preview_dynatemp(isModifiedRange)
     {
-        let a1 = parseFloat(document.getElementById("dynatemp_min").value);
-        let a2 = parseFloat(document.getElementById("dynatemp_max").value);
-        let avg = (a1+a2)*0.5;
-        let diff = Math.abs(a2 - a1)*0.5;
-        document.getElementById("dynatemp_outtemp").innerText = avg.toFixed(3);
-        document.getElementById("dynatemp_outrange").innerText = diff.toFixed(3);
+        if(isModifiedRange)
+        {
+            let currtmp = parseFloat(document.getElementById("dynatemp_outtemp").value);
+            let currrng = parseFloat(document.getElementById("dynatemp_range").value);
+            let a1 = currtmp - currrng;
+            let a2 = currtmp + currrng;
+            a1 = a1<0?0:a1;
+            a2 = a2<0?0:a2;
+            document.getElementById("dynatemp_min").value = a1.toFixed(2);
+            document.getElementById("dynatemp_max").value = a2.toFixed(2);
+            document.getElementById("temperature").value = currtmp.toFixed(3);
+            document.getElementById("temperature_slide").value = document.getElementById("temperature").value;
+        }
+        else
+        {
+            let a1 = parseFloat(document.getElementById("dynatemp_min").value);
+            let a2 = parseFloat(document.getElementById("dynatemp_max").value);
+            if (a2
 [rest of this condition, the remainder of this hunk, and the header of the following hunk were lost in extraction]
         localsettings.dynatemp_range = cleannum(localsettings.dynatemp_range, 0, localsettings.dynatemp_range>localsettings.temperature?localsettings.temperature:localsettings.dynatemp_range);
+        localsettings.dynatemp_exponent = cleannum(localsettings.dynatemp_exponent, 0.0, 10.0);
         localsettings.presence_penalty = cleannum(localsettings.presence_penalty, -2, 2);
         localsettings.top_k = cleannum(Math.floor(localsettings.top_k), 0, 300);
         localsettings.top_a = cleannum(localsettings.top_a, 0, 1);
@@ -7988,6 +8003,7 @@ Current version: 105
     groupchat_removals = [];
     welcome = "";
     last_known_filename = "saved_story.json";
+    is_impersonate_user = false;
     if (!keep_memory) {
         current_memory = "";
@@ -8462,7 +8478,12 @@ Current version: 105

     //randomize opponent if there is more than one
     let hasMulti = false;
-    if(co.includes("||$||"))
+    if(is_impersonate_user)
+    {
+        is_impersonate_user = false;
+        co = localsettings.chatname;
+    }
+    else if(co.includes("||$||"))
     {
         let coarr = co.split("||$||");
         coarr = coarr.filter(x=>(x&&x!=""));
@@ -8774,6 +8795,7 @@ Current version: 105
         render_gametext();
     }
+    is_impersonate_user = false;
 }

 function get_stop_sequences() //the input object may not always be the same!
@@ -8847,6 +8869,7 @@ Current version: 105
         //also supports min_p, in that it wont crash, so add it on. it will be ignored if not found
         submit_payload.params.min_p = localsettings.min_p;
         submit_payload.params.dynatemp_range = localsettings.dynatemp_range;
+        submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
     }
     //presence pen and logit bias for OAI and newer kcpp
     if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="")
@@ -9289,6 +9312,8 @@ Current version: 105

         //horde should support min_p in future too
         submit_payload.params.min_p = localsettings.min_p;
+        submit_payload.params.dynatemp_range = localsettings.dynatemp_range;
+        submit_payload.params.dynatemp_exponent = localsettings.dynatemp_exponent;
     }

     last_request_str = JSON.stringify(submit_payload);
@@ -9442,7 +9467,7 @@ Current version: 105
     {
         //console.log(outputimg);
         let origImg = "data:image/jpeg;base64," + outputimg;
-        let imgres = localsettings.img_allowhd?380:256;
+        let imgres = localsettings.img_allowhd?400:256;
         compressImage(origImg, (newDataUri) => {
             image_db[imgid].done = true;
             image_db[imgid].result = newDataUri;
@@ -11269,8 +11294,24 @@ Current version: 105
             + `Impersonate `+localsettings.chatopponent+` speaking as them`;
         }
+        gs += `[button markup lost in extraction]Make the AI write a response as me (for 1 turn)`;
+
         document.getElementById("groupselectitems").innerHTML = gs;
     }

+    var is_impersonate_user = false;
+    function impersonate_user()
+    {
+        hide_popups();
+        let willsubmit = (document.getElementById("entersubmit").checked ? true : false);
+        if (willsubmit) {
+            document.getElementById("input_text").value = "";
+            document.getElementById("cht_inp").value = "";
+            is_impersonate_user = true;
+            submit_generation();
+        }else{
+            msgbox("Backend is generating or busy - try again later");
+        }
+    }
     function impersonate_message(index)
     {
         hide_popups();
@@ -12543,7 +12584,7 @@ Current version: 105
 [table markup lost in extraction; header cells shown without tags]
     Seed
     Min-P
    PrPen.
-    DyTmp.R
+    DyTmp.
@@ -12553,8 +12594,7 @@ Current version: 105
 [input markup lost in extraction; only id="min_p" and a one-line removal with a one-line addition are recoverable]
@@ -13011,17 +13051,25 @@ Current version: 105
 [settings-dialog markup lost in extraction. Recoverable text nodes, in order:
 the "Minimum Temperature:" and "Maximum Temperature:" input rows are changed,
 several new rows are added, a "Temperature:" field is added while the old
 "Result Temperature:" row is replaced, a "DynaTemp-Range:" field is added
 while the old "Result DynaTemp-Range:" row is replaced, and a new
 "DynaTemp-Exponent:" field is added]
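With the dialog fields in place, Lite forwards both DynaTemp values to a KoboldCpp backend and, per the @@ -9289 hunk, to Horde as well. A hedged sketch of the relevant request-payload fragment (field names match the submit_payload.params assignments above; the values are illustrative only):

```python
# Illustrative fragment of the generation-request params Lite now builds.
submit_params = {
    "min_p": 0.0,
    "dynatemp_range": 0.3,     # 0.0 leaves DynaTemp disabled
    "dynatemp_exponent": 1.0,  # 1.0 keeps the entropy-to-temperature mapping linear
}
```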
diff --git a/koboldcpp.py b/koboldcpp.py index 062c03921..c99f5cd8d 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -79,6 +79,7 @@ class generation_inputs(ctypes.Structure): ("grammar_retain_state", ctypes.c_bool), ("quiet", ctypes.c_bool), ("dynatemp_range", ctypes.c_float), + ("dynatemp_exponent", ctypes.c_float), ("logit_biases", logit_bias * logit_bias_max)] class generation_outputs(ctypes.Structure): @@ -311,7 +312,7 @@ def load_model(model_filename): ret = handle.load_model(inputs) return ret -def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, logit_biases={}): +def generate(prompt, memory="", max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, logit_biases={}): global maxctx, args, currentusergenkey, totalgens inputs = generation_inputs() outputs = ctypes.create_unicode_buffer(ctypes.sizeof(generation_outputs)) @@ -340,6 +341,7 @@ def generate(prompt, memory="", max_length=32, max_context_length=512, temperatu inputs.stream_sse = stream_sse inputs.quiet = quiet inputs.dynatemp_range = dynatemp_range + inputs.dynatemp_exponent = dynatemp_exponent inputs.grammar = grammar.encode("UTF-8") inputs.grammar_retain_state = grammar_retain_state inputs.unban_tokens_rt = not use_default_badwordsids @@ -558,6 +560,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): trimstop=genparams.get('trim_stop', False), quiet=is_quiet, dynatemp_range=genparams.get('dynatemp_range', 0.0), + dynatemp_exponent=genparams.get('dynatemp_exponent', 1.0), logit_biases=genparams.get('logit_bias', {}) ) @@ -652,8 +655,10 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): await self.send_oai_sse_event('[DONE]') break except Exception as ex: - print("SSE streaming was interrupted due to an exception") + print("Token streaming was interrupted or aborted!") print(ex) + handle.abort_generate() + time.sleep(0.2) #short delay # flush buffers, sleep a bit to make sure all data sent, and then force close the connection self.wfile.flush() @@ -665,17 +670,18 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler): async def handle_request(self, genparams, api_format, stream_flag): tasks = [] - if stream_flag: - tasks.append(self.handle_sse_stream(api_format)) - - generate_task = asyncio.create_task(self.generate_text(genparams, api_format, stream_flag)) - tasks.append(generate_task) - try: + if stream_flag: + tasks.append(self.handle_sse_stream(api_format)) + + generate_task = asyncio.create_task(self.generate_text(genparams, api_format, stream_flag)) + tasks.append(generate_task) + await asyncio.gather(*tasks) generate_result = generate_task.result() return generate_result except (BrokenPipeError, ConnectionAbortedError) as cae: # attempt to abort if connection lost + 
diff --git a/llama.cpp b/llama.cpp
index a17c4407f..87b559697 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8479,12 +8479,12 @@ void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array
     llama_sample_temp(ctx, candidates_p, temp);
 }

-void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp, float min_temp = 0, float max_temp = 2.0f) {
+void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp, float min_temp = 0, float max_temp = 2.0f, float dynatemp_exponent = 1.0f) {
     const int64_t t_start_sample_us = ggml_time_us();

     llama_sample_softmax(ctx, candidates_p);

-    float exponent_val = 1.0f;
+    float exponent_val = dynatemp_exponent;

     // Calculate entropy of the softmax probabilities
     float entropy = 0.0f;

diff --git a/llama.h b/llama.h
index 04756bfcd..61943d5ec 100644
--- a/llama.h
+++ b/llama.h
@@ -771,7 +771,8 @@ extern "C" {
             float   p,
             size_t  min_keep,
             float   min_temp,
-            float   max_temp);
+            float   max_temp,
+            float   dynatemp_exponent);

     /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
     LLAMA_API void llama_sample_tail_free(
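For context, the exponent threaded through this patch ends up in llama.cpp's entropy sampler, which maps the normalized entropy of the candidate distribution into the [min_temp, max_temp] window. A Python sketch of that mapping as we read the surrounding llama.cpp code (our reconstruction, not patch code):

```python
import math

def entropy_dynatemp(probs: list[float], min_temp: float, max_temp: float,
                     dynatemp_exponent: float = 1.0) -> float:
    """Our reconstruction of the entropy-to-temperature mapping in llama_sample_entropy."""
    entropy = -sum(p * math.log(p) for p in probs if p > 0.0)
    max_entropy = math.log(len(probs))  # entropy of a uniform distribution
    normalized = entropy / max_entropy if max_entropy > 0.0 else 0.0
    # exponent > 1 keeps sampling cooler unless the model is genuinely uncertain;
    # exponent < 1 pushes toward max_temp even at moderate uncertainty.
    return min_temp + (max_temp - min_temp) * (normalized ** dynatemp_exponent)
```

With dynatemp_exponent = 1.0, the default everywhere in this patch, the mapping stays linear, so existing DynaTemp behavior is unchanged.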