diff --git a/embd_res/klite.embd b/embd_res/klite.embd index 49240b533..99f55dee6 100644 --- a/embd_res/klite.embd +++ b/embd_res/klite.embd @@ -3936,7 +3936,7 @@ Current version indicated by LITEVER below. top_p: 0.92, min_p: 0.00, presence_penalty: 0.00, - powerlaw_target: -1.0, + adaptivep_target: -1.0, sampler_seed: -1, top_k: 100, top_a: 0, @@ -3975,7 +3975,7 @@ Current version indicated by LITEVER below. top_p: defaultsettings.top_p, min_p: defaultsettings.min_p, presence_penalty: defaultsettings.presence_penalty, - powerlaw_target: defaultsettings.powerlaw_target, + adaptivep_target: defaultsettings.adaptivep_target, top_a: defaultsettings.top_a, typical: defaultsettings.typ_s, tfs: defaultsettings.tfs_s, @@ -3984,7 +3984,7 @@ Current version indicated by LITEVER below. rep_pen_slope: defaultsettings.rep_pen_slope, sampler_order: defaultsettings.sampler_order }, - {"preset":"Simple Logical","description":"A very predictable preset with low randomness.","temp":0.3,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":100,"top_p":0.6,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.02,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Balanced","description":"A good balanced preset with medium randomness.","temp":0.75,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":100,"top_p":0.92,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.05,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Creative","description":"A wild and unpredictable preset with higher randomness.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":100,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Basic Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic Top-nsigma","description":"A good default for Top-nsigma, only works on backends with Top-nsigma.","temp":1,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":1.0,"top_k":0,"top_p":1,"min_p":0.01,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic DynaTemp","description":"A good default for DynaTemp, only works on backends with it.","temp":1.25,"dynatemp_range":0.75,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SmoothSample","description":"A good default for Smooth Sampling, only works on backends with it.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.25,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SillyTavern","description":"Similar to default preset used in SillyTavern.","temp":0.75,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":40,"top_p":0.6,"min_p":0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1.0,"rep_pen":1.18,"rep_pen_range":1024,"rep_pen_slope":0.8,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Immortal","description":"Modernized version of the Godlike preset, designed for creative and longer story co-writing use.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":1.75,"top_k":0,"top_p":1.0,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"Neutral (Disabled)","description":"Sets all samplers neutralized, allowing you to customize your own.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":200,"top_p":1.0,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.0,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"CoherentCreativity (Legacy)","description":"Legacy preset. A good balance between coherence, creativity, and quality of prose.","rep_pen":1.2,"rep_pen_range":360,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.5,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"typical":1},{"preset":"Godlike (Legacy)","description":"Legacy preset. Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"LiminalDrift (Legacy)","description":"Legacy preset. Sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"powerlaw_target":-1.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]} + {"preset":"Simple Logical","description":"A very predictable preset with low randomness.","temp":0.3,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":100,"top_p":0.6,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.02,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Balanced","description":"A good balanced preset with medium randomness.","temp":0.75,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":100,"top_p":0.92,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.05,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Simple Creative","description":"A wild and unpredictable preset with higher randomness.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":100,"top_p":0.98,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.1,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Basic Min-P","description":"A good default for Min-P, only works on backends with min-p.","temp":1.25,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.1,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic Top-nsigma","description":"A good default for Top-nsigma, only works on backends with Top-nsigma.","temp":1,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":1.0,"top_k":0,"top_p":1,"min_p":0.01,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic DynaTemp","description":"A good default for DynaTemp, only works on backends with it.","temp":1.25,"dynatemp_range":0.75,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SmoothSample","description":"A good default for Smooth Sampling, only works on backends with it.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.25,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.05,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.03,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,5,0,1,3,4,2]},{"preset":"Basic SillyTavern","description":"Similar to default preset used in SillyTavern.","temp":0.75,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":40,"top_p":0.6,"min_p":0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1.0,"rep_pen":1.18,"rep_pen_range":1024,"rep_pen_slope":0.8,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"Immortal","description":"Modernized version of the Godlike preset, designed for creative and longer story co-writing use.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":1.75,"top_k":0,"top_p":1.0,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"Neutral (Disabled)","description":"Sets all samplers neutralized, allowing you to customize your own.","temp":1.0,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":200,"top_p":1.0,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0,"typical":1,"tfs":1,"rep_pen":1.0,"rep_pen_range":360,"rep_pen_slope":0.7,"sampler_order":[6,0,1,3,4,2,5]},{"preset":"CoherentCreativity (Legacy)","description":"Legacy preset. A good balance between coherence, creativity, and quality of prose.","rep_pen":1.2,"rep_pen_range":360,"rep_pen_slope":0,"sampler_order":[6,5,0,2,3,1,4],"temp":0.5,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"tfs":0.99,"top_a":0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"typical":1},{"preset":"Godlike (Legacy)","description":"Legacy preset. Makes AI give a descriptive and sensual output.","temp":0.7,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":0.5,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0.75,"typical":0.19,"tfs":0.97,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,5,4,3,2,1,0]},{"preset":"LiminalDrift (Legacy)","description":"Legacy preset. Sometimes surreal situations arise based on information already present in the story.","temp":0.66,"dynatemp_range":0.0,"dynatemp_exponent":1.0,"smoothing_factor":0.0,"smoothing_curve":1.0,"nsigma":0.0,"top_k":0,"top_p":1,"min_p":0.0,"presence_penalty":0.0,"adaptivep_target":-1.0,"top_a":0.96,"typical":0.6,"tfs":1,"rep_pen":1.1,"rep_pen_range":1024,"rep_pen_slope":0.7,"sampler_order":[6,4,5,1,0,2,3]} ]; const instructpresets = [ @@ -14524,7 +14524,7 @@ Current version indicated by LITEVER below. document.getElementById("dynatemp_overview").innerText = (localsettings.dynatemp_range!=0?"ON":"OFF"); document.getElementById("second_ep_overview").innerText = (localsettings.second_ep_qty>0 && localsettings.second_ep_url?"ON":"OFF"); document.getElementById("presence_penalty").value = localsettings.presence_penalty; - document.getElementById("powerlaw_target").value = localsettings.powerlaw_target; + document.getElementById("adaptivep_target").value = localsettings.adaptivep_target; document.getElementById("sampler_seed").value = localsettings.sampler_seed; document.getElementById("top_k").value = document.getElementById("top_k_slide").value = localsettings.top_k; document.getElementById("top_a").value = localsettings.top_a; @@ -14794,7 +14794,7 @@ Current version indicated by LITEVER below. if (found) { document.getElementById("temperature").value = document.getElementById("temperature_slide").value = found.temp; document.getElementById("presence_penalty").value = found.presence_penalty; - document.getElementById("powerlaw_target").value = found.powerlaw_target; + document.getElementById("adaptivep_target").value = found.adaptivep_target; document.getElementById("min_p").value = found.min_p; document.getElementById("dynatemp_range").value = found.dynatemp_range; document.getElementById("dynatemp_exponent").value = found.dynatemp_exponent; @@ -14884,7 +14884,7 @@ Current version indicated by LITEVER below. document.getElementById("presetsdesc").innerText = found.description; let changed = (document.getElementById("temperature").value != found.temp || document.getElementById("presence_penalty").value != found.presence_penalty || - document.getElementById("powerlaw_target").value != found.powerlaw_target || + document.getElementById("adaptivep_target").value != found.adaptivep_target || document.getElementById("min_p").value != found.min_p || document.getElementById("dynatemp_range").value != found.dynatemp_range || document.getElementById("dynatemp_exponent").value != found.dynatemp_exponent || @@ -15174,7 +15174,7 @@ Current version indicated by LITEVER below. localsettings.smoothing_curve = parseFloat(document.getElementById("smoothing_curve").value); localsettings.nsigma = parseFloat(document.getElementById("nsigma").value); localsettings.presence_penalty = parseFloat(document.getElementById("presence_penalty").value); - localsettings.powerlaw_target = parseFloat(document.getElementById("powerlaw_target").value); + localsettings.adaptivep_target = parseFloat(document.getElementById("adaptivep_target").value); localsettings.top_k = parseInt(document.getElementById("top_k").value); localsettings.top_a = parseFloat(document.getElementById("top_a").value); localsettings.typ_s = parseFloat(document.getElementById("typ_s").value); @@ -15391,7 +15391,7 @@ Current version indicated by LITEVER below. localsettings.smoothing_curve = cleannum(localsettings.smoothing_curve, 0.1, 5.0); localsettings.nsigma = cleannum(localsettings.nsigma, 0.0, 5.0); localsettings.presence_penalty = cleannum(localsettings.presence_penalty, -2, 2); - localsettings.powerlaw_target = cleannum(localsettings.powerlaw_target, -1, 1); + localsettings.adaptivep_target = cleannum(localsettings.adaptivep_target, -1, 1); localsettings.top_k = cleannum(Math.floor(localsettings.top_k), 0, 300); localsettings.top_a = cleannum(localsettings.top_a, 0, 1); localsettings.typ_s = cleannum(localsettings.typ_s, 0, 1); @@ -18991,7 +18991,7 @@ Current version indicated by LITEVER below. if((custom_kobold_endpoint != "" && is_using_kcpp_with_mirostat()) || custom_oai_endpoint!="") { submit_payload.params.presence_penalty = localsettings.presence_penalty; - submit_payload.params.power_law_target = localsettings.powerlaw_target; + submit_payload.params.adaptive_target = localsettings.adaptivep_target; submit_payload.params.logit_bias = JSON.parse(JSON.stringify(localsettings.logitbiasdict)); } @@ -27698,9 +27698,9 @@ Current version indicated by LITEVER below.
-
PLaw.Tgt.
+
Adapt.P.
-
+
diff --git a/expose.h b/expose.h index c4810fd5a..569993231 100644 --- a/expose.h +++ b/expose.h @@ -123,7 +123,7 @@ struct generation_inputs const float dynatemp_exponent = 1.0f; const float smoothing_factor = 0.0f; const float smoothing_curve = 1.0f; - const float power_law_target = -1.0f; + const float adaptive_target = -1.0f; const float dry_multiplier = 0.0f; const float dry_base = 0.0f; const int dry_allowed_length = 0; diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 81b2676f6..835742d18 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -125,8 +125,8 @@ static std::vector current_context_tokens; static size_t mem_per_token = 0; static std::vector logits; static std::vector smartcontext; -static float power_law_weighted_sum = 0; //power law sampling state vars -static float power_law_total_weight = 0; +static float adaptive_p_weighted_sum = 0; //adaptive p sampling state vars +static float adaptive_p_total_weight = 0; static std::vector stop_sequence; static std::vector special_stop_sequence; //for stop sequences that don't have a string representation static std::vector banned_tokens; @@ -1267,7 +1267,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe } } -void sample_power_law( +void sample_adaptive_p( float target, // desired average probability (0..1), <=0 disables float & weighted_sum, // persistent EMA state float & total_weight, // persistent EMA state @@ -1290,7 +1290,7 @@ llama_token_data_array * cur_p) // compute the adapted target probability for the current sampling step float computed_target = std::clamp(total_weight == 0.0f ? target : 2.0f * target - (weighted_sum / total_weight),0.0f, 1.0f); - // power law transform + // adaptive p transform const float k = 4.0f; // controls sharpness for (size_t i = 0; i < cur_p->size; ++i) { float dist = (cur_p->data[i].p - computed_target) * inv_width; @@ -1301,18 +1301,18 @@ llama_token_data_array * cur_p) cur_p->sorted = false; sample_softmax(cur_p); - //update EMA history AFTER sampling, update_power_law_history(original_prob[idx]) + //update EMA history AFTER sampling, update_adaptive_p_history(original_prob[idx]) } -inline void power_law_update_history(float selected_token_prob, float & weighted_sum, float & total_weight) { +inline void adaptive_p_update_history(float selected_token_prob, float & weighted_sum, float & total_weight) { // decay controls how quickly history influence fades (0.0 to 0.99) // lower values = faster adaptation, more reactive to recent tokens // higher values = slower adaptation, more stable over time // effective history length ≈ 1/(1-decay) tokens // example: decay=0.5 --> ~2 tokens; decay=0.9 --> ~10 tokens; decay=0.95 --> ~20 tokens // keep <= 0.99 to prevent unbounded accumulation - const float power_law_decay = 0.90f; - weighted_sum = selected_token_prob + power_law_decay * weighted_sum; - total_weight = 1.0f + power_law_decay * total_weight; + const float adaptive_p_decay = 0.90f; + weighted_sum = selected_token_prob + adaptive_p_decay * weighted_sum; + total_weight = 1.0f + adaptive_p_decay * total_weight; } @@ -1741,7 +1741,7 @@ void sample_guidance(struct llama_context * ctx, struct llama_context * guidance int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float rep_pen_slope, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float nsigma, float temp, std::mt19937 & rng, int mirostat, float mirostat_tau, float mirostat_eta, float dry_multiplier, float dry_base, int dry_allowed_length, int dry_penalty_last_n, float xtc_threshold, float xtc_probability, -const std::vector & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent, float smoothing_factor, float smoothing_curve, float power_law_target) +const std::vector & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent, float smoothing_factor, float smoothing_curve, float adaptive_target) { // printf("SampleLogits called with: n_ctx=%d, n_vocab=%d, rep_pen_range=%d, rep_pen=%f, rep_pen_slope=%f, presence_penalty=%f, top_k=%f, top_a=%f, top_p=%f, min_p=%f, typical_p=%f, tfs=%f, nsigma=%f, temp=%f, mirostat=%d, mirostat_tau=%f, mirostat_eta=%f, dry_multiplier=%f, dry_base=%f, dry_allowed_length=%d, dry_penalty_last_n=%d, xtc_threshold=%f, xtc_probability=%f, sampler_order_size=%zu, dynatemp_range=%f, dynatemp_exponent=%f, smoothing_factor=%f\n", // n_ctx, n_vocab, rep_pen_range, rep_pen, rep_pen_slope, presence_penalty, top_k, top_a, top_p, min_p, typical_p, tfs, nsigma, temp, mirostat, mirostat_tau, mirostat_eta, dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n, xtc_threshold, xtc_probability, sampler_order.size(), dynatemp_range, dynatemp_exponent, smoothing_factor); @@ -1847,8 +1847,8 @@ const std::vector & sampler_order, llama_grammar * grammar, float dyna } //xtc always last sample_xtc(&candidates_p, xtc_threshold, xtc_probability, rng); - //power law must be last, it messes up all probs - sample_power_law(power_law_target, power_law_weighted_sum, power_law_total_weight, &candidates_p); + //adaptive p must be last, it messes up all probs + sample_adaptive_p(adaptive_target, adaptive_p_weighted_sum, adaptive_p_total_weight, &candidates_p); id = sample_token(&candidates_p, rng); } @@ -3444,8 +3444,8 @@ generation_outputs gpttype_generate(const generation_inputs inputs) } } - power_law_weighted_sum = 0; - power_law_total_weight = 0; + adaptive_p_weighted_sum = 0; + adaptive_p_total_weight = 0; //handle custom token bans and antislop phrase banning banned_phrases.clear(); @@ -3655,7 +3655,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) kcpp_data->n_ctx = inputs.max_context_length; kcpp_data->smoothing_factor = inputs.smoothing_factor; kcpp_data->smoothing_curve = inputs.smoothing_curve; - kcpp_data->power_law_target = inputs.power_law_target; + kcpp_data->adaptive_target = inputs.adaptive_target; // Parse dry sequence breakers / restart sequences kcpp_data->dry_sequence_breakers.clear(); @@ -4484,7 +4484,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs) const float dynatemp_exponent = kcpp_data->dynatemp_exponent; const float smoothing_factor = kcpp_data->smoothing_factor; const float smoothing_curve = kcpp_data->smoothing_curve; - const float power_law_target = kcpp_data->power_law_target; + const float adaptive_target = kcpp_data->adaptive_target; if (!startedsampling) { @@ -4562,9 +4562,9 @@ generation_outputs gpttype_generate(const generation_inputs inputs) lowestLogit = LowestLogit(logits); } - //if power law sampling is used, we need to cache the original probabilities + //if adaptive p sampling is used, we need to cache the original probabilities std::vector original_candidates; - if(power_law_target > 0.0f) + if(adaptive_target > 0.0f) { original_candidates.reserve(n_vocab); for (llama_token token_id = 0; token_id < n_vocab; token_id++) { @@ -4618,11 +4618,11 @@ generation_outputs gpttype_generate(const generation_inputs inputs) kcpp_data->mirostat, kcpp_data->mirostat_tau, kcpp_data->mirostat_eta, kcpp_data->dry_multiplier, kcpp_data->dry_base, kcpp_data->dry_allowed_length, kcpp_data->dry_penalty_last_n, kcpp_data->xtc_threshold, kcpp_data->xtc_probability, - sampler_order, grammar, dynatemp_range, dynatemp_exponent, smoothing_factor, smoothing_curve, power_law_target); + sampler_order, grammar, dynatemp_range, dynatemp_exponent, smoothing_factor, smoothing_curve, adaptive_target); - if (power_law_target > 0.0f) { + if (adaptive_target > 0.0f) { float original_prob = original_candidates[id].p; - power_law_update_history(original_prob, power_law_weighted_sum, power_law_total_weight); + adaptive_p_update_history(original_prob, adaptive_p_weighted_sum, adaptive_p_total_weight); } if(draft_used) diff --git a/koboldcpp.py b/koboldcpp.py index b7041d457..7c816427d 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -265,7 +265,7 @@ class generation_inputs(ctypes.Structure): ("dynatemp_exponent", ctypes.c_float), ("smoothing_factor", ctypes.c_float), ("smoothing_curve", ctypes.c_float), - ("power_law_target", ctypes.c_float), + ("adaptive_target", ctypes.c_float), ("dry_multiplier", ctypes.c_float), ("dry_base", ctypes.c_float), ("dry_allowed_length", ctypes.c_int), @@ -1604,8 +1604,8 @@ def generate(genparams, stream_flag=False): dynatemp_exponent = tryparsefloat(genparams.get('dynatemp_exponent', 1.0),1.0) smoothing_factor = tryparsefloat(genparams.get('smoothing_factor', 0.0),0.0) smoothing_curve = tryparsefloat(genparams.get('smoothing_curve', 1.0),1.0) - power_law_target = tryparsefloat(genparams.get('power_law_target', -1.0),-1.0) - if power_law_target>0 and min_p<=0 and top_p>=1.0: #power law sampler requires a truncation sampler first, force a tiny min-p + adaptive_target = tryparsefloat(genparams.get('adaptive_target', -1.0),-1.0) + if adaptive_target>0 and min_p<=0 and top_p>=1.0: #adaptive p sampler requires a truncation sampler first, force a tiny min-p min_p = 0.01 logit_biases = genparams.get('logit_bias', {}) render_special = genparams.get('render_special', False) @@ -1670,7 +1670,7 @@ def generate(genparams, stream_flag=False): inputs.dynatemp_exponent = dynatemp_exponent inputs.smoothing_factor = smoothing_factor inputs.smoothing_curve = smoothing_curve - inputs.power_law_target = power_law_target + inputs.adaptive_target = adaptive_target inputs.grammar = grammar.encode("UTF-8") inputs.grammar_retain_state = grammar_retain_state inputs.allow_eos_token = not ban_eos_token diff --git a/otherarch/otherarch.h b/otherarch/otherarch.h index 9c177284c..7fd4658b2 100644 --- a/otherarch/otherarch.h +++ b/otherarch/otherarch.h @@ -50,7 +50,7 @@ struct kcpp_params { float xtc_probability = 0; float dynatemp_range = 0.0f; // enables DynaTemp if neq 0. dynatemp_min = temperature - dt_range, dynatemp_max = temperature + dt_range float dynatemp_exponent = 1.0f; - float power_law_target = -1.0f; // 0.0 - 1.0, <=0.0 is disabled + float adaptive_target = -1.0f; // 0.0 - 1.0, <=0.0 is disabled std::string model_filename = ""; // model path std::string prompt = "";