mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 00:54:41 +00:00
Full DynaTemp implementation + UI (#600)
* move Dynatemp changes to new branch
* fix float header
* Properly reintroduce variable expert count
  Controllable through experts.txt
* first pass at DynaTemp UI
  Checkbox partially implemented, Min and Max Temp implemented
* DynaTemp UI Checkbox
  Trigger DynaTemp on checkbox
* DynaTemp UI checkbox edition
  Hell Yeah! DynaTemp!
* Remove greedy dynatemp
* Fix race condition caused by debug print
* Fixed broken presets and mirostat
  Fixes broken presets and mirostat
* Remove debug function + HHI temp
  Also removed unnecessary softmax double precision
* Fix whitespace (?) for generate function
* epic upstream renaming scheme fix
* fix stupid indents
* Other cleanup
  Reintroduce unused rep pen function, move temp functions first before entropy dynamic temp
* Slight indent fix
* revert batch pyinstaller maker to mainline and also delete experts.txt since adjustable routing is also being removed for the PR
* compact dynatemp into a single value dynatemp_range. This is a float which represents the allowed deviation from the base temperature when using dynatemp; the effective minimum and maximum temperatures are temperature - dynatemp_range and temperature + dynatemp_range (negative results are clamped to 0). Thus, if we want dynatemp_min=0.3 and dynatemp_max=0.5, we would simply set temperature=0.4 and dynatemp_range=0.1. Functionally dynatemp would operate the same, but this simplifies usage by making it a single, easy-to-adjust value.

---------

Co-authored-by: Alexander Abushady <aabushady214@gmail.com>
Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
This commit is contained in:
parent
427ba21e62
commit
123bff9a0f
9 changed files with 132 additions and 8 deletions
|
@ -482,7 +482,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
|
|||
}
|
||||
|
||||
int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng,
|
||||
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar)
|
||||
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dynatemp_range)
|
||||
{
|
||||
int id = 0;
|
||||
std::vector<llama_token_data> candidates;
|
||||
|
@ -541,7 +541,19 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers
|
|||
llama_sample_typical(nullptr, &candidates_p, typical_p,1);
|
||||
break;
|
||||
case KCPP_SAMPLER_TEMP:
|
||||
sample_temperature(&candidates_p, temp);
|
||||
if (dynatemp_range>0)
|
||||
{
|
||||
float dynatemp_min = temp - dynatemp_range;
|
||||
float dynatemp_max = temp + dynatemp_range;
|
||||
//do not allow negative values
|
||||
dynatemp_min = dynatemp_min<0?0:dynatemp_min;
|
||||
dynatemp_max = dynatemp_max<0?0:dynatemp_max;
|
||||
llama_sample_entropy(nullptr, &candidates_p, temp, dynatemp_min, dynatemp_max);
|
||||
}
|
||||
else
|
||||
{
|
||||
sample_temperature(&candidates_p, temp);
|
||||
}
|
||||
break;
|
||||
case KCPP_SAMPLER_REP_PEN:
|
||||
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, presence_penalty, &candidates_p);
|
||||
|
@ -1480,6 +1492,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
}
|
||||
|
||||
std::string addedmemory = inputs.memory;
|
||||
|
||||
kcpp_params->prompt = inputs.prompt;
|
||||
kcpp_params->seed = inputs.seed;
|
||||
kcpp_params->n_predict = inputs.max_length;
|
||||
|
@ -1495,10 +1508,12 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
kcpp_params->mirostat = inputs.mirostat;
|
||||
kcpp_params->mirostat_eta = inputs.mirostat_eta;
|
||||
kcpp_params->mirostat_tau = inputs.mirostat_tau;
|
||||
kcpp_params->dynatemp_range = inputs.dynatemp_range;
|
||||
kcpp_params->n_ctx = inputs.max_context_length;
|
||||
kcpp_params->n_batch = n_batch;
|
||||
kcpp_params->n_threads = n_threads;
|
||||
kcpp_params->n_threads_batch = n_blasthreads;
|
||||
|
||||
bool stream_sse = inputs.stream_sse;
|
||||
|
||||
bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
|
||||
|
@ -1889,6 +1904,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
const float presence_penalty = kcpp_params->presence_penalty;
|
||||
const float typical_p = kcpp_params->typical_p;
|
||||
const float tfs_z = kcpp_params->tfs_z;
|
||||
const float dynatemp_range = kcpp_params->dynatemp_range;
|
||||
|
||||
if (!startedsampling)
|
||||
{
|
||||
|
@ -1944,7 +1960,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs, generation_o
|
|||
|
||||
id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, presence_penalty,
|
||||
top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng,
|
||||
kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar);
|
||||
kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range);
|
||||
|
||||
if (grammar != nullptr) {
|
||||
grammar_accept_token(file_format, n_vocab, grammar, id);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue