Mirror of https://github.com/LostRuins/koboldcpp.git (synced 2025-09-11 09:34:37 +00:00)
Full DynaTemp implementation + UI (#600)
* move Dynatemp changes to new branch
* fix float header
* Properly reintroduce variable expert count (controllable through experts.txt)
* first pass at DynaTemp UI: checkbox partially implemented, Min and Max Temp implemented
* DynaTemp UI checkbox: trigger DynaTemp on checkbox
* DynaTemp UI checkbox edition. Hell Yeah! DynaTemp!
* Remove greedy dynatemp
* Fix race condition caused by debug print
* Fix broken presets and mirostat
* Remove debug function + HHI temp; also removed unnecessary softmax double precision
* Fix whitespace (?) for generate function
* epic upstream renaming scheme fix
* fix stupid indents
* Other cleanup: reintroduce unused rep pen function, move temp functions before entropy dynamic temp
* Slight indent fix
* revert batch pyinstaller maker to mainline, and delete experts.txt since adjustable routing is also being removed for the PR
* compact dynatemp into a single value, dynatemp_range. This is a float representing the allowed deviation above and below the base temperature when using dynatemp. Thus, to get dynatemp_min=0.3 and dynatemp_max=0.5, we would simply set temperature=0.4 and dynatemp_range=0.1. Functionally dynatemp operates the same, but this simplifies usage and makes it a single easy-to-adjust value.

---------

Co-authored-by: Alexander Abushady <aabushady214@gmail.com>
Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com>
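The compaction described in the final bullet can be sketched as a tiny helper; the function below is illustrative only (the name, signature, and clamp-at-zero behavior are assumptions, not code from this commit):

// Illustrative only: expand the single dynatemp_range value back into the
// (min_temp, max_temp) pair consumed by the sampler. Names are hypothetical.
static void dynatemp_expand(float temperature, float dynatemp_range,
                            float * min_temp, float * max_temp) {
    float lo = temperature - dynatemp_range;
    *min_temp = lo > 0.0f ? lo : 0.0f; // assumed clamp so the floor never goes negative
    *max_temp = temperature + dynatemp_range;
}
// Example from the commit message: temperature=0.4, dynatemp_range=0.1
// yields min_temp=0.3 and max_temp=0.5.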
parent 427ba21e62
commit 123bff9a0f
9 changed files with 132 additions and 8 deletions
llama.cpp | 71
@@ -8510,10 +8510,81 @@ void llama_sample_temp(struct llama_context * ctx, llama_token_data_array * cand
    }
}

void llama_sample_temperature(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp) {
    llama_sample_temp(ctx, candidates_p, temp);
}

void llama_sample_entropy(struct llama_context * ctx, llama_token_data_array * candidates_p, float temp, float min_temp = 0, float max_temp = 2.0f) {
    const int64_t t_start_sample_us = ggml_time_us();

    llama_sample_softmax(ctx, candidates_p);

    float exponent_val = 1.0f;

    // Calculate entropy of the softmax probabilities
    float entropy = 0.0f;
    for (size_t i = 0; i < candidates_p->size; ++i) {
        float prob = candidates_p->data[i].p;
        if (prob > 0.0f) { // Ensure no log(0)
            entropy -= prob * logf(prob);
        }
    }

    // Calculate maximum possible entropy
    float max_entropy = -logf(1.0f / candidates_p->size);

    // Guard against division by zero
    if (max_entropy == 0.0f) {
        max_entropy = 1.0f; // This ensures that normalized_entropy will be 0 when entropy is 0
    }

    // Normalize the entropy
    float normalized_entropy = entropy / max_entropy;

    // Map the normalized entropy to the desired temperature range using the power function
    float dyn_temp = min_temp + (max_temp - min_temp) * powf(normalized_entropy, exponent_val);

    //todo: Ensure to hide print statements unless debugging!
    printf("Your text maxtemp value is: %f\n", max_temp);
    // Print the variables
    printf("Entropy: %f\n", entropy);
    printf("Max Possible Entropy: %f\n", max_entropy);
    printf("Normalized Entropy: %f\n", normalized_entropy);
    printf("Exponent: %f\n", exponent_val);
    printf("Dynamic Temperature (dyn_temp): %f\n", dyn_temp);

    // Apply the dynamically calculated temperature scaling
    for (size_t i = 0; i < candidates_p->size; ++i) {
        candidates_p->data[i].logit /= dyn_temp;
    }

    // Re-compute softmax probabilities after scaling logits with dynamic temperature
    double max_l_double = candidates_p->data[0].logit;
    double cum_sum_double = 0.0;
    for (size_t i = 0; i < candidates_p->size; ++i) {
        double p = exp(candidates_p->data[i].logit - max_l_double);
        candidates_p->data[i].p = p; // Store the scaled probability
        cum_sum_double += p;
    }
    for (size_t i = 0; i < candidates_p->size; ++i) {
        candidates_p->data[i].p /= cum_sum_double; // Re-normalize the probabilities
    }

    //todo: Ensure to hide print statements unless debugging!
    // Print the updated top 25 probabilities after temperature scaling
    printf("\nUpdated Top 25 Probabilities After Dynamic Temperature Scaling (in percentages):\n");
    for (size_t i = 0; i < 25 && i < candidates_p->size; ++i) {
        printf("Token %zu: %f%%\n", i + 1, candidates_p->data[i].p * 100.0f);
    }

    if (ctx) {
        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
    }
}
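To make the mapping above concrete, here is a minimal standalone sketch (not part of the diff) that reruns the same entropy normalization and power mapping on a toy four-token distribution:

#include <cmath>
#include <cstdio>

int main() {
    // A toy softmax distribution over 4 tokens (already sums to 1)
    const float probs[4] = {0.7f, 0.2f, 0.07f, 0.03f};
    const float min_temp = 0.3f, max_temp = 0.5f, exponent_val = 1.0f;

    // Shannon entropy, skipping zero probabilities as the commit does
    float entropy = 0.0f;
    for (int i = 0; i < 4; ++i) {
        if (probs[i] > 0.0f) {
            entropy -= probs[i] * logf(probs[i]);
        }
    }

    // Maximum possible entropy for 4 outcomes is log(4); normalize into [0, 1]
    const float max_entropy = -logf(1.0f / 4.0f);
    const float normalized_entropy = entropy / max_entropy;

    // Same mapping as llama_sample_entropy: a confident (low-entropy) model
    // gets a temperature near min_temp, an uncertain one near max_temp.
    const float dyn_temp = min_temp + (max_temp - min_temp) * powf(normalized_entropy, exponent_val);

    // Prints dyn_temp of roughly 0.42 for this distribution
    printf("entropy=%.4f normalized=%.4f dyn_temp=%.4f\n", entropy, normalized_entropy, dyn_temp);
    return 0;
}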

// The llama.cpp repetition penalty code goes unused in kobold's API

void llama_sample_repetition_penalties(
        struct llama_context * ctx,
        llama_token_data_array * candidates,