mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-10 17:14:36 +00:00
rep pen slope works (+1 squashed commits)
Squashed commits: [535ad566] experiment with rep pen range
This commit is contained in:
parent
e1e6833a7a
commit
44443edfda
4 changed files with 39 additions and 12 deletions
|
@ -97,6 +97,7 @@ struct gpt_params {
|
||||||
float smoothing_factor = 0.00f; // 0.00 = disabled
|
float smoothing_factor = 0.00f; // 0.00 = disabled
|
||||||
float repeat_penalty = 1.10f; // 1.0 = disabled
|
float repeat_penalty = 1.10f; // 1.0 = disabled
|
||||||
int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||||
|
float rep_pen_slope = 1.0f;
|
||||||
float frequency_penalty = 0.00f; // 0.0 = disabled
|
float frequency_penalty = 0.00f; // 0.0 = disabled
|
||||||
float presence_penalty = 0.00f; // 0.0 = disabled
|
float presence_penalty = 0.00f; // 0.0 = disabled
|
||||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||||
|
|
1
expose.h
1
expose.h
|
@ -75,6 +75,7 @@ struct generation_inputs
|
||||||
const float tfs;
|
const float tfs;
|
||||||
const float rep_pen;
|
const float rep_pen;
|
||||||
const int rep_pen_range;
|
const int rep_pen_range;
|
||||||
|
const float rep_pen_slope = 1.0f;
|
||||||
const float presence_penalty = 0.0f;
|
const float presence_penalty = 0.0f;
|
||||||
const int mirostat = 0;
|
const int mirostat = 0;
|
||||||
const float mirostat_eta;
|
const float mirostat_eta;
|
||||||
|
|
|
@ -423,33 +423,50 @@ void sample_top_a(llama_token_data_array * candidates, float a, size_t min_keep)
|
||||||
candidates->size = last_idx;
|
candidates->size = last_idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float presence_penalty, llama_token_data_array * candidates_p)
|
void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float rep_pen_slope, float presence_penalty, llama_token_data_array * candidates_p)
|
||||||
{
|
{
|
||||||
auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), rep_pen_range), n_ctx);
|
auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), rep_pen_range), n_ctx);
|
||||||
|
|
||||||
const llama_token * last_tokens = last_n_tokens.data() + last_n_tokens.size() - last_n_repeat;
|
const llama_token * last_tokens = last_n_tokens.data() + last_n_tokens.size() - last_n_repeat;
|
||||||
size_t last_tokens_size = last_n_repeat;
|
size_t last_tokens_size = last_n_repeat;
|
||||||
llama_token_data_array * candidates = candidates_p;
|
llama_token_data_array * candidates = candidates_p;
|
||||||
float penalty = rep_pen;
|
|
||||||
|
|
||||||
if (last_tokens_size == 0 || (penalty == 1.0f && presence_penalty==0)) {
|
if (last_tokens_size == 0 || (rep_pen == 1.0f && presence_penalty==0)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int64_t t_start_sample_us = ggml_time_us();
|
const int64_t t_start_sample_us = ggml_time_us();
|
||||||
|
|
||||||
// Create a frequency map to count occurrences of each token in last_tokens
|
// Create a frequency map to count occurrences of each token in last_tokens
|
||||||
std::unordered_map<llama_token, int> token_count;
|
std::unordered_map<llama_token, int> token_count_near;
|
||||||
|
std::unordered_map<llama_token, int> token_count_far;
|
||||||
for (size_t i = 0; i < last_n_repeat; ++i) {
|
for (size_t i = 0; i < last_n_repeat; ++i) {
|
||||||
token_count[last_tokens[i]]++;
|
if((i*2) >= last_n_repeat)
|
||||||
|
{
|
||||||
|
token_count_near[last_tokens[i]]++;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
token_count_far[last_tokens[i]]++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float rep_pen_reduced = rep_pen;
|
||||||
|
if(rep_pen_reduced>1.0f)
|
||||||
|
{
|
||||||
|
rep_pen_reduced = 1.0f + ((rep_pen-1.0f)*rep_pen_slope);
|
||||||
|
}
|
||||||
for (size_t i = 0; i < candidates->size; ++i) {
|
for (size_t i = 0; i < candidates->size; ++i) {
|
||||||
const auto token_iter = token_count.find(candidates->data[i].id);
|
const auto token_in_near = token_count_near.find(candidates->data[i].id);
|
||||||
if (token_iter == token_count.end()) {
|
const auto token_in_far = token_count_far.find(candidates->data[i].id);
|
||||||
|
bool in_near = (token_in_near != token_count_near.end());
|
||||||
|
bool in_far = (token_in_far != token_count_far.end());
|
||||||
|
if (!in_near && !in_far) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float penalty = (in_near?rep_pen:rep_pen_reduced);
|
||||||
|
|
||||||
// The academic publication that described this technique only divided by the penalty, but that would cause tokens with negative logits to become more likely, which is obviously wrong.
|
// The academic publication that described this technique only divided by the penalty, but that would cause tokens with negative logits to become more likely, which is obviously wrong.
|
||||||
// The common fix for this problem is to multiply by the penalty instead of dividing.
|
// The common fix for this problem is to multiply by the penalty instead of dividing.
|
||||||
if (candidates->data[i].logit <= 0) {
|
if (candidates->data[i].logit <= 0) {
|
||||||
|
@ -520,7 +537,7 @@ void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_ar
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng,
|
int SampleLogits(const float * logits, int n_ctx, int n_vocab, int rep_pen_range, float rep_pen, float rep_pen_slope, float presence_penalty, float top_k, float top_a, float top_p, float min_p, float typical_p, float tfs, float temp, std::mt19937 & rng,
|
||||||
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent, float smoothing_factor)
|
int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers> & sampler_order, llama_grammar * grammar, float dynatemp_range, float dynatemp_exponent, float smoothing_factor)
|
||||||
{
|
{
|
||||||
int id = 0;
|
int id = 0;
|
||||||
|
@ -546,7 +563,7 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers
|
||||||
{
|
{
|
||||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
static float mirostat_mu = 2.0f * mirostat_tau;
|
||||||
const int mirostat_m = 100;
|
const int mirostat_m = 100;
|
||||||
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, presence_penalty, &candidates_p);
|
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, rep_pen_slope, presence_penalty, &candidates_p);
|
||||||
sample_temperature(&candidates_p, temp, smoothing_factor);
|
sample_temperature(&candidates_p, temp, smoothing_factor);
|
||||||
if (mirostat == 1)
|
if (mirostat == 1)
|
||||||
{
|
{
|
||||||
|
@ -596,7 +613,7 @@ int mirostat, float mirostat_tau, float mirostat_eta, const std::vector<samplers
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case KCPP_SAMPLER_REP_PEN:
|
case KCPP_SAMPLER_REP_PEN:
|
||||||
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, presence_penalty, &candidates_p);
|
sample_rep_pen(n_ctx, rep_pen_range, rep_pen, rep_pen_slope, presence_penalty, &candidates_p);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
printf("\nSampleLogits: Unknown Sampler : %d",sampler_order[i]);
|
printf("\nSampleLogits: Unknown Sampler : %d",sampler_order[i]);
|
||||||
|
@ -1716,6 +1733,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
kcpp_params->tfs_z = inputs.tfs;
|
kcpp_params->tfs_z = inputs.tfs;
|
||||||
kcpp_params->temp = inputs.temperature;
|
kcpp_params->temp = inputs.temperature;
|
||||||
kcpp_params->repeat_last_n = inputs.rep_pen_range;
|
kcpp_params->repeat_last_n = inputs.rep_pen_range;
|
||||||
|
kcpp_params->rep_pen_slope = inputs.rep_pen_slope;
|
||||||
kcpp_params->repeat_penalty = inputs.rep_pen;
|
kcpp_params->repeat_penalty = inputs.rep_pen;
|
||||||
kcpp_params->presence_penalty = inputs.presence_penalty;
|
kcpp_params->presence_penalty = inputs.presence_penalty;
|
||||||
kcpp_params->mirostat = inputs.mirostat;
|
kcpp_params->mirostat = inputs.mirostat;
|
||||||
|
@ -1753,6 +1771,10 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
{
|
{
|
||||||
kcpp_params->repeat_last_n = 1;
|
kcpp_params->repeat_last_n = 1;
|
||||||
}
|
}
|
||||||
|
if (kcpp_params->rep_pen_slope > 1 || kcpp_params->rep_pen_slope<=0)
|
||||||
|
{
|
||||||
|
kcpp_params->rep_pen_slope = 1;
|
||||||
|
}
|
||||||
if (kcpp_params->top_k < 1)
|
if (kcpp_params->top_k < 1)
|
||||||
{
|
{
|
||||||
kcpp_params->top_k = n_vocab; // all tokens in the vocabulary should be considered if top k is disabled
|
kcpp_params->top_k = n_vocab; // all tokens in the vocabulary should be considered if top k is disabled
|
||||||
|
@ -2222,7 +2244,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, presence_penalty,
|
id = SampleLogits(logitsPtr, nctx, n_vocab, last_n_size, repeat_penalty, kcpp_params->rep_pen_slope, presence_penalty,
|
||||||
top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng,
|
top_k, top_a, top_p, min_p, typical_p, tfs_z, temp, rng,
|
||||||
kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range, dynatemp_exponent, smoothing_factor);
|
kcpp_params->mirostat, kcpp_params->mirostat_tau, kcpp_params->mirostat_eta, sampler_order, grammar, dynatemp_range, dynatemp_exponent, smoothing_factor);
|
||||||
|
|
||||||
|
|
|
@ -75,6 +75,7 @@ class generation_inputs(ctypes.Structure):
|
||||||
("tfs", ctypes.c_float),
|
("tfs", ctypes.c_float),
|
||||||
("rep_pen", ctypes.c_float),
|
("rep_pen", ctypes.c_float),
|
||||||
("rep_pen_range", ctypes.c_int),
|
("rep_pen_range", ctypes.c_int),
|
||||||
|
("rep_pen_slope", ctypes.c_float),
|
||||||
("presence_penalty", ctypes.c_float),
|
("presence_penalty", ctypes.c_float),
|
||||||
("mirostat", ctypes.c_int),
|
("mirostat", ctypes.c_int),
|
||||||
("mirostat_tau", ctypes.c_float),
|
("mirostat_tau", ctypes.c_float),
|
||||||
|
@ -403,7 +404,7 @@ def load_model(model_filename):
|
||||||
ret = handle.load_model(inputs)
|
ret = handle.load_model(inputs)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[], bypass_eos_token=False):
|
def generate(prompt, memory="", images=[], max_length=32, max_context_length=512, temperature=0.7, top_k=100, top_a=0.0, top_p=0.92, min_p=0.0, typical_p=1.0, tfs=1.0, rep_pen=1.0, rep_pen_range=128, rep_pen_slope=1.0, presence_penalty=0.0, mirostat=0, mirostat_tau=5.0, mirostat_eta=0.1, sampler_order=[6,0,1,3,4,2,5], seed=-1, stop_sequence=[], use_default_badwordsids=False, stream_sse=False, grammar='', grammar_retain_state=False, genkey='', trimstop=False, quiet=False, dynatemp_range=0.0, dynatemp_exponent=1.0, smoothing_factor=0.0, logit_biases={}, render_special=False, banned_tokens=[], bypass_eos_token=False):
|
||||||
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
|
global maxctx, args, currentusergenkey, totalgens, pendingabortkey
|
||||||
inputs = generation_inputs()
|
inputs = generation_inputs()
|
||||||
inputs.prompt = prompt.encode("UTF-8")
|
inputs.prompt = prompt.encode("UTF-8")
|
||||||
|
@ -433,6 +434,7 @@ def generate(prompt, memory="", images=[], max_length=32, max_context_length=512
|
||||||
inputs.tfs = tfs
|
inputs.tfs = tfs
|
||||||
inputs.rep_pen = rep_pen
|
inputs.rep_pen = rep_pen
|
||||||
inputs.rep_pen_range = rep_pen_range
|
inputs.rep_pen_range = rep_pen_range
|
||||||
|
inputs.rep_pen_slope = rep_pen_slope
|
||||||
inputs.presence_penalty = presence_penalty
|
inputs.presence_penalty = presence_penalty
|
||||||
inputs.stream_sse = stream_sse
|
inputs.stream_sse = stream_sse
|
||||||
inputs.quiet = quiet
|
inputs.quiet = quiet
|
||||||
|
@ -812,6 +814,7 @@ class ServerRequestHandler(http.server.SimpleHTTPRequestHandler):
|
||||||
tfs=genparams.get('tfs', 1.0),
|
tfs=genparams.get('tfs', 1.0),
|
||||||
rep_pen=genparams.get('rep_pen', 1.0),
|
rep_pen=genparams.get('rep_pen', 1.0),
|
||||||
rep_pen_range=genparams.get('rep_pen_range', 256),
|
rep_pen_range=genparams.get('rep_pen_range', 256),
|
||||||
|
rep_pen_slope=genparams.get('rep_pen_slope', 1.0),
|
||||||
presence_penalty=genparams.get('presence_penalty', 0.0),
|
presence_penalty=genparams.get('presence_penalty', 0.0),
|
||||||
mirostat=genparams.get('mirostat', 0),
|
mirostat=genparams.get('mirostat', 0),
|
||||||
mirostat_tau=genparams.get('mirostat_tau', 5.0),
|
mirostat_tau=genparams.get('mirostat_tau', 5.0),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue