mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
improve performance by actually applying nsigma's masking (#1602)
merging, please report any issues.
This commit is contained in:
parent
57ce374240
commit
0097de5c57
1 changed file with 5 additions and 7 deletions
|
@ -1433,12 +1433,11 @@ void sampler_typical(llama_token_data_array * cur_p, float p, size_t min_keep) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void sample_top_n_sigma(llama_token_data_array * cur_p, float nsigma) {
|
void sample_top_n_sigma(llama_token_data_array * cur_p, float nsigma) {
|
||||||
|
|
||||||
if (nsigma <= 0.0f || cur_p->size <= 1) {
|
if (nsigma <= 0.0f || cur_p->size <= 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// find max logit and calculate mean
|
// find max logit and calculate mean
|
||||||
float nsigmax = cur_p->data[0].logit;
|
float nsigmax = cur_p->data[0].logit;
|
||||||
float logits_sum = 0;
|
float logits_sum = 0;
|
||||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
for (size_t i = 0; i < cur_p->size; ++i) {
|
||||||
if (cur_p->data[i].logit > nsigmax) {
|
if (cur_p->data[i].logit > nsigmax) {
|
||||||
|
@ -1456,11 +1455,10 @@ void sample_top_n_sigma(llama_token_data_array * cur_p, float nsigma) {
|
||||||
float nsigstd = sqrt(nsigacc / cur_p->size);
|
float nsigstd = sqrt(nsigacc / cur_p->size);
|
||||||
|
|
||||||
//apply mask
|
//apply mask
|
||||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
auto last = std::remove_if(cur_p->data, cur_p->data + cur_p->size,
|
||||||
if (cur_p->data[i].logit < nsigmax - (nsigma * nsigstd)) {
|
[&](auto & tk) { return tk.logit < nsigmax - (nsigma * nsigstd); });
|
||||||
cur_p->data[i].logit -= 999.0f;
|
cur_p->size = last - cur_p->data;
|
||||||
}
|
|
||||||
}
|
|
||||||
sample_softmax(cur_p);
|
sample_softmax(cur_p);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue