Mirror of https://github.com/LostRuins/koboldcpp.git
Synced 2025-09-11 09:34:37 +00:00
Remove Unnecessary Rep Counting (#1394)
* stop counting reps
* fix range-based initializer
* strike that - reverse it
parent c088355d01
commit e85c0e6901
1 changed file with 8 additions and 20 deletions
@@ -11,6 +11,7 @@
 #include <time.h>
 #include <mutex>
 #include <unordered_map>
+#include <unordered_set>
 #include "model_adapter.h"
 #include "otherarch.h"
 #include "llama.h"
@@ -1188,18 +1189,8 @@ void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float rep_pen_s
     const int64_t t_start_sample_us = ggml_time_us();
 
     // Create a frequency map to count occurrences of each token in last_tokens
-    std::unordered_map<llama_token, int> token_count_near;
-    std::unordered_map<llama_token, int> token_count_far;
-    for (size_t i = 0; i < last_n_repeat; ++i) {
-        if((i*2) >= last_n_repeat)
-        {
-            token_count_near[last_tokens[i]]++;
-        }
-        else
-        {
-            token_count_far[last_tokens[i]]++;
-        }
-    }
+    std::unordered_set<llama_token> tokens_near(last_tokens + last_n_repeat / 2, last_tokens + last_n_repeat);
+    std::unordered_set<llama_token> tokens_far(last_tokens, last_tokens + last_n_repeat / 2);
 
     float rep_pen_reduced = rep_pen;
     if(rep_pen_reduced>1.0f)
@@ -1207,15 +1198,13 @@ void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float rep_pen_s
         rep_pen_reduced = 1.0f + ((rep_pen-1.0f)*rep_pen_slope);
     }
     for (size_t i = 0; i < candidates->size; ++i) {
-        const auto token_in_near = token_count_near.find(candidates->data[i].id);
-        const auto token_in_far = token_count_far.find(candidates->data[i].id);
-        bool in_near = (token_in_near != token_count_near.end());
-        bool in_far = (token_in_far != token_count_far.end());
-        if (!in_near && !in_far) {
+        const bool token_in_near = tokens_near.find(candidates->data[i].id) != tokens_near.end();
+        const bool token_in_far = tokens_far.find(candidates->data[i].id) != tokens_far.end();
+        if (!token_in_near && !token_in_far) {
             continue;
         }
 
-        float penalty = (in_near?rep_pen:rep_pen_reduced);
+        float penalty = (token_in_near?rep_pen:rep_pen_reduced);
 
         // The academic publication that described this technique actually just only divided, but that would cause tokens with negative logits to become more likely, which is obviously wrong.
         // This is common fix for this problem, which is to multiply by the penalty instead of dividing.
@@ -1229,7 +1218,6 @@ void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float rep_pen_s
     }
 
     candidates->sorted = false;
-
 }
 
 void sample_top_p(llama_token_data_array * cur_p, float p, size_t min_keep) {
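
What the change does: the old code filled two std::unordered_map frequency counters over the last last_n_repeat tokens, but the sampling loop only ever tested membership, so the counts were dead weight. The new code builds two std::unordered_set containers directly from iterator ranges over the two halves of the last_tokens window, tokens_far for the older half and tokens_near for the more recent half, which is likely what the "fix range-based initializer" message in the squashed commit refers to. Below is a minimal standalone sketch of that membership split, assuming last_tokens holds the window oldest-first; the toy token values and the main() harness are illustrative, not from the repository.

// Sketch of the set-based near/far membership test.
// Assumes last_tokens holds the most recent last_n_repeat token ids, oldest first.
#include <cstdio>
#include <unordered_set>
#include <vector>

using llama_token = int;

int main() {
    // Illustrative token window, not real model output.
    std::vector<llama_token> last_tokens = {5, 9, 5, 2, 7, 9, 3, 7};
    const size_t last_n_repeat = last_tokens.size();

    // Older half of the window: members get the slope-reduced penalty.
    std::unordered_set<llama_token> tokens_far(
        last_tokens.data(), last_tokens.data() + last_n_repeat / 2);
    // Recent half of the window: members get the full penalty.
    std::unordered_set<llama_token> tokens_near(
        last_tokens.data() + last_n_repeat / 2,
        last_tokens.data() + last_n_repeat);

    for (llama_token id : {5, 7, 9, 4}) {
        const bool in_near = tokens_near.find(id) != tokens_near.end();
        const bool in_far  = tokens_far.find(id)  != tokens_far.end();
        std::printf("token %d: near=%d far=%d\n", id, in_near, in_far);
    }
}

An unordered_set still gives average O(1) lookups in the candidate loop; it simply stops storing counts that nothing reads.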
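For tokens found only in the far half, the surrounding context keeps the existing slope reduction, rep_pen_reduced = 1.0f + ((rep_pen-1.0f)*rep_pen_slope), so a slope of 1.0 applies the full penalty to older tokens and a slope of 0.0 removes it for them entirely. A quick worked example with illustrative parameter values, not repo defaults:

// Worked example of the slope-reduced penalty from the diff's context lines.
#include <cstdio>

int main() {
    const float rep_pen = 1.3f;
    for (float rep_pen_slope : {0.0f, 0.5f, 1.0f}) {
        float rep_pen_reduced = rep_pen;
        if (rep_pen_reduced > 1.0f) {
            rep_pen_reduced = 1.0f + ((rep_pen - 1.0f) * rep_pen_slope);
        }
        std::printf("slope %.1f -> reduced penalty %.2f\n",
                    rep_pen_slope, rep_pen_reduced);
    }
    // slope 0.0 -> 1.00, slope 0.5 -> 1.15, slope 1.0 -> 1.30
}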
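The comment retained in the diff notes that the technique's original paper only divided logits by the penalty, which would push a negative logit toward zero and make that token more likely; the common fix, which this code uses, is to multiply negative logits by the penalty instead. A small sketch of that sign-aware application; apply_penalty is an illustrative helper name, not a function in the repository:

// Sign-aware repetition penalty: divide positive logits, multiply negative
// ones, so penalized tokens always become less likely.
#include <cstdio>

static float apply_penalty(float logit, float penalty) {
    return (logit <= 0.0f) ? logit * penalty : logit / penalty;
}

int main() {
    const float penalty = 1.3f;
    for (float logit : {2.0f, -2.0f}) {
        std::printf("logit %+.2f -> %+.2f\n", logit, apply_penalty(logit, penalty));
    }
    // +2.00 -> +1.54 (less likely); -2.00 -> -2.60 (also less likely).
    // Naive division would have mapped -2.00 to -1.54, i.e. more likely.
}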