diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index e7b3135f2..39e238fc2 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include "model_adapter.h" #include "otherarch.h" #include "llama.h" @@ -1188,34 +1189,22 @@ void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float rep_pen_s const int64_t t_start_sample_us = ggml_time_us(); // Create a frequency map to count occurrences of each token in last_tokens - std::unordered_map token_count_near; - std::unordered_map token_count_far; - for (size_t i = 0; i < last_n_repeat; ++i) { - if((i*2) >= last_n_repeat) - { - token_count_near[last_tokens[i]]++; - } - else - { - token_count_far[last_tokens[i]]++; - } - } - + std::unordered_set tokens_near(last_tokens + last_n_repeat / 2, last_tokens + last_n_repeat); + std::unordered_set tokens_far(last_tokens, last_tokens + last_n_repeat / 2); + float rep_pen_reduced = rep_pen; if(rep_pen_reduced>1.0f) { rep_pen_reduced = 1.0f + ((rep_pen-1.0f)*rep_pen_slope); } for (size_t i = 0; i < candidates->size; ++i) { - const auto token_in_near = token_count_near.find(candidates->data[i].id); - const auto token_in_far = token_count_far.find(candidates->data[i].id); - bool in_near = (token_in_near != token_count_near.end()); - bool in_far = (token_in_far != token_count_far.end()); - if (!in_near && !in_far) { + const bool token_in_near = tokens_near.find(candidates->data[i].id) != tokens_near.end(); + const bool token_in_far = tokens_far.find(candidates->data[i].id) != tokens_far.end(); + if (!token_in_near && !token_in_far) { continue; } - float penalty = (in_near?rep_pen:rep_pen_reduced); + float penalty = (token_in_near?rep_pen:rep_pen_reduced); // The academic publication that described this technique actually just only divided, but that would cause tokens with negative logits to become more likely, which is obviously wrong. // This is common fix for this problem, which is to multiply by the penalty instead of dividing. @@ -1229,7 +1218,6 @@ void sample_rep_pen(int n_ctx, int rep_pen_range, float rep_pen, float rep_pen_s } candidates->sorted = false; - } void sample_top_p(llama_token_data_array * cur_p, float p, size_t min_keep) {