mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-12 14:11:27 +00:00
builds but crashes
This commit is contained in:
parent
2bf128587d
commit
bc04366a65
43 changed files with 12183 additions and 2 deletions
204
otherarch/ttscpp/src/sampler.cpp
Normal file
204
otherarch/ttscpp/src/sampler.cpp
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
#include "sampler.h"
|
||||
|
||||
void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
|
||||
// assume that we are pointing to the start of the first token output;
|
||||
if (!do_sample) {
|
||||
return max(logits, output_tokens);
|
||||
}
|
||||
std::vector<uint32_t> max_vals;
|
||||
// the max_head_probs variable is used when top-p is applied but exists to address the case in which top-k and top-p cause the cumulative probability of the nucleus to beless than or
|
||||
// equal to top_p;
|
||||
std::vector<float> max_head_probs;
|
||||
|
||||
// This allows us to perform an effective softmax without logarithms or big number calculations.
|
||||
// Additionally by avoiding large number division we drastically improve the stability of
|
||||
// our softmax implementation;
|
||||
max(logits, max_vals);
|
||||
|
||||
std::vector<std::vector<size_t>> picks;
|
||||
bool use_nucleus_sampling = false;
|
||||
bool performed_softmax = false;
|
||||
|
||||
if (top_p < 1.0) {
|
||||
// if we are nucleus sampling via top-p then we need to perform softmax over the samples before getting top_k samples, so that we don't trim beyond top_p.
|
||||
// Otherwise, if we are not performing top-p sampling then it is more efficient to perform softmax after getting the top_k nucleus.
|
||||
softmax(logits, picks, max_vals);
|
||||
performed_softmax = true;
|
||||
}
|
||||
if (top_k > 0 && top_k < vocab_size) {
|
||||
picks = topk(logits, performed_softmax);
|
||||
use_nucleus_sampling = true;
|
||||
}
|
||||
|
||||
if (top_p >= 1.0) {
|
||||
softmax(logits, picks, max_vals);
|
||||
performed_softmax = true;
|
||||
}
|
||||
|
||||
if (top_p < 1.0) {
|
||||
topp(logits, picks, max_head_probs);
|
||||
use_nucleus_sampling = true;
|
||||
}
|
||||
|
||||
bool has_repetition_penalty = repetition_penalty != 1.0;
|
||||
if (has_repetition_penalty && (last_token_ids.size() == 0 || repetition_counts.size() == 0)) {
|
||||
reset();
|
||||
}
|
||||
std::minstd_rand gen(std::random_device{}());
|
||||
std::uniform_real_distribution<float> dist(0.0f, 1.0f);
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
float assignment = top_p < 1.0 ? dist(gen) * max_head_probs[i] : dist(gen);
|
||||
float cumulative = 0.0f;
|
||||
for (uint32_t j = 0; j < (use_nucleus_sampling ? picks[i].size() : vocab_size); j++) {
|
||||
int ii = use_nucleus_sampling ? (int) picks[i][j] : j;
|
||||
cumulative += *(logits+(i*vocab_size+ii));
|
||||
// with top_k and top_p it is possible for the assignment to be greater than the cumulative value
|
||||
if (assignment <= cumulative || ii >= vocab_size + 1 || j >= picks[i].size() - 1) {
|
||||
if (has_repetition_penalty) {
|
||||
if (last_token_ids[i] != ii) {
|
||||
repetition_counts[i] = 0;
|
||||
}
|
||||
last_token_ids[i] = ii;
|
||||
repetition_counts[i] += 1;
|
||||
}
|
||||
output_tokens.push_back(ii);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sampler::reset() {
|
||||
if (repetition_penalty != 1.0) {
|
||||
last_token_ids.clear();
|
||||
repetition_counts.clear();
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
last_token_ids.push_back(-1);
|
||||
repetition_counts.push_back(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sampler::softmax(float * logits, std::vector<std::vector<size_t>> picks, std::vector<uint32_t> max_indices) {
|
||||
bool use_nucleus_sampling = picks.size() > 0;
|
||||
bool has_repetition_penalty = repetition_penalty != 1.0f;
|
||||
bool has_temperature = temperature != 1.0f;
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
float cumsum = 0.0;
|
||||
float max_val = logits[i*vocab_size + max_indices[i]];
|
||||
if (has_repetition_penalty && last_token_ids[i] == max_indices[i]) {
|
||||
max_val /= (pow(repetition_penalty, repetition_counts[i]));
|
||||
}
|
||||
if (has_temperature) {
|
||||
max_val /= temperature;
|
||||
}
|
||||
for (int j = 0; j < (use_nucleus_sampling ? picks[i].size() : vocab_size); j++) {
|
||||
int ii = use_nucleus_sampling ? (int) picks[i][j] : j;
|
||||
int index = i * vocab_size + ii;
|
||||
float v = *(logits + index);
|
||||
if (has_repetition_penalty && last_token_ids[i] == ii) {
|
||||
v /= (pow(repetition_penalty, repetition_counts[i]));
|
||||
}
|
||||
if (has_temperature) {
|
||||
v /= temperature;
|
||||
}
|
||||
v = expf(v - max_val);
|
||||
cumsum += v;
|
||||
logits[index] = v;
|
||||
}
|
||||
for (int j = 0; j < (use_nucleus_sampling ? picks[i].size() : vocab_size); j++) {
|
||||
int ii = use_nucleus_sampling ? picks[i][j] : j;
|
||||
int index = i * vocab_size + ii;
|
||||
float v = *(logits + index);
|
||||
logits[index] = v / cumsum;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sampler::topp(float * logits, std::vector<std::vector<size_t>> & picks, std::vector<float> & max_head_probs) {
|
||||
if (picks.empty()) {
|
||||
// we need to get the softmaxed logits ordered
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
std::vector<size_t> head_picks(vocab_size);
|
||||
iota(head_picks.begin(), head_picks.end(), 0);
|
||||
// have to sort with repetition penalty applied so not to inavertently trim our nucleus size.
|
||||
std::sort(head_picks.begin(), head_picks.end(), [&logits, &i, this](size_t s1, size_t s2) {
|
||||
float v1 = logits[i*vocab_size+s1];
|
||||
float v2 = logits[i*vocab_size+s2];
|
||||
return v1 > v2;
|
||||
});
|
||||
|
||||
picks.push_back(head_picks);
|
||||
}
|
||||
}
|
||||
// if we didn't already perform topk or if the probable sum of topk logits is greater than top_p then we need to trim.
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
float prob_sum = 0.0f;
|
||||
int trim_to = -1;
|
||||
for (int ii = 0; ii < picks[i].size(); ii++) {
|
||||
prob_sum += logits[i*vocab_size+picks[i][ii]];
|
||||
if (prob_sum >= top_p) {
|
||||
trim_to = ii+1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
max_head_probs.push_back(std::min(prob_sum, top_p));
|
||||
if (trim_to > 0) {
|
||||
picks[i] = std::vector<size_t>(picks[i].begin(), picks[i].begin()+trim_to);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::vector<size_t>> sampler::topk(float * logits, bool performed_softmax) {
|
||||
bool has_repetition_penalty = repetition_penalty != 1.0f;
|
||||
std::vector<std::vector<size_t>> head_picks;
|
||||
if (vocab_size < top_k) {
|
||||
// technically we should never get here, but lets be protective.
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
std::vector<size_t> picks(vocab_size);
|
||||
iota(picks.begin(), picks.end(), 0);
|
||||
head_picks.push_back(picks);
|
||||
}
|
||||
return head_picks;
|
||||
}
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
std::vector<size_t> picks(vocab_size);
|
||||
iota(picks.begin(), picks.end(), 0);
|
||||
// have to sort with repetition penalty applied so not to inavertently trim our nucleus size.
|
||||
std::sort(picks.begin(), picks.end(), [&logits, &i, &has_repetition_penalty, &performed_softmax, this](size_t s1, size_t s2) {
|
||||
float v1 = logits[i*vocab_size+s1];
|
||||
float v2 = logits[i*vocab_size+s2];
|
||||
if (!performed_softmax) {
|
||||
if (has_repetition_penalty && last_token_ids[i] == s1) {
|
||||
v1 /= (pow(repetition_penalty, repetition_counts[i]));
|
||||
} else if (has_repetition_penalty && last_token_ids[i] == s2) {
|
||||
v2 /= (pow(repetition_penalty, repetition_counts[i]));
|
||||
}
|
||||
}
|
||||
return v1 > v2;
|
||||
});
|
||||
head_picks.push_back(std::vector<size_t>(picks.begin(), picks.begin() + top_k));
|
||||
}
|
||||
return head_picks;
|
||||
}
|
||||
|
||||
void sampler::max(float * logits, std::vector<uint32_t> & output_tokens) {
|
||||
bool has_repetition_penalty = repetition_penalty != 1.0f;
|
||||
for (int i = 0; i < n_output_heads; i++) {
|
||||
float max = -INFINITY;
|
||||
uint32_t token_id = 0;
|
||||
for (uint32_t ii = 0; ii < vocab_size; ii++) {
|
||||
float v = *(logits+i*vocab_size+ii);
|
||||
// while repetition penalty will never be used for maximum token selection, it is used for the logarithmic stabilization of
|
||||
// the softmax function in which case it is possible for repetition counts to be set.
|
||||
if (has_repetition_penalty && last_token_ids[i] == ii) {
|
||||
v /= (pow(repetition_penalty, repetition_counts[i]));
|
||||
}
|
||||
if (v > max) {
|
||||
max = v;
|
||||
token_id = ii;
|
||||
}
|
||||
}
|
||||
output_tokens.push_back(token_id);
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue