builds but crashes

Concedo 2025-08-17 00:09:03 +08:00
parent 2bf128587d
commit bc04366a65
43 changed files with 12183 additions and 2 deletions


@@ -0,0 +1,204 @@
#include "sampler.h"
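// Samples one token per output head from a logits buffer laid out as
// n_output_heads contiguous rows of vocab_size floats, appending each
// chosen token id to output_tokens.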
void sampler::sample(float * logits, std::vector<uint32_t> & output_tokens) {
    // assume that logits points at the start of the first head's output
    if (!do_sample) {
        return max(logits, output_tokens);
    }
    std::vector<uint32_t> max_vals;
    // max_head_probs is only used when top-p is applied; it covers the case where
    // top-k and top-p together leave the nucleus with a cumulative probability
    // less than or equal to top_p
    std::vector<float> max_head_probs;
    bool has_repetition_penalty = repetition_penalty != 1.0f;
    // initialize the repetition-penalty state before max() and softmax() read it
    if (has_repetition_penalty && (last_token_ids.empty() || repetition_counts.empty())) {
        reset();
    }
    // Subtracting each head's maximum logit lets us perform an effective softmax
    // without logarithms or big-number calculations. Additionally, by avoiding
    // large-number division we drastically improve the stability of our softmax
    // implementation.
    max(logits, max_vals);
    std::vector<std::vector<size_t>> picks;
    bool use_nucleus_sampling = false;
    bool performed_softmax = false;
    if (top_p < 1.0) {
        // when nucleus sampling via top-p we must softmax before taking the top_k
        // samples, so that we don't trim beyond top_p; otherwise it is more
        // efficient to softmax after the top_k nucleus has been selected
        softmax(logits, picks, max_vals);
        performed_softmax = true;
    }
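    // e.g. with top_k = 2 over probabilities {0.5, 0.3, 0.2} and top_p = 0.9, only
    // 0.8 of the mass survives top-k; max_head_probs records this so the final
    // draw below still lands inside the trimmed nucleus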
    if (top_k > 0 && top_k < vocab_size) {
        picks = topk(logits, performed_softmax);
        use_nucleus_sampling = true;
    }
    if (top_p >= 1.0) {
        softmax(logits, picks, max_vals);
        performed_softmax = true;
    }
    if (top_p < 1.0) {
        topp(logits, picks, max_head_probs);
        use_nucleus_sampling = true;
    }
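    // by now the candidate logits have been softmaxed into probabilities, and picks
    // holds each head's candidate set whenever top-k and/or top-p filtering applied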
    std::minstd_rand gen(std::random_device{}());
    std::uniform_real_distribution<float> dist(0.0f, 1.0f);
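    // a lightweight linear-congruential generator seeded from system entropy is
    // plenty here; sampling draws don't need cryptographic quality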
    for (int i = 0; i < n_output_heads; i++) {
        // scale the draw by the attainable nucleus mass when top-p trimmed it
        float assignment = top_p < 1.0 ? dist(gen) * max_head_probs[i] : dist(gen);
        float cumulative = 0.0f;
        size_t n_picks = use_nucleus_sampling ? picks[i].size() : vocab_size;
        for (size_t j = 0; j < n_picks; j++) {
            int ii = use_nucleus_sampling ? (int) picks[i][j] : (int) j;
            cumulative += logits[i * vocab_size + ii];
            // with top_k and top_p the assignment can exceed the total cumulative
            // mass, so always fall back to the last candidate
            if (assignment <= cumulative || j + 1 >= n_picks) {
                if (has_repetition_penalty) {
                    if (last_token_ids[i] != ii) {
                        repetition_counts[i] = 0;
                    }
                    last_token_ids[i] = ii;
                    repetition_counts[i] += 1;
                }
                output_tokens.push_back(ii);
                break;
            }
        }
    }
}
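// reinitializes the per-head repetition-penalty state: one last-token slot and
// one repetition count per output head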
void sampler::reset() {
    if (repetition_penalty != 1.0) {
        last_token_ids.clear();
        repetition_counts.clear();
        for (int i = 0; i < n_output_heads; i++) {
            last_token_ids.push_back(-1);
            repetition_counts.push_back(0);
        }
    }
}
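// in-place softmax over each head's logits (or over just the picked candidates
// when picks is non-empty), folding the repetition penalty and temperature into
// the exponent and subtracting the per-head max for numerical stability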
void sampler::softmax(float * logits, const std::vector<std::vector<size_t>> & picks, const std::vector<uint32_t> & max_indices) {
    bool use_nucleus_sampling = picks.size() > 0;
    bool has_repetition_penalty = repetition_penalty != 1.0f;
    bool has_temperature = temperature != 1.0f;
    for (int i = 0; i < n_output_heads; i++) {
        float cumsum = 0.0f;
        float max_val = logits[i * vocab_size + max_indices[i]];
        if (has_repetition_penalty && last_token_ids[i] == max_indices[i]) {
            max_val /= pow(repetition_penalty, repetition_counts[i]);
        }
        if (has_temperature) {
            max_val /= temperature;
        }
        for (size_t j = 0; j < (use_nucleus_sampling ? picks[i].size() : vocab_size); j++) {
            int ii = use_nucleus_sampling ? (int) picks[i][j] : (int) j;
            int index = i * vocab_size + ii;
            float v = logits[index];
            if (has_repetition_penalty && last_token_ids[i] == ii) {
                v /= pow(repetition_penalty, repetition_counts[i]);
            }
            if (has_temperature) {
                v /= temperature;
            }
            v = expf(v - max_val);
            cumsum += v;
            logits[index] = v;
        }
        for (size_t j = 0; j < (use_nucleus_sampling ? picks[i].size() : vocab_size); j++) {
            int ii = use_nucleus_sampling ? (int) picks[i][j] : (int) j;
            logits[i * vocab_size + ii] /= cumsum;
        }
    }
}
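// nucleus (top-p) filtering: sorts each head's candidates by probability when no
// top-k pass produced them, trims the list at the top_p cumulative mass, and
// records the attainable mass in max_head_probs for the final draw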
void sampler::topp(float * logits, std::vector<std::vector<size_t>> & picks, std::vector<float> & max_head_probs) {
    if (picks.empty()) {
        // we still need the softmaxed logits in descending order
        for (int i = 0; i < n_output_heads; i++) {
            std::vector<size_t> head_picks(vocab_size);
            std::iota(head_picks.begin(), head_picks.end(), 0);
            // the logits were already softmaxed here, with any repetition penalty
            // applied, so sorting them directly cannot inadvertently trim our nucleus
            std::sort(head_picks.begin(), head_picks.end(), [&logits, &i, this](size_t s1, size_t s2) {
                return logits[i * vocab_size + s1] > logits[i * vocab_size + s2];
            });
            picks.push_back(head_picks);
        }
    }
    // trim each head to the smallest prefix whose probability mass reaches top_p;
    // if a prior top-k pass left less mass than top_p, keep everything and record
    // the attainable mass instead
    for (int i = 0; i < n_output_heads; i++) {
        float prob_sum = 0.0f;
        int trim_to = -1;
        for (size_t ii = 0; ii < picks[i].size(); ii++) {
            prob_sum += logits[i * vocab_size + picks[i][ii]];
            if (prob_sum >= top_p) {
                trim_to = (int) ii + 1;
                break;
            }
        }
        max_head_probs.push_back(std::min(prob_sum, top_p));
        if (trim_to > 0) {
            picks[i].resize(trim_to);
        }
    }
}
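// returns each head's top_k candidate indices in descending score order; when the
// logits have not been softmaxed yet, the repetition penalty is applied inside the
// comparator so a penalized token cannot crowd out a better candidate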
std::vector<std::vector<size_t>> sampler::topk(float * logits, bool performed_softmax) {
    bool has_repetition_penalty = repetition_penalty != 1.0f;
    std::vector<std::vector<size_t>> head_picks;
    if (vocab_size < top_k) {
        // technically we should never get here, but let's be protective
        for (int i = 0; i < n_output_heads; i++) {
            std::vector<size_t> picks(vocab_size);
            std::iota(picks.begin(), picks.end(), 0);
            head_picks.push_back(picks);
        }
        return head_picks;
    }
    for (int i = 0; i < n_output_heads; i++) {
        std::vector<size_t> picks(vocab_size);
        std::iota(picks.begin(), picks.end(), 0);
        // sort with the repetition penalty applied so as not to inadvertently
        // distort our nucleus
        std::sort(picks.begin(), picks.end(), [&logits, &has_repetition_penalty, &performed_softmax, &i, this](size_t s1, size_t s2) {
            float v1 = logits[i * vocab_size + s1];
            float v2 = logits[i * vocab_size + s2];
            if (!performed_softmax && has_repetition_penalty) {
                if (last_token_ids[i] == s1) {
                    v1 /= pow(repetition_penalty, repetition_counts[i]);
                } else if (last_token_ids[i] == s2) {
                    v2 /= pow(repetition_penalty, repetition_counts[i]);
                }
            }
            return v1 > v2;
        });
        head_picks.push_back(std::vector<size_t>(picks.begin(), picks.begin() + top_k));
    }
    return head_picks;
}
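// greedy argmax per head, also used by sample() to find each head's maximum for
// softmax stabilization; the penalty is applied so the chosen maximum matches the
// penalized values softmax() will actually exponentiate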
void sampler::max(float * logits, std::vector<uint32_t> & output_tokens) {
    bool has_repetition_penalty = repetition_penalty != 1.0f;
    for (int i = 0; i < n_output_heads; i++) {
        float max = -INFINITY;
        uint32_t token_id = 0;
        for (uint32_t ii = 0; ii < vocab_size; ii++) {
            float v = logits[i * vocab_size + ii];
            // repetition penalty is never used for greedy token selection, but this
            // routine also feeds the softmax stabilization, in which case the
            // repetition counts may already be set
            if (has_repetition_penalty && !last_token_ids.empty() && last_token_ids[i] == ii) {
                v /= pow(repetition_penalty, repetition_counts[i]);
            }
            if (v > max) {
                max = v;
                token_id = ii;
            }
        }
        output_tokens.push_back(token_id);
    }
}