Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	common/sampling.h
#	llama.h
#	tests/test-chat-template.cpp
2025-09-11 09:34:37 +00:00 · 2024-04-24 21:29:07 +08:00 · 2024-04-24 21:29:07 +08:00 · a681cdd9ef
commit a681cdd9ef
parent 15ed96c25a 3fe847b574
20 changed files with 788 additions and 355 deletions
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -1,4 +1,6 @@
+#define LLAMA_API_INTERNAL
 #include "sampling.h"
+#include <random>

 struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
    struct llama_sampling_context * result = new llama_sampling_context();
@@ -33,6 +35,8 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_

    result->prev.resize(params.n_prev);

+    llama_sampling_set_rng_seed(result, params.seed);
+
    return result;
 }

@@ -62,6 +66,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) {
    ctx->cur.clear();
 }

+void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed) { // seeds ctx->rng from `seed`
+    if (seed == LLAMA_DEFAULT_SEED) { // this value is not used as a seed itself; it is replaced below
+        seed = time(NULL); // use the current time instead; the time_t result is implicitly converted to uint32_t
+    }
+    ctx->rng.seed(seed); // reseed the context's generator with the chosen value
+}
+
 void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst) {
    if (dst->grammar) {
        llama_grammar_free(dst->grammar);
@@ -203,7 +214,7 @@ static llama_token llama_sampling_sample_impl(

            sampler_queue(ctx_main, params, cur_p, min_keep);

-            id = llama_sample_token(ctx_main, &cur_p);
+            id = llama_sample_token_with_rng(ctx_main, &cur_p, ctx_sampling->rng);

            //{
            //    const int n_top = 10;