Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	common/sampling.h
#	llama.h
#	tests/test-chat-template.cpp
Commit: a681cdd9ef
Author: Concedo
Date:   2024-04-24 21:29:07 +08:00

20 changed files with 788 additions and 355 deletions

llama.h

@@ -991,7 +991,7 @@ extern "C" {
             struct llama_context * ctx,
           llama_token_data_array * candidates);
 
-    /// @details Randomly selects a token from the candidates based on their probabilities.
+    /// @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx.
     LLAMA_API llama_token llama_sample_token(
             struct llama_context * ctx,
           llama_token_data_array * candidates);
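
For context, a minimal sketch of the call path the revised comment describes, assuming a context `ctx` that has just decoded a batch; the candidate-building loop follows the usual pattern in llama.cpp's samplers:

    // Build the candidate array from the logits of the last decoded token.
    const int n_vocab = llama_n_vocab(llama_get_model(ctx));
    float * logits = llama_get_logits_ith(ctx, 0);

    std::vector<llama_token_data> cur;
    cur.reserve(n_vocab);
    for (llama_token id = 0; id < n_vocab; id++) {
        cur.push_back({ id, logits[id], 0.0f });
    }
    llama_token_data_array candidates = { cur.data(), cur.size(), false };

    // Samples using the RNG stored inside ctx -- the behaviour the
    // updated @details comment now spells out.
    const llama_token tok = llama_sample_token(ctx, &candidates);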
@@ -1078,8 +1078,9 @@ extern "C" {
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 //#ifdef LLAMA_API_INTERNAL
 
-#include <vector>
+#include <random>
 #include <string>
+#include <vector>
 
 struct ggml_tensor;
 
@@ -1116,6 +1117,10 @@ std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,
         llama_partial_utf8   partial_start);
 
+// Randomly selects a token from the candidates based on their probabilities using given std::mt19937.
+// This is a temporary workaround in order to fix race conditions when sampling with multiple sequences.
+llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_data_array * candidates, std::mt19937 & rng);
+
 //#endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
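
The new internal entry point makes the RNG an explicit argument, so callers that sample several sequences in parallel can keep one generator per sequence instead of racing on the context's shared RNG. A minimal sketch under that assumption (LLAMA_API_INTERNAL defined; `n_seq`, `seed`, and the per-sequence `candidates` are placeholders, not part of this commit):

    #include <random>
    #include <vector>

    // One independent, reproducibly seeded RNG per sequence (placeholder setup).
    std::vector<std::mt19937> rngs;
    for (uint32_t s = 0; s < n_seq; s++) {
        rngs.emplace_back(seed + s);
    }

    // Inside the worker handling sequence s, after building `candidates`
    // exactly as for llama_sample_token:
    const llama_token tok = llama_sample_token_with_rng(ctx, &candidates, rngs[s]);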