Merge branch 'upstream' into concedo_experimental

# Conflicts:
#	common/sampling.h
#	llama.h
#	tests/test-chat-template.cpp
Commit: a681cdd9ef
Author: Concedo
Date:   2024-04-24 21:29:07 +08:00

20 changed files with 788 additions and 355 deletions

llama.h

@@ -991,7 +991,7 @@ extern "C" {
             struct llama_context * ctx,
           llama_token_data_array * candidates);
 
-    /// @details Randomly selects a token from the candidates based on their probabilities.
+    /// @details Randomly selects a token from the candidates based on their probabilities using the RNG of ctx.
     LLAMA_API llama_token llama_sample_token(
             struct llama_context * ctx,
           llama_token_data_array * candidates);
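
For context, a minimal sketch of the call path the revised comment describes, assuming a context `ctx` that has just decoded a batch; the candidate-building loop follows the usual pattern in llama.cpp's samplers:

    // Build the candidate array from the logits of the last decoded token.
    const int n_vocab = llama_n_vocab(llama_get_model(ctx));
    float * logits = llama_get_logits_ith(ctx, 0);

    std::vector<llama_token_data> cur;
    cur.reserve(n_vocab);
    for (llama_token id = 0; id < n_vocab; id++) {
        cur.push_back({ id, logits[id], 0.0f });
    }
    llama_token_data_array candidates = { cur.data(), cur.size(), false };

    // Samples using the RNG stored inside ctx -- the behaviour the
    // updated @details comment now spells out.
    const llama_token tok = llama_sample_token(ctx, &candidates);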
@@ -1078,8 +1078,9 @@ extern "C" {
 // Internal API to be implemented by llama.cpp and used by tests/benchmarks only
 //#ifdef LLAMA_API_INTERNAL
 
-#include <vector>
+#include <random>
 #include <string>
+#include <vector>
 
 struct ggml_tensor;
 
@@ -1116,6 +1117,10 @@ std::pair<std::vector<uint32_t>, llama_partial_utf8> decode_utf8(
         const std::string & src,
         llama_partial_utf8   partial_start);
 
+// Randomly selects a token from the candidates based on their probabilities using given std::mt19937.
+// This is a temporary workaround in order to fix race conditions when sampling with multiple sequences.
+llama_token llama_sample_token_with_rng(struct llama_context * ctx, llama_token_data_array * candidates, std::mt19937 & rng);
+
 //#endif // LLAMA_API_INTERNAL
 
 #endif // LLAMA_H
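
The new internal entry point makes the RNG an explicit argument, so callers that sample several sequences in parallel can keep one generator per sequence instead of racing on the context's shared RNG. A minimal sketch under that assumption (LLAMA_API_INTERNAL defined; `n_seq`, `seed`, and the per-sequence `candidates` are placeholders, not part of this commit):

    #include <random>
    #include <vector>

    // One independent, reproducibly seeded RNG per sequence (placeholder setup).
    std::vector<std::mt19937> rngs;
    for (uint32_t s = 0; s < n_seq; s++) {
        rngs.emplace_back(seed + s);
    }

    // Inside the worker handling sequence s, after building `candidates`
    // exactly as for llama_sample_token:
    const llama_token tok = llama_sample_token_with_rng(ctx, &candidates, rngs[s]);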