Merge branch 'upstream' into concedo_experimental

# Conflicts: # .dockerignore # .github/workflows/build.yml # .github/workflows/docker.yml # Makefile # README.md # examples/infill/infill.cpp # examples/perplexity/perplexity.cpp # examples/server/README.md # examples/speculative/speculative.cpp # flake.lock # ggml/src/CMakeLists.txt # scripts/sync-ggml.last # tests/test-backend-ops.cpp # tests/test-sampling.cpp
2025-09-12 01:54:37 +00:00 · 2024-09-27 11:21:28 +08:00 · 2024-09-27 11:21:28 +08:00 · ea55f69dc1
commit ea55f69dc1
parent 6342b414ea 95bc82fbc0
39 changed files with 2587 additions and 1564 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -1068,6 +1068,7 @@ extern "C" {
    LLAMA_API struct llama_sampler * llama_sampler_init_dist       (uint32_t seed);

    /// @details Sorts candidate tokens by their logits in descending order and calculates probabilities based on logits.
+    /// NOTE: Avoid using on the full vocabulary as the sorting can become slow. For example, apply top-k or top-p sampling first.
    LLAMA_API struct llama_sampler * llama_sampler_init_softmax    (void);

    /// @details Top-K sampling described in academic paper "The Curious Case of Neural Text Degeneration" https://arxiv.org/abs/1904.09751