note: also has support for completion tokens count

2025-09-14 10:59:41 +00:00 · 2024-11-01 00:44:14 +08:00 · 2024-11-01 00:44:14 +08:00 · a46f8acd03
commit a46f8acd03
parent aa26a58085 8f275a7c45
31 changed files with 138676 additions and 137399 deletions
--- a/include/llama.h
+++ b/include/llama.h
@ -1089,9 +1089,6 @@ extern "C" {
    /// @details Minimum P sampling as described in https://github.com/ggerganov/llama.cpp/pull/3841
    LLAMA_API struct llama_sampler * llama_sampler_init_min_p      (float   p, size_t min_keep);

-    /// @details Tail Free Sampling described in https://www.trentonbricken.com/Tail-Free-Sampling/.
-    LLAMA_API struct llama_sampler * llama_sampler_init_tail_free  (float   z, size_t min_keep);
-
    /// @details Locally Typical Sampling implementation described in the paper https://arxiv.org/abs/2202.00666.
    LLAMA_API struct llama_sampler * llama_sampler_init_typical    (float   p, size_t min_keep);

@ -1143,6 +1140,16 @@ extern "C" {
                                bool   penalize_nl,     // consider newlines as a repeatable token
                                bool   ignore_eos);     // ignore the end-of-sequence token

+    ///  @details DRY sampler, designed by p-e-w, as described in: https://github.com/oobabooga/text-generation-webui/pull/5677, porting Koboldcpp implementation authored by pi6am: https://github.com/LostRuins/koboldcpp/pull/982
+    LLAMA_API struct llama_sampler *    llama_sampler_init_dry(
+            const struct llama_model *  model,
+                               float    dry_multiplier,
+                               float    dry_base,
+                             int32_t    dry_allowed_length,
+                             int32_t    dry_penalty_last_n,
+                          const char ** seq_breakers,
+                              size_t    num_breakers);
+
    LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias(
                             int32_t   n_vocab,
                             int32_t   n_logit_bias,