From f1c9db417461fef0a6288a9cd0a3ec70cb66e64f Mon Sep 17 00:00:00 2001 From: Reithan Date: Fri, 13 Jun 2025 03:46:38 -0700 Subject: [PATCH] fix-loss-of-destroyed-tokens-in-grammar-pre-pass (#1600) --- gpttype_adapter.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 606d54b93..e55e7c629 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1653,7 +1653,7 @@ const std::vector & sampler_order, llama_grammar * grammar, float dyna //prefilter to top 3k tokens for improved speed bool use_grammar = grammar != nullptr; - size_t n_pre_cull = candidates_p.size; + std::vector precache = (use_grammar ? std::vector(candidates) : std::vector(0)); sample_top_k(&candidates_p, 3000); @@ -1661,7 +1661,7 @@ const std::vector & sampler_order, llama_grammar * grammar, float dyna sample_grammar(file_format, n_vocab, &candidates_p, grammar); // if top_k 3000 doesn't contain a valid candidate for this grammar, try again pre-cull if (candidates_p.size <= 0) { - candidates_p.size = n_pre_cull; + candidates_p = { precache.data(), precache.size(), false }; sample_grammar(file_format, n_vocab, &candidates_p, grammar); sample_top_k(&candidates_p, 3000); }