diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 64b537290..d5d5d8d0c 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -839,6 +839,9 @@ int mirostat, float mirostat_tau, float mirostat_eta, float dry_multiplier, floa sample_grammar(file_format, n_vocab, &candidates_p, grammar); } + //prefilter to top 5k tokens for improved speed + llama_sample_top_k(nullptr, &candidates_p, 5000, 1); + if (mirostat == 1 || mirostat == 2) { static float mirostat_mu = 2.0f * mirostat_tau;