From 7703bed260b2ff596f8b2786c2fc971796fec052 Mon Sep 17 00:00:00 2001
From: CasualAutopsy <107777585+CasualAutopsy@users.noreply.github.com>
Date: Sat, 22 Nov 2025 03:06:50 -0500
Subject: [PATCH] Temp: Fix Needlessly Iterating on Candidates During Greedy
 Sampling (#1854)

---
 gpttype_adapter.cpp | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 845fa3515..9df8a4178 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1540,13 +1540,10 @@ void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_te
 void sample_temperature(llama_token_data_array * candidates_p, float temp, float smoothing_factor, float smoothing_curve)
 {
-    bool isgreedy = false;
     if (temp <= 0)
     {
-        // Imitate greedy sampling
-        temp = 0.00390625f; //cannot be zero else div0, this is 1/256
-        smoothing_factor = 0;
-        isgreedy = true;
+        sample_top_k(candidates_p, 1); //only want first candidate
+        return;
     }
 
     for (size_t i = 0; i < candidates_p->size; ++i)
     {
@@ -1565,11 +1562,6 @@ void sample_temperature(llama_token_data_array * candidates_p, float temp, float
         }
         sample_softmax(candidates_p);
     }
-
-    if(isgreedy)
-    {
-        sample_top_k(candidates_p, 1); //only want first candidate
-    }
 }
 
 void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_array * candidates, const struct llama_grammar * grammar)