From 7703bed260b2ff596f8b2786c2fc971796fec052 Mon Sep 17 00:00:00 2001
From: CasualAutopsy <107777585+CasualAutopsy@users.noreply.github.com>
Date: Sat, 22 Nov 2025 03:06:50 -0500
Subject: [PATCH] Temp: Fix Needlessly Iterating on Candidates During Greedy
 Sampling (#1854)

---
 gpttype_adapter.cpp | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp
index 845fa3515..9df8a4178 100644
--- a/gpttype_adapter.cpp
+++ b/gpttype_adapter.cpp
@@ -1540,13 +1540,10 @@ void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_te
 void sample_temperature(llama_token_data_array * candidates_p, float temp, float smoothing_factor, float smoothing_curve)
 {
-    bool isgreedy = false;
     if (temp <= 0)
     {
-        // Imitate greedy sampling
-        temp = 0.00390625f; //cannot be zero else div0, this is 1/256
-        smoothing_factor = 0;
-        isgreedy = true;
+        sample_top_k(candidates_p, 1); //only want first candidate
+        return;
     }
 
     for (size_t i = 0; i < candidates_p->size; ++i)
     {
@@ -1565,11 +1562,6 @@ void sample_temperature(llama_token_data_array * candidates_p, float temp, float
         }
         sample_softmax(candidates_p);
     }
-
-    if(isgreedy)
-    {
-        sample_top_k(candidates_p, 1); //only want first candidate
-    }
 }
 
 void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_array * candidates, const struct llama_grammar * grammar)