Temp: Fix Needlessly Iterating on Candidates During Greedy Sampling (#1854)

This commit is contained in:
CasualAutopsy 2025-11-22 03:06:50 -05:00 committed by GitHub
parent b281d2554a
commit 7703bed260
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1540,13 +1540,10 @@ void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_te
void sample_temperature(llama_token_data_array * candidates_p, float temp, float smoothing_factor, float smoothing_curve)
{
bool isgreedy = false;
if (temp <= 0)
{
// Imitate greedy sampling
temp = 0.00390625f; //cannot be zero else div0, this is 1/256
smoothing_factor = 0;
isgreedy = true;
sample_top_k(candidates_p, 1); //only want first candidate
return;
}
for (size_t i = 0; i < candidates_p->size; ++i) {
@ -1565,11 +1562,6 @@ void sample_temperature(llama_token_data_array * candidates_p, float temp, float
}
sample_softmax(candidates_p);
}
if(isgreedy)
{
sample_top_k(candidates_p, 1); //only want first candidate
}
}
void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_array * candidates, const struct llama_grammar * grammar) {