diff --git a/gpttype_adapter.cpp b/gpttype_adapter.cpp index 845fa3515..9df8a4178 100644 --- a/gpttype_adapter.cpp +++ b/gpttype_adapter.cpp @@ -1540,13 +1540,10 @@ void sample_entropy(llama_token_data_array * cur_p, float min_temp, float max_te void sample_temperature(llama_token_data_array * candidates_p, float temp, float smoothing_factor, float smoothing_curve) { - bool isgreedy = false; if (temp <= 0) { - // Imitate greedy sampling - temp = 0.00390625f; //cannot be zero else div0, this is 1/256 - smoothing_factor = 0; - isgreedy = true; + sample_top_k(candidates_p, 1); //only want first candidate + return; } for (size_t i = 0; i < candidates_p->size; ++i) { @@ -1565,11 +1562,6 @@ void sample_temperature(llama_token_data_array * candidates_p, float temp, float } sample_softmax(candidates_p); } - - if(isgreedy) - { - sample_top_k(candidates_p, 1); //only want first candidate - } } void sample_grammar(FileFormat file_format, int32_t n_vocab, llama_token_data_array * candidates, const struct llama_grammar * grammar) {