Merge branch 'master' into concedo

# Conflicts:
#	Makefile
This commit is contained in:
Concedo 2023-04-08 17:42:09 +08:00
commit 0b904e12db
10 changed files with 430 additions and 33 deletions

View file

@ -1238,19 +1238,13 @@ static llama_vocab::id llama_sample_top_p_top_k(
}
}
if (top_k > 0 && top_k < n_logits) {
sample_top_k(logits_id, top_k);
}
float maxl = -std::numeric_limits<float>::infinity();
for (const auto & kv : logits_id) {
maxl = Max(maxl, kv.first);
}
sample_top_k(logits_id, top_k > 0 ? Min(top_k, n_logits) : n_logits);
// compute probs for the top k tokens
std::vector<float> probs;
probs.reserve(logits_id.size());
float maxl = logits_id[0].first;
double sum = 0.0;
for (const auto & kv : logits_id) {
const float p = expf(kv.first - maxl);
@ -1273,16 +1267,11 @@ static llama_vocab::id llama_sample_top_p_top_k(
break;
}
}
cumsum = 1.0/cumsum;
for (int i = 0; i < (int) probs.size(); i++) {
probs[i] *= cumsum;
}
}
//printf("\n");
//for (int i = 0; i < (int) 10; i++) {
// printf("%d: '%s' %f\n", i, vocab.id_to_token.at(logits_id[i].second).c_str(), probs[i]);
// printf("%d: '%s' %f\n", i, lctx.vocab.id_to_token.at(logits_id[i].second).tok.c_str(), probs[i]);
//}
//printf("\n\n");
//exit(0);
@ -1865,3 +1854,8 @@ const char * llama_print_system_info(void) {
return s.c_str();
}
// For internal test use
/// @brief Exposes the context's full name -> tensor map so tests can inspect model weights.
/// @param ctx Non-owning pointer to an initialized llama_context; must not be null (dereferenced unchecked).
/// @return Mutable reference to ctx->model.tensors — lifetime is tied to ctx.
// NOTE(review): the reference is non-const, so callers can mutate the model's
// tensor table; presumably acceptable since this is test-only — confirm no
// production caller uses it.
std::unordered_map<std::string, struct ggml_tensor *>& llama_internal_get_tensor_map(struct llama_context * ctx) {
return ctx->model.tensors;
}