add memory copy speed test

2025-09-10 06:34:53 +00:00 · 2024-12-09 10:07:42 +04:00 · 2024-12-09 10:07:42 +04:00 · d78fa427e7
commit d78fa427e7
parent 1aee5bd6da
4 changed files with 179 additions and 14 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -7386,8 +7386,8 @@ static bool llm_load_tensors_impl(
    if (my_rank == 0) {
        model.buft_input  = llama_default_buffer_type_cpu(model, true);
        model.buft_output = llama_default_buffer_type_cpu(model, true);
-        LLAMA_LOG_INFO("Layer input assigned to cpu\n");
-        LLAMA_LOG_INFO("Layer output assigned to cpu\n");
+        // LLAMA_LOG_INFO("Layer input assigned to cpu\n");
+        // LLAMA_LOG_INFO("Layer output assigned to cpu\n");
    }

    // count used buffer types
@@ -20698,6 +20698,14 @@ uint64_t llama_model_n_params(const struct llama_model * model) {
    return nparams;
 }

+uint32_t llama_model_n_embd_k_gqa(struct llama_model * model) { // accessor: returns hparams.n_embd_k_gqa() of the given model as uint32_t
+    return model->hparams.n_embd_k_gqa(); // model is dereferenced with no null check; presumably the K embedding width under GQA -- confirm against hparams
+}
+
+uint32_t llama_model_n_embd_v_gqa(struct llama_model * model) { // accessor: returns hparams.n_embd_v_gqa() of the given model as uint32_t
+    return model->hparams.n_embd_v_gqa(); // model is dereferenced with no null check; presumably the V embedding width under GQA -- confirm against hparams
+}
+
 static void llama_model_reset_tensors(struct llama_model * model) {
    model->buft_input.buft         = nullptr;
    model->buft_input.buft_matrix  = nullptr;