add memory copy speed test

2025-09-10 08:34:33 +00:00 · 2024-12-09 10:07:42 +04:00 · 2024-12-09 10:07:42 +04:00 · d78fa427e7
commit d78fa427e7
parent 1aee5bd6da
4 changed files with 179 additions and 14 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -528,6 +528,12 @@ extern "C" {
    // Returns the total number of parameters in the model
    LLAMA_API uint64_t llama_model_n_params(const struct llama_model * model);

+    // Returns the embedding size of K in grouped-query attention (GQA). NOTE(review): model is non-const here, unlike llama_model_n_params - confirm.
+    LLAMA_API uint32_t llama_model_n_embd_k_gqa(struct llama_model * model);
+
+    // Returns the embedding size of V in grouped-query attention (GQA). NOTE(review): model is non-const here, unlike llama_model_n_params - confirm.
+    LLAMA_API uint32_t llama_model_n_embd_v_gqa(struct llama_model * model);
+
    // Return the size of compute buffer size, including input tensors and activations
    LLAMA_API void llama_model_compute_buf_size(
                                  uint64_t * cpu_buf,