add device_inp_embd_delay test, device_memory_bw test, and device_cuda_memory_bw test

2025-09-09 14:24:37 +00:00 · 2024-11-26 22:28:02 +04:00 · 2024-11-26 22:28:02 +04:00 · f78c437172
commit f78c437172
parent a7a95b53fe
4 changed files with 346 additions and 63 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -531,7 +531,8 @@ extern "C" {
                        struct model_flops * n_flops,
                       struct model_params * n_params,
                             const int64_t   n_input,
-                             const int64_t   n_history);
+                             const int64_t   n_history,
+                            enum ggml_type * inp_embd_dtype);

    // Get a llama model tensor
    LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);