Add device_inp_embd_delay, device_memory_bw, and device_cuda_memory_bw tests

This commit is contained in:
Zonghang Li 2024-11-26 22:28:02 +04:00
parent a7a95b53fe
commit f78c437172
4 changed files with 346 additions and 63 deletions

View file

@@ -531,7 +531,8 @@ extern "C" {
struct model_flops * n_flops,
struct model_params * n_params,
const int64_t n_input,
const int64_t n_history);
const int64_t n_history,
enum ggml_type * inp_embd_dtype);
// Get a llama model tensor
LLAMA_API struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name);