add memory copy speed test

This commit is contained in:
Lizonghang 2024-12-09 10:07:42 +04:00
parent 1aee5bd6da
commit d78fa427e7
4 changed files with 179 additions and 14 deletions

View file

@ -7386,8 +7386,8 @@ static bool llm_load_tensors_impl(
if (my_rank == 0) {
model.buft_input = llama_default_buffer_type_cpu(model, true);
model.buft_output = llama_default_buffer_type_cpu(model, true);
LLAMA_LOG_INFO("Layer input assigned to cpu\n");
LLAMA_LOG_INFO("Layer output assigned to cpu\n");
// LLAMA_LOG_INFO("Layer input assigned to cpu\n");
// LLAMA_LOG_INFO("Layer output assigned to cpu\n");
}
// count used buffer types
@ -20698,6 +20698,14 @@ uint64_t llama_model_n_params(const struct llama_model * model) {
return nparams;
}
// Accessor exposing the model's hyperparameter value n_embd_k_gqa().
//
// model: pointer to the model whose hparams are queried; it is dereferenced
//        unconditionally, so it must not be null.
// returns: whatever model->hparams.n_embd_k_gqa() yields when called with no
//          arguments, converted to uint32_t.
// NOTE(review): presumably this is the K embedding width under grouped-query
// attention -- confirm against the hparams definition.
uint32_t llama_model_n_embd_k_gqa(struct llama_model * model) {
    auto & hparams = model->hparams;
    const uint32_t n_embd_k = hparams.n_embd_k_gqa();
    return n_embd_k;
}
// Accessor exposing the model's hyperparameter value n_embd_v_gqa().
//
// model: pointer to the model whose hparams are queried; it is dereferenced
//        unconditionally, so it must not be null.
// returns: whatever model->hparams.n_embd_v_gqa() yields when called with no
//          arguments, converted to uint32_t.
// NOTE(review): presumably this is the V embedding width under grouped-query
// attention -- confirm against the hparams definition.
uint32_t llama_model_n_embd_v_gqa(struct llama_model * model) {
    auto & hparams = model->hparams;
    const uint32_t n_embd_v = hparams.n_embd_v_gqa();
    return n_embd_v;
}
static void llama_model_reset_tensors(struct llama_model * model) {
model->buft_input.buft = nullptr;
model->buft_input.buft_matrix = nullptr;