mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-10 06:34:53 +00:00
add memory copy speed test
This commit is contained in:
parent
1aee5bd6da
commit
d78fa427e7
4 changed files with 179 additions and 14 deletions
|
@ -7386,8 +7386,8 @@ static bool llm_load_tensors_impl(
|
|||
if (my_rank == 0) {
|
||||
model.buft_input = llama_default_buffer_type_cpu(model, true);
|
||||
model.buft_output = llama_default_buffer_type_cpu(model, true);
|
||||
LLAMA_LOG_INFO("Layer input assigned to cpu\n");
|
||||
LLAMA_LOG_INFO("Layer output assigned to cpu\n");
|
||||
// LLAMA_LOG_INFO("Layer input assigned to cpu\n");
|
||||
// LLAMA_LOG_INFO("Layer output assigned to cpu\n");
|
||||
}
|
||||
|
||||
// count used buffer types
|
||||
|
@ -20698,6 +20698,14 @@ uint64_t llama_model_n_params(const struct llama_model * model) {
|
|||
return nparams;
|
||||
}
|
||||
|
||||
// Accessor that forwards to model->hparams.n_embd_k_gqa() and returns its
// result as a uint32_t.
// `model` is dereferenced without a null check, so callers must pass a valid
// pointer.
// NOTE(review): presumably this is the K (key) embedding width under
// grouped-query attention -- confirm against the hparams definition.
uint32_t llama_model_n_embd_k_gqa(struct llama_model * model) {
|
||||
return model->hparams.n_embd_k_gqa();
|
||||
}
|
||||
|
||||
// Accessor that forwards to model->hparams.n_embd_v_gqa() and returns its
// result as a uint32_t.
// `model` is dereferenced without a null check, so callers must pass a valid
// pointer.
// NOTE(review): presumably this is the V (value) embedding width under
// grouped-query attention -- confirm against the hparams definition.
uint32_t llama_model_n_embd_v_gqa(struct llama_model * model) {
|
||||
return model->hparams.n_embd_v_gqa();
|
||||
}
|
||||
|
||||
static void llama_model_reset_tensors(struct llama_model * model) {
|
||||
model->buft_input.buft = nullptr;
|
||||
model->buft_input.buft_matrix = nullptr;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue