mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-10 06:34:53 +00:00
add memcpy speed test
This commit is contained in:
parent
5b46c4e848
commit
a7ec685eda
3 changed files with 46 additions and 6 deletions
|
@@ -3587,6 +3587,7 @@ void llama_profile_device(
|
|||
dev_info->memory.total_swap = round(device_swap_memory(false) / (double)(1 << 30) * 100) / 100;
|
||||
dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100;
|
||||
dev_info->memory.cpu_read_ram_bw = device_memory_bw(n_threads);
|
||||
dev_info->memory.mem_cpy_delay = device_cpu_mem_copy(model, n_threads);
|
||||
|
||||
struct model_flops * n_flops = &dev_info->model_flops;
|
||||
struct model_params * n_params = &dev_info->model_params;
|
||||
|
@@ -3622,6 +3623,8 @@ void llama_profile_device(
|
|||
dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
|
||||
dev_info->gpu_props.metal_read_vram_bw = device_metal_read_vram_bw();
|
||||
dev_info->gpu_props.cuda_read_vram_bw = device_cuda_read_vram_bw();
|
||||
dev_info->gpu_props.metal_mem_cpy_delay = device_metal_mem_copy(model);
|
||||
dev_info->gpu_props.cuda_mem_cpy_delay = device_cuda_mem_copy(model);
|
||||
|
||||
if (is_dtype_exist(n_params, GGML_TYPE_F32)) {
|
||||
dev_info->cpu_props.flops_f32_f32 = device_cpu_flops (model, GGML_TYPE_F32, GGML_TYPE_F32, n_threads);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue