add device_flops() for cpu, metal, and cuda

This commit is contained in:
Lizonghang 2024-11-10 23:11:05 +04:00
parent 5fae6ac36f
commit f4260bb346
3 changed files with 149 additions and 39 deletions

View file

@@ -3581,6 +3581,8 @@ void llama_profile_device(device_info * dev_info, struct llama_model * model, co
dev_info->gpu_props.description = gpu_props.description;
dev_info->gpu_props.memory_free = round(gpu_props.memory_free / (double)(1 << 30) * 100) / 100;
dev_info->gpu_props.memory_total = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
dev_info->gpu_props.metal_flops = device_metal_flops(model, GGML_TYPE_F32);
dev_info->gpu_props.cuda_flops = device_cuda_flops(model, GGML_TYPE_F32);
}
ggml_backend_buffer_type_t llama_dev_buffer_type(struct llama_model * model, int device) {