add device_flops() for cpu, metal, and cuda

2025-09-10 06:44:36 +00:00 · 2024-11-10 23:11:05 +04:00 · 2024-11-10 23:11:05 +04:00 · f4260bb346
commit f4260bb346
parent 5fae6ac36f
3 changed files with 149 additions and 39 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3581,6 +3581,8 @@ void llama_profile_device(device_info * dev_info, struct llama_model * model, co
    dev_info->gpu_props.description     = gpu_props.description;
    dev_info->gpu_props.memory_free     = round(gpu_props.memory_free  / (double)(1 << 30) * 100) / 100;
    dev_info->gpu_props.memory_total    = round(gpu_props.memory_total / (double)(1 << 30) * 100) / 100;
+    dev_info->gpu_props.metal_flops     = device_metal_flops(model, GGML_TYPE_F32);
+    dev_info->gpu_props.cuda_flops      = device_cuda_flops(model, GGML_TYPE_F32);
 }

 ggml_backend_buffer_type_t llama_dev_buffer_type(struct llama_model * model, int device) {