add llama_model_n_flops

2025-09-09 23:24:35 +00:00 · 2024-11-20 19:40:27 +04:00 · 2024-11-20 19:40:27 +04:00 · 477ecf2084
commit 477ecf2084
parent 10f6f92c7e
4 changed files with 445 additions and 107 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@ -841,6 +841,8 @@ static void llama_assign_n_layer_window(
        return;
    }

+    (void)my_rank;
+
    std::fill_n(n_layer_window, n_world, DEFAULT_N_LAYER_WINDOW);
 }

@ -894,7 +896,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {

    device_info dev_info;
    dev_info.rank = params.rank;
-    llama_profile_device(&dev_info, model, params.model.c_str(), params.cpuparams.n_threads);
+    llama_profile_device(&dev_info, model, ml, params.model.c_str(), params.cpuparams.n_threads);

    // create llama context
    struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
--- a/common/profiler.h
+++ b/common/profiler.h
@ -67,6 +67,21 @@ struct device_info {
        : rank(0), device_name(""), disk_read_bandwidth(0.0f), cpu_props(), memory(), gpu_support(), gpu_props() {}
 };

+struct flops_info { // Plain bundle of six signed 64-bit counters: three FLOP counts and three parameter counts.
+    // model flops: one counter each for the input, output and layer parts of the model
+    int64_t input_flops;  // NOTE(review): presumably filled by llama_model_n_flops (named in the commit title) - confirm
+    int64_t output_flops;
+    int64_t layer_flops;  // NOTE(review): unclear from here whether this is per layer or summed over layers - confirm
+    
+    // model params: parameter counts using the same input / output / layer split as the flops above
+    int64_t input_params;
+    int64_t output_params;
+    int64_t layer_params; // NOTE(review): same per-layer vs. total question as layer_flops - confirm
+
+    flops_info() // Default constructor: every counter starts at 0, so a fresh object reads as "nothing counted yet".
+        : input_flops(0), output_flops(0), layer_flops(0), input_params(0), output_params(0), layer_params(0) {}
+};
+
 enum profiler_backend_type {
    PROFILER_BACKEND_TYPE_CPU   = 0,
    PROFILER_BACKEND_TYPE_METAL = 1,