diff --git a/src/llama.cpp b/src/llama.cpp
index 651ac081..7884a5b2 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3592,12 +3592,11 @@ void llama_profile_device(
 
     struct model_flops  * n_flops  = &dev_info->model_flops;
     struct model_params * n_params = &dev_info->model_params;
-    struct model_bytes  * n_bytes  = &dev_info->model_bytes;
-    if (dev_info->rank == 0) {    
-        enum ggml_type inp_embd_dtype  = GGML_TYPE_F32;
-        llama_model_n_flops(model, ml, n_flops, n_params, n_bytes, n_predict, n_ctx, &inp_embd_dtype, flash_attn);
-        n_flops->inp_embd_ms = device_inp_embd_delay(model, inp_embd_dtype, 1, n_threads);
-    }
+    struct model_bytes  * n_bytes  = &dev_info->model_bytes;   
+
+    enum ggml_type inp_embd_dtype  = GGML_TYPE_F32;
+    llama_model_n_flops(model, ml, n_flops, n_params, n_bytes, n_predict, n_ctx, &inp_embd_dtype, flash_attn);
+    n_flops->inp_embd_ms = device_inp_embd_delay(model, inp_embd_dtype, 1, n_threads);
 
     device_disk_seq_bw(&dev_info->disk.read_seq_bw, &dev_info->disk.write_seq_bw, n_threads);
     device_disk_rnd_bw(&dev_info->disk.read_rnd_bw, &dev_info->disk.write_rnd_bw, n_threads);