From e7c6b830e6ecfcb7e1ee2c9998b2655e10e79020 Mon Sep 17 00:00:00 2001
From: Lizonghang <870644199@qq.com>
Date: Wed, 29 Jan 2025 11:15:45 +0400
Subject: [PATCH] fix auto schedule logic

---
 common/common.cpp | 50 +++++++++++++++++++-------------------------------
 1 file changed, 19 insertions(+), 31 deletions(-)

diff --git a/common/common.cpp b/common/common.cpp
index ff565163..524c7f9b 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1483,14 +1483,12 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     uint32_t n_world = params.n_world;
     uint32_t my_rank = params.rank;
     bool auto_schedule = params.n_layer_window[0] == 0;
-
-    if (auto_schedule) {
-        // get device profile
-        LOG_INF("\nstart profiling this device, this may take some seconds ...\n");
-        dev_info.rank = params.rank;
-        llama_profile_device(&dev_info, model, ml, params.gpu_mem, params.n_predict, params.n_ctx, params.cpuparams.n_threads, params.flash_attn);
-    }
-
+
+    // get device profile
+    LOG_INF("\nstart profiling this device, this may take some seconds ...\n");
+    dev_info.rank = params.rank;
+    llama_profile_device(&dev_info, model, ml, params.gpu_mem, params.n_predict, params.n_ctx, params.cpuparams.n_threads, params.flash_attn);
+
     // create llama context
     struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
     llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -1502,24 +1500,21 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         mparams.n_layer_window[0] = n_layers;
         llama_context_n_layer_window(lctx)[0] = n_layers;
     } else {
+        uint32_t n_layer_window[32] = {0}, n_gpu_layers[32] = {0};
+
         // initialize sockets
         llama_init_sockets(lctx, n_world, my_rank);

-        uint32_t n_layer_window[32] = {0}, n_gpu_layers[32] = {0};
+        // sychronize device profile to the master node
+        struct device_info * dev_info_set = nullptr;
+        if (my_rank == 0) {
+            dev_info_set = (struct device_info *)malloc(n_world * sizeof(struct device_info));
+            dev_info_set[0] = dev_info;

-        if (auto_schedule) {
-            // sychronize device profile to the master node
-            struct device_info * dev_info_set = nullptr;
-            if (my_rank == 0) {
-                dev_info_set = (struct device_info *)malloc(n_world * sizeof(struct device_info));
-                dev_info_set[0] = dev_info;
-                llama_gather_device_info(lctx, dev_info_set);
-                device_print_props(dev_info_set, n_world, model, cparams);
-            } else {
-                llama_send_device_info(lctx, &dev_info);
-            }
+            llama_gather_device_info(lctx, dev_info_set);
+            device_print_props(dev_info_set, n_world, model, cparams);

-        if (my_rank == 0) {
+            if (auto_schedule) {
                 // automatically determine n_layer_window and n_gpu_layers
                 if (!assign_layers_to_device(n_world, my_rank, dev_info_set, n_layer_window, n_gpu_layers, model, cparams)) {
                     LOG_ERR("%s: Invalid allocation by HiGHS solver\n", __func__);
@@ -1527,21 +1522,14 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
                     llama_free_model(model);
                     return iparams;
                 }
-            }
-        } else {
-            if (my_rank == 0) {
-                // use the user-defined n_layer_window
-                std::copy(std::begin(params.n_layer_window), std::end(params.n_layer_window), n_layer_window);
-            }
-        }
-
-        if (my_rank == 0) {
-            if (auto_schedule) {
                 llama_bcast_layer_setup(lctx, n_layer_window, n_gpu_layers);
             } else {
+                // use the user-defined n_layer_window
+                std::copy(std::begin(params.n_layer_window), std::end(params.n_layer_window), n_layer_window);
                 llama_bcast_layer_setup(lctx, n_layer_window, nullptr);
             }
         } else {
+            llama_send_device_info(lctx, &dev_info);
             llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers);
         }

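
Note for reviewers: the sketch below summarizes the control flow that results from the hunks above. It is illustrative only, not the verbatim post-patch contents of common.cpp; surrounding declarations, context/model setup, and the error path taken when the HiGHS solver rejects the allocation are elided, and only identifiers that already appear in this patch are used.

    // every rank profiles its own device (moved out of the auto_schedule branch)
    llama_profile_device(&dev_info, model, ml, params.gpu_mem, params.n_predict,
                         params.n_ctx, params.cpuparams.n_threads, params.flash_attn);

    llama_init_sockets(lctx, n_world, my_rank);

    if (my_rank == 0) {
        // master: gather every rank's profile before deciding the layer windows
        struct device_info * dev_info_set = (struct device_info *) malloc(n_world * sizeof(struct device_info));
        dev_info_set[0] = dev_info;
        llama_gather_device_info(lctx, dev_info_set);
        device_print_props(dev_info_set, n_world, model, cparams);

        if (auto_schedule) {
            // solver-chosen n_layer_window and n_gpu_layers
            assign_layers_to_device(n_world, my_rank, dev_info_set, n_layer_window, n_gpu_layers, model, cparams);
            llama_bcast_layer_setup(lctx, n_layer_window, n_gpu_layers);
        } else {
            // user-defined n_layer_window, no GPU-layer override
            std::copy(std::begin(params.n_layer_window), std::end(params.n_layer_window), n_layer_window);
            llama_bcast_layer_setup(lctx, n_layer_window, nullptr);
        }
    } else {
        // worker: report the local profile, then wait for the assignment
        llama_send_device_info(lctx, &dev_info);
        llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers);
    }

The practical effect is that device profiling and the profile exchange now run on every rank regardless of auto_schedule, so rank 0 always holds a complete dev_info_set, and only the choice between the solver output and the user-defined n_layer_window depends on the flag.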