diff --git a/common/common.cpp b/common/common.cpp
index 88b00075..a98337d3 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1547,7 +1547,7 @@ static bool tune_layer_allocation(
         dev_infos_temp.clear();
         n_layer_windows_temp.clear();
         n_gpu_layers_temp.clear();
-        for(auto i=0; i<n_world; i++) {
+        for(uint32_t i=0; i<n_world; i++) {
             if(n_layer_windows_[i] > 1 || i==0 ) {
                 dev_infos_temp.push_back(dev_infos_[i]);
                 n_layer_windows_temp.push_back(n_layer_windows_[i]);
@@ -1561,7 +1561,7 @@
         n_world = dev_infos_temp.size();
     }

-    int i =0 , j =0;
+    uint32_t i =0 , j =0;
     while(j < n_world) {
         if(dev_infos[i].rank == dev_infos_temp[j].rank){
             n_layer_window[i] = n_layer_windows_temp[j];
@@ -1701,13 +1701,19 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
             llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers);
         }
     }
+    if(n_layer_window[my_rank]<=0){
+        LOG_INF("%s: info: rank %d has no layers to run, skipping\n", __func__, my_rank);
+        llama_free(lctx);
+        llama_free_model(model);
+        exit(0);
+    }
     //update rank and n_world for consistency
     uint32_t update_rank = 0;
     uint32_t update_n_world = 1;
     std::vector<uint32_t> n_layer_window_temp = {n_layer_window[0]};
     std::vector<uint32_t> n_gpu_layers_temp = {n_gpu_layers[0]};
-    for(auto i=1; i<n_world; i++) {
+    for(uint32_t i=1; i<n_world; i++) {
diff --git a/src/llama.cpp b/src/llama.cpp
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ ... @@
-    uint32_t n_world = ctx->cparams.n_world;
+    auto n_world = ctx->cparams.n_world;
     if (n_world == 1) {
         return 0;
     }
@@ -20343,14 +20343,14 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx,
         }
         dev_info_ptr = new device_info[n_world];
         for (size_t i = 0; i < msgs.size(); i++) {
-            deserialize((const char *)msgs[i].data(), &dev_info_set[i]);
+            deserialize((const char *)msgs[i].data(), &dev_info_ptr[i]);
         }
     }else{
         char * buffer = nullptr;
         for(size_t i = 0; i < n_world; i++) {
             size_t buffer_size = serialize(&dev_info_set[i], &buffer);
             msgs.emplace_back(buffer, buffer_size);
-
+            free(buffer);
         }
         dev_info_ptr = dev_info_set;

@@ -20361,9 +20361,9 @@

     // notify next rank
     auto next_rank = (my_rank + 1) % n_world;
-    if(n_layer_window[next_rank] <= 0){
+    if(n_layer_window[next_rank] <= 0 && next_rank != 0){
         try {
-            ctx->send_socket->setsockopt(ZMQ_LINGER, 3500);
+            ctx->send_socket->set(zmq::sockopt::linger, 3500);
             zmq::send_multipart(*ctx->send_socket, msgs);
         } catch (const zmq::error_t& e) {
             LLAMA_LOG_INFO("Failed to send data: %s\n", e.what());
@@ -20382,7 +20382,7 @@
         auto current_rank = my_rank;
         while(next_rank!=my_rank){
             if(n_layer_window[next_rank] > 0){
-                next_ip = dev_info_ptr[next_rank].next_ip;
+                next_ip = dev_info_ptr[current_rank].next_ip;
                 break;
             }
             next_rank = (next_rank + 1) % n_world;
@@ -20402,6 +20402,9 @@
                 }
                 return -1;
             }
+        }else{
+            // only one node
+            ctx->next_node_ip = "";
         }
     }
     if(!dev_info_set){
@@ ... @@
     }
     socket_to_close->close();
     delete socket_to_close;
-    if(n_layer_window[my_rank]<=0){
-        exit(0);
-    }
-    return true;
+    return 0;
 }

 int llama_recv_layer_setup(struct llama_context * ctx, uint32_t * n_layer_window, uint32_t * n_gpu_layers) {
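Side note on the socket-option change above: ctx->send_socket->setsockopt(ZMQ_LINGER, 3500) uses the deprecated cppzmq overload, and the patch moves to the typed set(zmq::sockopt::linger, ...) API. The snippet below is a minimal standalone sketch of that API only, assuming cppzmq >= 4.7; the PUSH socket, endpoint, and payload are placeholders and are not taken from the patch.

#include <cstdio>
#include <string>
#include <vector>
#include <zmq.hpp>
#include <zmq_addon.hpp>   // zmq::send_multipart

int main() {
    zmq::context_t zctx;
    zmq::socket_t sock(zctx, zmq::socket_type::push);
    sock.connect("tcp://127.0.0.1:5555");   // placeholder endpoint

    // Typed option setter used in the patch: cap close() at 3500 ms of
    // lingering on unsent messages instead of blocking indefinitely.
    sock.set(zmq::sockopt::linger, 3500);

    const std::string payload = "hello";
    std::vector<zmq::message_t> msgs;
    msgs.emplace_back(payload.data(), payload.size());

    try {
        zmq::send_multipart(sock, msgs);    // same helper the patch calls
    } catch (const zmq::error_t & e) {
        std::fprintf(stderr, "failed to send data: %s\n", e.what());
        return 1;
    }
    sock.close();
    return 0;
}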