From 64390909208c941f61e51b7358dc7a548ea19669 Mon Sep 17 00:00:00 2001 From: "Li, Zonghang" <870644199@qq.com> Date: Tue, 3 Jun 2025 23:53:24 +0400 Subject: [PATCH] reformat code --- Makefile | 2 +- common/common.cpp | 133 +++++++++++++++++++++-------------------- examples/main/main.cpp | 3 +- include/llama.h | 7 ++- src/llama.cpp | 122 ++++++++++++++++++------------------- 5 files changed, 137 insertions(+), 130 deletions(-) diff --git a/Makefile b/Makefile index 60cfc22f..06d91984 100644 --- a/Makefile +++ b/Makefile @@ -280,7 +280,7 @@ ifeq ($(USE_HIGHS),1) HIGHS_LDFLAGS = -L/usr/local/lib -lhighs ifeq ($(UNAME_S),Darwin) HIGHS_CPPFLAGS += -isystem /opt/homebrew/include/highs - HIGHS_LDFLAGS += -L/opt/homebrew/lib -lhighs + HIGHS_LDFLAGS += -L/opt/homebrew/lib endif MK_CPPFLAGS += $(HIGHS_CPPFLAGS) -DUSE_HIGHS MK_LDFLAGS += $(HIGHS_LDFLAGS) diff --git a/common/common.cpp b/common/common.cpp index 378ab87f..0072996c 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -847,8 +847,7 @@ static std::string vec_to_str(const std::vector & vec) { } static bool assign_layers_to_device( - uint32_t n_world, - uint32_t my_rank, + uint32_t n_world, const device_info * dev_info_set, uint32_t * n_layer_window, uint32_t * n_gpu_layers, @@ -857,15 +856,8 @@ static bool assign_layers_to_device( float min_disk_read_speed = 0.1f) { // minimum disk I/O speed: 100 MB/s GGML_ASSERT(dev_info_set != nullptr); GGML_ASSERT(n_layer_window != nullptr); - GGML_ASSERT(my_rank == 0); - // if only 1 device, it is assigned all layers const uint32_t n_layer = llama_model_n_layers(model); - if (n_world == 1) { - n_layer_window[0] = n_layer; - return true; - } - std::vector w(n_world, 0); std::vector n(n_world, 0); std::vector mem_budget(n_world, 0.0f); @@ -1102,7 +1094,6 @@ static bool assign_layers_to_device( }; (void)print_matrix; - double final_objective = 1.0e30; std::vector final_solution; int final_k = -1; @@ -1442,7 +1433,6 @@ static bool assign_layers_to_device( // update the global best solution final_k = best_k; - final_objective = best_objective; final_solution = best_solution; if (solution_unchanged) break; @@ -1461,8 +1451,7 @@ static bool assign_layers_to_device( LOG_INF(" - N Layer Window : %d\n", w[m]); LOG_INF(" - N GPU Layers : %d\n", n[m]); } - // LOG_INF("\nEstimated Latency: %.3f ms\n", final_objective); - // LOG_INF("------------------------------------------"); + LOG_INF("\n"); // copy value from w and n to n_layer_window and n_gpu_layers, respectively std::copy(w.begin(), w.end(), n_layer_window); @@ -1522,58 +1511,67 @@ static bool assign_layers_to_device( return true; } -static bool tune_layer_allocation( - uint32_t n_world, - uint32_t my_rank, +static bool assign_layers_and_select_devices( + uint32_t n_world, std::vector dev_infos, uint32_t * n_layer_window, uint32_t * n_gpu_layers, struct llama_model * model, - const struct llama_context_params cparams, - float min_disk_read_speed = 0.1f) { + const struct llama_context_params cparams) { memset(n_layer_window, 0, n_world * sizeof(uint32_t)); - memset(n_gpu_layers, 0, n_world * sizeof(uint32_t)); + memset(n_gpu_layers, 0, n_world * sizeof(uint32_t)); + std::vector dev_infos_temp = dev_infos; - std::vector n_layer_windows_temp; - std::vector n_gpu_layers_temp; - while(n_world > 0) { + std::vector n_layer_windows_temp, n_gpu_layers_temp; + + while (n_world > 0) { std::vector dev_infos_ = dev_infos_temp; - std::vector n_layer_windows_(n_world, 0); - std::vector n_gpu_layers_(n_world, 0); - if (!assign_layers_to_device(n_world, my_rank, 
dev_infos_.data(), + std::vector n_layer_windows_(n_world, 0), n_gpu_layers_(n_world, 0); + + if (!assign_layers_to_device(n_world, dev_infos_.data(), n_layer_windows_.data(), n_gpu_layers_.data(), model, cparams)) { return false; } + dev_infos_temp.clear(); n_layer_windows_temp.clear(); n_gpu_layers_temp.clear(); - for(uint32_t i=0; i 1 || i==0 ) { + + for (uint32_t i = 0; i < n_world; i++) { + if (n_layer_windows_[i] > 1 || i == 0 ) { dev_infos_temp.push_back(dev_infos_[i]); n_layer_windows_temp.push_back(n_layer_windows_[i]); n_gpu_layers_temp.push_back(n_gpu_layers_[i]); + } else { + // remove this device + LOG_INF("Remove device %s (rank %d) with only %d layer assigned.\n", + dev_infos_[i].device_name, dev_infos_[i].rank, n_layer_windows_[i]); } } + if(dev_infos_temp.size() == n_world) { // no device be removed break; } n_world = dev_infos_temp.size(); + + LOG_INF("Reassign layers to the remaining %d device(s).\n\n", n_world); } - uint32_t i =0 , j =0; - while(j < n_world) { - if(dev_infos[i].rank == dev_infos_temp[j].rank){ + + uint32_t i = 0 , j = 0; + while (j < n_world) { + if (dev_infos[i].rank == dev_infos_temp[j].rank) { n_layer_window[i] = n_layer_windows_temp[j]; - n_gpu_layers[i] = n_gpu_layers_temp[j]; + n_gpu_layers[i] = n_gpu_layers_temp[j]; j++; - i++; } else { n_layer_window[i] = 0; n_gpu_layers[i] = 0; - i++; } + i++; } + return true; } @@ -1698,16 +1696,14 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { llama_gather_device_info(lctx, dev_info_set.data()); device_print_props(dev_info_set.data(), n_world, model, cparams); - // automatically determine n_layer_window and n_gpu_layers - if (!tune_layer_allocation(n_world, my_rank, dev_info_set, n_layer_window, n_gpu_layers, model, cparams)) { + // assign layers to devices and remove weak devices + if (!assign_layers_and_select_devices(n_world, dev_info_set, n_layer_window, n_gpu_layers, model, cparams)) { LOG_ERR("%s: Invalid allocation by HiGHS solver\n", __func__); llama_free(lctx); llama_free_model(model); return iparams; } llama_bcast_layer_setup(lctx, n_layer_window, n_gpu_layers); - - //rebuild topo llama_rebuild_topo(lctx, n_layer_window, dev_info_set.data()); } else { // use the user-defined n_layer_window @@ -1718,51 +1714,58 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) { if (auto_schedule){ llama_send_device_info(lctx, &dev_info); llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers); - // rebuild topo - llama_rebuild_topo(lctx,n_layer_window, nullptr); - }else{ + llama_rebuild_topo (lctx, n_layer_window, nullptr); + } else { llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers); } } - if(n_layer_window[my_rank]<=0){ - LOG_INF("%s: info: rank %d has no layers to run, skipping\n", __func__, my_rank); + + // if this is a weak device, then exit + if (n_layer_window[my_rank] <= 0) { + LOG_INF("No layer is assigned to me, exit.\n"); llama_free(lctx); llama_free_model(model); exit(0); } - //update rank and n_world for consistency - uint32_t update_rank = 0; - uint32_t update_n_world = 1; - std::vector n_layer_window_temp = {n_layer_window[0]}; - std::vector n_gpu_layers_temp = {n_gpu_layers[0]}; - for(uint32_t i=1; i n_layer_window_temp = {n_layer_window[0]}, n_gpu_layers_temp = {n_gpu_layers[0]}; + + for (uint32_t i = 1; i < n_world; i++) { + if (n_layer_window[i] <= 0) { continue; } - if(i <= my_rank){ + if (i <= my_rank) { update_rank++; } update_n_world++; n_layer_window_temp.push_back(n_layer_window[i]); 
n_gpu_layers_temp.push_back(n_gpu_layers[i]); } - memset(n_layer_window, 0, n_world * sizeof(uint32_t)); - memset(n_gpu_layers, 0, n_world * sizeof(uint32_t)); - for (uint32_t i=0; icpu_props.flops_f32_f32 = device_cpu_flops (model, GGML_TYPE_F32, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_f32_f32 = device_metal_flops(model, GGML_TYPE_F32, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_f32_f32 = device_cuda_flops (model, GGML_TYPE_F32, GGML_TYPE_F32); + dev_info->cpu_props.flops_f32_f32 = device_cpu_flops (model, GGML_TYPE_F32, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_f32_f32 = device_metal_flops(model, GGML_TYPE_F32, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_f32_f32 = device_cuda_flops (model, GGML_TYPE_F32, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_F16)) { - dev_info->cpu_props.flops_f16_f32 = device_cpu_flops (model, GGML_TYPE_F16, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_f16_f32 = device_metal_flops(model, GGML_TYPE_F16, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_f16_f32 = device_cuda_flops (model, GGML_TYPE_F16, GGML_TYPE_F32); + dev_info->cpu_props.flops_f16_f32 = device_cpu_flops (model, GGML_TYPE_F16, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_f16_f32 = device_metal_flops(model, GGML_TYPE_F16, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_f16_f32 = device_cuda_flops (model, GGML_TYPE_F16, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_Q2_K)) { - dev_info->cpu_props.flops_q2k_f32 = device_cpu_flops (model, GGML_TYPE_Q2_K, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_q2k_f32 = device_metal_flops(model, GGML_TYPE_Q2_K, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_q2k_f32 = device_cuda_flops (model, GGML_TYPE_Q2_K, GGML_TYPE_F32); + dev_info->cpu_props.flops_q2k_f32 = device_cpu_flops (model, GGML_TYPE_Q2_K, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_q2k_f32 = device_metal_flops(model, GGML_TYPE_Q2_K, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_q2k_f32 = device_cuda_flops (model, GGML_TYPE_Q2_K, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_Q4_K)) { - dev_info->cpu_props.flops_q4k_f32 = device_cpu_flops (model, GGML_TYPE_Q4_K, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_q4k_f32 = device_metal_flops(model, GGML_TYPE_Q4_K, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_q4k_f32 = device_cuda_flops (model, GGML_TYPE_Q4_K, GGML_TYPE_F32); + dev_info->cpu_props.flops_q4k_f32 = device_cpu_flops (model, GGML_TYPE_Q4_K, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_q4k_f32 = device_metal_flops(model, GGML_TYPE_Q4_K, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_q4k_f32 = device_cuda_flops (model, GGML_TYPE_Q4_K, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_Q5_K)) { - dev_info->cpu_props.flops_q5k_f32 = device_cpu_flops (model, GGML_TYPE_Q5_K, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_q5k_f32 = device_metal_flops(model, GGML_TYPE_Q5_K, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_q5k_f32 = device_cuda_flops (model, GGML_TYPE_Q5_K, GGML_TYPE_F32); + dev_info->cpu_props.flops_q5k_f32 = device_cpu_flops (model, GGML_TYPE_Q5_K, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_q5k_f32 = device_metal_flops(model, GGML_TYPE_Q5_K, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_q5k_f32 = device_cuda_flops (model, GGML_TYPE_Q5_K, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_Q6_K)) { - dev_info->cpu_props.flops_q6k_f32 = device_cpu_flops 
(model, GGML_TYPE_Q6_K, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_q6k_f32 = device_metal_flops(model, GGML_TYPE_Q6_K, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_q6k_f32 = device_cuda_flops (model, GGML_TYPE_Q6_K, GGML_TYPE_F32); + dev_info->cpu_props.flops_q6k_f32 = device_cpu_flops (model, GGML_TYPE_Q6_K, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_q6k_f32 = device_metal_flops(model, GGML_TYPE_Q6_K, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_q6k_f32 = device_cuda_flops (model, GGML_TYPE_Q6_K, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_IQ2_XXS)) { - dev_info->cpu_props.flops_iq2xxs_f32 = device_cpu_flops (model, GGML_TYPE_IQ2_XXS, GGML_TYPE_F32, n_threads); + dev_info->cpu_props.flops_iq2xxs_f32 = device_cpu_flops (model, GGML_TYPE_IQ2_XXS, GGML_TYPE_F32, n_threads); dev_info->gpu_props.metal_flops_iq2xxs_f32= device_metal_flops(model, GGML_TYPE_IQ2_XXS, GGML_TYPE_F32); dev_info->gpu_props.cuda_flops_iq2xxs_f32 = device_cuda_flops (model, GGML_TYPE_IQ2_XXS, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_Q5_0)) { - dev_info->cpu_props.flops_q50_f32 = device_cpu_flops (model, GGML_TYPE_Q5_0, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_q50_f32 = device_metal_flops(model, GGML_TYPE_Q5_0, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_q50_f32 = device_cuda_flops (model, GGML_TYPE_Q5_0, GGML_TYPE_F32); + dev_info->cpu_props.flops_q50_f32 = device_cpu_flops (model, GGML_TYPE_Q5_0, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_q50_f32 = device_metal_flops(model, GGML_TYPE_Q5_0, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_q50_f32 = device_cuda_flops (model, GGML_TYPE_Q5_0, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_Q8_0)) { - dev_info->cpu_props.flops_q80_f32 = device_cpu_flops (model, GGML_TYPE_Q8_0, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_q80_f32 = device_metal_flops(model, GGML_TYPE_Q8_0, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_q80_f32 = device_cuda_flops (model, GGML_TYPE_Q8_0, GGML_TYPE_F32); + dev_info->cpu_props.flops_q80_f32 = device_cpu_flops (model, GGML_TYPE_Q8_0, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_q80_f32 = device_metal_flops(model, GGML_TYPE_Q8_0, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_q80_f32 = device_cuda_flops (model, GGML_TYPE_Q8_0, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_IQ1_S)) { - dev_info->cpu_props.flops_iq1s_f32 = device_cpu_flops (model, GGML_TYPE_IQ1_S, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_iq1s_f32= device_metal_flops(model, GGML_TYPE_IQ1_S, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_iq1s_f32 = device_cuda_flops (model, GGML_TYPE_IQ1_S, GGML_TYPE_F32); + dev_info->cpu_props.flops_iq1s_f32 = device_cpu_flops (model, GGML_TYPE_IQ1_S, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_iq1s_f32 = device_metal_flops(model, GGML_TYPE_IQ1_S, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_iq1s_f32 = device_cuda_flops (model, GGML_TYPE_IQ1_S, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_IQ4_NL)) { - dev_info->cpu_props.flops_iq4nl_f32 = device_cpu_flops (model, GGML_TYPE_IQ4_NL, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_iq4nl_f32= device_metal_flops(model, GGML_TYPE_IQ4_NL, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_iq4nl_f32 = device_cuda_flops (model, GGML_TYPE_IQ4_NL, GGML_TYPE_F32); + dev_info->cpu_props.flops_iq4nl_f32 = device_cpu_flops (model, GGML_TYPE_IQ4_NL, GGML_TYPE_F32, n_threads); + 
dev_info->gpu_props.metal_flops_iq4nl_f32 = device_metal_flops(model, GGML_TYPE_IQ4_NL, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_iq4nl_f32 = device_cuda_flops (model, GGML_TYPE_IQ4_NL, GGML_TYPE_F32); } if (is_dtype_exist(n_params, GGML_TYPE_IQ1_M)) { - dev_info->cpu_props.flops_iq1m_f32 = device_cpu_flops (model, GGML_TYPE_IQ1_M, GGML_TYPE_F32, n_threads); - dev_info->gpu_props.metal_flops_iq1m_f32= device_metal_flops(model, GGML_TYPE_IQ1_M, GGML_TYPE_F32); - dev_info->gpu_props.cuda_flops_iq1m_f32 = device_cuda_flops (model, GGML_TYPE_IQ1_M, GGML_TYPE_F32); + dev_info->cpu_props.flops_iq1m_f32 = device_cpu_flops (model, GGML_TYPE_IQ1_M, GGML_TYPE_F32, n_threads); + dev_info->gpu_props.metal_flops_iq1m_f32 = device_metal_flops(model, GGML_TYPE_IQ1_M, GGML_TYPE_F32); + dev_info->gpu_props.cuda_flops_iq1m_f32 = device_cuda_flops (model, GGML_TYPE_IQ1_M, GGML_TYPE_F32); } } @@ -7470,6 +7470,8 @@ static void llm_load_qwen2_tensors( const uint32_t * n_layer_window, bool * use_mmap_buffer, bool set_needed) { + (void)use_mmap_buffer; // unused in this function + const auto tn = LLM_TN(model.arch); ggml_context * ctx_input = nullptr; @@ -7487,8 +7489,7 @@ static void llm_load_qwen2_tensors( const llama_hparams hparams = model.hparams; const int64_t n_embd = hparams.n_embd; - const int64_t n_embd_gqa = hparams.n_embd_v_gqa(); - // const int64_t n_embd_gqa = n_embd_v_gqa; + const int64_t n_embd_gqa = hparams.n_embd_v_gqa(); const int64_t n_ff = hparams.n_ff(); const int64_t n_vocab = hparams.n_vocab; const int64_t n_layer = hparams.n_layer; @@ -20525,14 +20526,12 @@ int llama_bcast_layer_setup(struct llama_context * ctx, uint32_t * n_layer_windo return 0; } -LLAMA_API int llama_rebuild_topo(llama_context *ctx, - uint32_t *n_layer_window, - device_info *dev_info_set) { +int llama_rebuild_topo(llama_context * ctx, uint32_t * n_layer_window, device_info * dev_info_set) { uint32_t n_world = ctx->cparams.n_world; uint32_t my_rank = ctx->cparams.rank; - device_info* dev_info_ptr = nullptr; - if (dev_info_set == nullptr){ - // for rank!=0, recv all devices info + device_info * dev_info_ptr = nullptr; + + if (dev_info_set == nullptr) { std::vector msgs; if (!zmq::recv_multipart(*ctx->recv_socket, std::back_inserter(msgs))) { return -1; @@ -20542,7 +20541,7 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx, deserialize((const char *)msgs[i].data(), &dev_info_ptr[i]); } GGML_ASSERT(msgs.size() == n_world); - }else{ + } else { dev_info_ptr = dev_info_set; } @@ -20550,7 +20549,7 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx, // notify next rank auto next_rank = (my_rank + 1) % n_world; - if(n_layer_window[next_rank] <= 0 && next_rank != 0){ + if (n_layer_window[next_rank] <= 0 && next_rank != 0) { try { auto msgs = dev_infos_to_messages(dev_info_ptr, n_world); ctx->send_socket->set(zmq::sockopt::linger, 3500); @@ -20564,22 +20563,23 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx, } } - // check myself's layer - zmq::socket_t* socket_to_close = nullptr; - if(n_layer_window[my_rank] > 0) { + zmq::socket_t * socket_to_close = nullptr; + if (n_layer_window[my_rank] > 0) { // reconstruct socket to the next valid rank std::string next_ip; auto current_rank = my_rank; - while(next_rank!=my_rank){ - if(n_layer_window[next_rank] > 0){ + + while (next_rank != my_rank) { + if (n_layer_window[next_rank] > 0) { next_ip = dev_info_ptr[current_rank].next_ip; break; } - next_rank = (next_rank + 1) % n_world; + next_rank = (next_rank + 1) % n_world; current_rank = (current_rank + 1) % n_world; } - 
if(!next_ip.empty()){ - if((my_rank+1)%n_world != next_rank){ + + if (!next_ip.empty()) { + if ((my_rank + 1) % n_world != next_rank) { socket_to_close = ctx->send_socket; ctx->send_socket = new zmq::socket_t(*ctx->sock_context, zmq::socket_type::push); std::string send_endp = "tcp://" + next_ip + ":" + std::to_string(map_rank_to_port(next_rank, ctx->data_port)); @@ -20587,7 +20587,8 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx, ctx->next_node_ip = next_ip; ctx->cparams.original_next_rank = next_rank; } - if(next_rank != 0){ + + if (next_rank != 0) { try { auto msgs = dev_infos_to_messages(dev_info_ptr, n_world); zmq::send_multipart(*ctx->send_socket, msgs); @@ -20599,18 +20600,21 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx, return -1; } } - }else{ + } else { // only one node ctx->next_node_ip = ""; } } - if(!dev_info_set){ + + if (!dev_info_set) { delete[] dev_info_ptr; } + if(socket_to_close != nullptr){ socket_to_close->close(); delete socket_to_close; } + return 0; } @@ -20675,11 +20679,9 @@ void llama_free_sockets(struct llama_context * ctx, char ** msg) { } } -void llama_update_context_with_rankworld(struct llama_context * ctx, - uint32_t rank, - uint32_t n_world) { - if(ctx) { - ctx->cparams.rank = rank; +void llama_update_context_with_rankworld(struct llama_context * ctx, uint32_t rank, uint32_t n_world) { + if (ctx) { + ctx->cparams.rank = rank; ctx->cparams.n_world = n_world; } }
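
Note on the renumbering hunk in llama_init_from_gpt_params: after weak devices exit (their n_layer_window entry is 0), the surviving entries of n_layer_window / n_gpu_layers are packed to the front and ranks are renumbered so the pipeline ring stays contiguous. The standalone sketch below reproduces just that compaction step; the function name compact_layer_setup and the sample values are illustrative only and do not appear in the codebase.

    // Minimal sketch of the rank/n_world compaction, assuming rank 0 always
    // keeps at least one layer (assign_layers_and_select_devices guarantees
    // the i == 0 entry is never dropped).
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    static void compact_layer_setup(uint32_t n_world, uint32_t my_rank,
                                    std::vector<uint32_t> & n_layer_window,
                                    std::vector<uint32_t> & n_gpu_layers,
                                    uint32_t & new_rank, uint32_t & new_n_world) {
        new_rank    = 0;
        new_n_world = 1; // rank 0 always survives
        std::vector<uint32_t> win = {n_layer_window[0]}, gpu = {n_gpu_layers[0]};
        for (uint32_t i = 1; i < n_world; i++) {
            if (n_layer_window[i] == 0) {
                continue; // weak device, already exited
            }
            if (i <= my_rank) {
                new_rank++; // new rank = number of surviving devices before my old rank
            }
            new_n_world++;
            win.push_back(n_layer_window[i]);
            gpu.push_back(n_gpu_layers[i]);
        }
        // zero-pad back to the original length, as the arrays keep n_world slots
        win.resize(n_world, 0);
        gpu.resize(n_world, 0);
        n_layer_window = win;
        n_gpu_layers   = gpu;
    }

    int main() {
        // device 2 was assigned no layers and has exited; device 3 becomes rank 2
        std::vector<uint32_t> win = {16, 8, 0, 8}, gpu = {16, 0, 0, 8};
        uint32_t new_rank = 0, new_n_world = 0;
        compact_layer_setup(4, /*my_rank=*/3, win, gpu, new_rank, new_n_world);
        printf("new rank %u of %u, windows: %u %u %u %u\n",
               (unsigned) new_rank, (unsigned) new_n_world,
               (unsigned) win[0], (unsigned) win[1], (unsigned) win[2], (unsigned) win[3]);
        // prints: new rank 2 of 3, windows: 16 8 8 0
        return 0;
    }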