mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 04:39:32 +00:00
topo rebuild: add a delay to avoid packet interleaving
This commit is contained in:
parent
50807fd4e1
commit
729870fcd7
3 changed files with 40 additions and 21 deletions
|
@ -1788,6 +1788,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
return iparams;
|
||||
}
|
||||
llama_bcast_layer_setup(lctx, n_layer_window, n_gpu_layers);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200)); // pause between layer setup and topology rebuild to avoid packet interleaving
|
||||
llama_rebuild_topo(lctx, n_layer_window, dev_info_set.data(), &node_type, is_forwarder);
|
||||
} else {
|
||||
// use the user-defined n_layer_window
|
||||
|
@ -1798,6 +1799,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
|
|||
if (auto_schedule){
|
||||
llama_send_device_info(lctx, &dev_info);
|
||||
llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers);
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(200)); // pause between layer setup and topology rebuild to avoid packet interleaving
|
||||
llama_rebuild_topo (lctx, n_layer_window, nullptr, &node_type, is_forwarder);
|
||||
} else {
|
||||
llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers);
|
||||
|
|
|
@ -2849,11 +2849,13 @@ void TopoRebuildHelperInfo::deserialize(const char *buffer) {
|
|||
size_t TopoRebuildHelperInfo::serialize(char ** buffer) const{
|
||||
size_t buffer_size = ::serialize(&dev_info, buffer);
|
||||
char * buffer_ = (char *)malloc(buffer_size + 1);
|
||||
|
||||
if (buffer_ == NULL) {
|
||||
LOG_ERR("%s: failed to allocate %zu bytes for device info serialization\n",
|
||||
__func__, buffer_size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
memcpy(buffer_, *buffer, buffer_size);
|
||||
memcpy(buffer_ + buffer_size, &is_forwarder, 1);
|
||||
free(*buffer);
|
||||
|
|
|
@ -3633,6 +3633,10 @@ void llama_profile_device(
|
|||
|
||||
dev_info->memory.total_physical = round(device_physical_memory(false) / (double)(1 << 30) * 100) / 100;
|
||||
dev_info->memory.available_physical = round(device_physical_memory(true) / (double)(1 << 30) * 100) / 100;
|
||||
|
||||
GGML_ASSERT(dev_info->memory.total_physical > 0, "Failed to parse total physical memory\n");
|
||||
GGML_ASSERT(dev_info->memory.available_physical > 0, "Failed to parse available physical memory\n");
|
||||
|
||||
dev_info->memory.used_can_swap = round(device_swappable_memory() / (double)(1 << 30) * 100) / 100;
|
||||
dev_info->memory.total_swap = round(device_swap_memory(false) / (double)(1 << 30) * 100) / 100;
|
||||
dev_info->memory.available_swap = round(device_swap_memory(true) / (double)(1 << 30) * 100) / 100;
|
||||
|
@ -20681,11 +20685,13 @@ int llama_rebuild_topo(llama_context * ctx,
|
|||
auto next_connect_rank = (my_rank + 1) % n_world;
|
||||
zmq::socket_t * socket_to_close = nullptr;
|
||||
bool is_not_exit = n_layer_window[my_rank] > 0 || topo_helper[my_rank].is_forwarder == 1;
|
||||
|
||||
if (is_not_exit) {
|
||||
// reconstruct socket to the next valid rank
|
||||
auto current_rank = my_rank;
|
||||
std::vector<uint32_t> nodes;
|
||||
auto next_rank_ = next_rank;
|
||||
|
||||
while (next_rank_ != my_rank) {
|
||||
nodes.push_back(next_rank_);
|
||||
if (n_layer_window[next_rank_] > 0) {
|
||||
|
@ -20694,6 +20700,7 @@ int llama_rebuild_topo(llama_context * ctx,
|
|||
next_rank_ = (next_rank_ + 1) % n_world;
|
||||
current_rank = (current_rank + 1) % n_world;
|
||||
}
|
||||
|
||||
if (next_rank_ == my_rank) {
|
||||
// only one node
|
||||
ctx->next_node_ip = "";
|
||||
|
@ -20779,9 +20786,11 @@ int llama_rebuild_topo(llama_context * ctx,
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n_world; i++) {
|
||||
is_forwarder[i] = topo_helper[i].is_forwarder;
|
||||
}
|
||||
|
||||
ctx->cparams.node_type = *node_type;
|
||||
|
||||
if (socket_to_close != nullptr) {
|
||||
|
@ -20816,9 +20825,15 @@ int llama_recv_layer_setup(struct llama_context * ctx, uint32_t * n_layer_window
|
|||
uint32_t my_rank = ctx->cparams.rank;
|
||||
|
||||
std::vector<zmq::message_t> recv_msgs;
|
||||
while (true) {
|
||||
recv_msgs.clear();
|
||||
if (!zmq::recv_multipart(*ctx->recv_socket, std::back_inserter(recv_msgs))) {
|
||||
return -1;
|
||||
}
|
||||
if (!recv_msgs.empty() && recv_msgs[0].to_string() == "n_layer_window") {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
GGML_ASSERT(recv_msgs[0].to_string() == "n_layer_window");
|
||||
GGML_ASSERT(recv_msgs[1].size() == sizeof(uint32_t) * 32);
|
||||
|
|
Loading…
Add table
Reference in a new issue