mirror of https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 02:29:02 +00:00
fix some bugs
Signed-off-by: DeEMO <yzzxrx@gmail.com>
parent cc46aa9828
commit 4b36aef157
2 changed files with 20 additions and 14 deletions
@@ -1547,7 +1547,7 @@ static bool tune_layer_allocation(
         dev_infos_temp.clear();
         n_layer_windows_temp.clear();
         n_gpu_layers_temp.clear();
-        for(auto i=0; i<n_world; i++) {
+        for(uint32_t i=0; i<n_world; i++) {
             if (n_layer_windows_[i] > 1 || i==0 ) {
                 dev_infos_temp.push_back(dev_infos_[i]);
                 n_layer_windows_temp.push_back(n_layer_windows_[i]);
@@ -1561,7 +1561,7 @@ static bool tune_layer_allocation(
 
             n_world = dev_infos_temp.size();
         }
-        int i =0 , j =0;
+        uint32_t i =0 , j =0;
         while(j < n_world) {
             if(dev_infos[i].rank == dev_infos_temp[j].rank){
                 n_layer_window[i] = n_layer_windows_temp[j];
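The loop-index changes in the two hunks above (auto / int to uint32_t) remove signed/unsigned comparisons against the uint32_t counts such as n_world. A minimal standalone sketch of the problem being avoided (not prima.cpp code; the names and values are illustrative):

// sign_compare_sketch.cpp -- why comparing a signed index with uint32_t is risky
#include <cstdint>
#include <cstdio>

int main() {
    uint32_t n_world = 4;
    int i = -1;                     // with `auto i = 0` the index is deduced as int, not uint32_t
    if (i < n_world) {              // -Wsign-compare: i is converted to unsigned before comparing
        printf("taken\n");
    } else {
        printf("-1 < 4u is false: -1 converts to %u\n", (unsigned)i);
    }
    for (uint32_t k = 0; k < n_world; k++) {  // the fixed form: both operands unsigned
        printf("k = %u\n", k);
    }
    return 0;
}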
@@ -1701,13 +1701,19 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
             llama_recv_layer_setup(lctx, n_layer_window, n_gpu_layers);
         }
     }
+    if(n_layer_window[my_rank]<=0){
+        LOG_INF("%s: info: rank %d has no layers to run, skipping\n", __func__, my_rank);
+        llama_free(lctx);
+        llama_free_model(model);
+        exit(0);
+    }
 
     //update rank and n_world for consistency
     uint32_t update_rank = 0;
     uint32_t update_n_world = 1;
     std::vector<uint32_t> n_layer_window_temp = {n_layer_window[0]};
     std::vector<uint32_t> n_gpu_layers_temp = {n_gpu_layers[0]};
-    for(auto i=1; i<n_world; i++) {
+    for(uint32_t i=1; i<n_world; i++) {
         if(n_layer_window[i] <= 0 ){
             continue;
         }
@@ -1720,7 +1726,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     }
     memset(n_layer_window, 0, n_world * sizeof(uint32_t));
     memset(n_gpu_layers, 0, n_world * sizeof(uint32_t));
-    for (auto i=0; i<update_n_world; i++) {
+    for (uint32_t i=0; i<update_n_world; i++) {
         n_layer_window[i] = n_layer_window_temp[i];
         n_gpu_layers[i] = n_gpu_layers_temp[i];
     }
@@ -20263,7 +20263,7 @@ int llama_send_device_info(struct llama_context * ctx, struct device_info * dev_
 }
 
 LLAMA_API int llama_bcast_startup_args(llama_context *ctx, uint32_t rank, startup_args *args) {
-    int32_t n_world = ctx->cparams.n_world;
+    auto n_world = ctx->cparams.n_world;
     if (n_world == 1) {
         return 0;
     }
@@ -20343,14 +20343,14 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx,
         }
         dev_info_ptr = new device_info[n_world];
         for (size_t i = 0; i < msgs.size(); i++) {
-            deserialize((const char *)msgs[i].data(), &dev_info_set[i]);
+            deserialize((const char *)msgs[i].data(), &dev_info_ptr[i]);
         }
     }else{
         char * buffer = nullptr;
         for(size_t i = 0; i < n_world; i++) {
             size_t buffer_size = serialize(&dev_info_set[i], &buffer);
             msgs.emplace_back(buffer, buffer_size);
 
             free(buffer);
         }
         dev_info_ptr = dev_info_set;
@@ -20361,9 +20361,9 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx,
 
         // notify next rank
         auto next_rank = (my_rank + 1) % n_world;
-        if(n_layer_window[next_rank] <= 0){
+        if(n_layer_window[next_rank] <= 0 && next_rank != 0){
             try {
-                ctx->send_socket->setsockopt(ZMQ_LINGER, 3500);
+                ctx->send_socket->set(zmq::sockopt::linger, 3500);
                 zmq::send_multipart(*ctx->send_socket, msgs);
             } catch (const zmq::error_t& e) {
                 LLAMA_LOG_INFO("Failed to send data: %s\n", e.what());
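The setsockopt change above switches to cppzmq's typed option API (socket.set(zmq::sockopt::...)), which supersedes the deprecated setsockopt overloads in recent cppzmq releases. A minimal sketch assuming cppzmq with zmq_addon.hpp is available; the endpoint and message contents are made up:

// cppzmq_sockopt_sketch.cpp -- typed socket option plus multipart send
#include <zmq.hpp>
#include <zmq_addon.hpp>
#include <vector>

int main() {
    zmq::context_t zctx(1);
    zmq::socket_t sock(zctx, zmq::socket_type::push);
    sock.set(zmq::sockopt::linger, 3500);   // typed API; replaces sock.setsockopt(ZMQ_LINGER, 3500)
    sock.connect("tcp://127.0.0.1:5555");   // hypothetical endpoint

    std::vector<zmq::message_t> msgs;
    msgs.emplace_back("hello", 5);
    msgs.emplace_back("world", 5);
    zmq::send_multipart(sock, msgs);        // same helper used in llama_rebuild_topo
    return 0;
}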
@@ -20382,7 +20382,7 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx,
         auto current_rank = my_rank;
         while(next_rank!=my_rank){
             if(n_layer_window[next_rank] > 0){
-                next_ip = dev_info_ptr[next_rank].next_ip;
+                next_ip = dev_info_ptr[current_rank].next_ip;
                 break;
             }
             next_rank = (next_rank + 1) % n_world;
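For reference, the loop in this hunk walks the node ring to find the next rank that still has layers assigned (n_layer_window[rank] > 0). A small standalone sketch of that traversal with hypothetical per-rank values:

// ring_next_rank_sketch.cpp -- pick the next active rank, skipping ranks with no layers
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    std::vector<uint32_t> n_layer_window = {16, 0, 0, 8};  // ranks 1 and 2 dropped out (hypothetical)
    uint32_t n_world = n_layer_window.size();
    uint32_t my_rank = 0;

    uint32_t next_rank = (my_rank + 1) % n_world;
    while (next_rank != my_rank) {
        if (n_layer_window[next_rank] > 0) {
            break;                           // found the next active rank in the ring
        }
        next_rank = (next_rank + 1) % n_world;
    }
    printf("rank %u forwards to rank %u\n", my_rank, next_rank);  // prints: rank 0 forwards to rank 3
    return 0;
}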
@@ -20402,6 +20402,9 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx,
                 }
                 return -1;
             }
+        }else{
+            // only one node
+            ctx->next_node_ip = "";
         }
     }
     if(!dev_info_set){
@@ -20409,10 +20412,7 @@ LLAMA_API int llama_rebuild_topo(llama_context *ctx,
     }
     socket_to_close->close();
     delete socket_to_close;
-    if(n_layer_window[my_rank]<=0){
-        exit(0);
-    }
-    return true;
+    return 0;
 }
 
 int llama_recv_layer_setup(struct llama_context * ctx, uint32_t * n_layer_window, uint32_t * n_gpu_layers) {