mirror of
https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 07:09:06 +00:00
add args: data-port and signal-port
This commit is contained in:
parent
104e3b2356
commit
6ff38b2a0c
5 changed files with 26 additions and 2 deletions
|
@@ -675,6 +675,20 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
|
||||||
params.rank = value;
|
params.rank = value;
|
||||||
}
|
}
|
||||||
).set_env("LLAMA_ARG_RANK"));
|
).set_env("LLAMA_ARG_RANK"));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--data-port"}, "N",
|
||||||
|
format("data port for distributed inference (default: %d)", params.data_port),
|
||||||
|
[](gpt_params & params, int value) {
|
||||||
|
params.data_port = value;
|
||||||
|
}
|
||||||
|
).set_env("LLAMA_ARG_DATA_PORT"));
|
||||||
|
add_opt(llama_arg(
|
||||||
|
{"--signal-port"}, "N",
|
||||||
|
format("signal port for distributed inference (default: %d)", params.signal_port),
|
||||||
|
[](gpt_params & params, int value) {
|
||||||
|
params.signal_port = value;
|
||||||
|
}
|
||||||
|
).set_env("LLAMA_ARG_SIGNAL_PORT"));
|
||||||
add_opt(llama_arg(
|
add_opt(llama_arg(
|
||||||
{"-lw", "--layer-window", "--n-layer-window"}, "N",
|
{"-lw", "--layer-window", "--n-layer-window"}, "N",
|
||||||
format("number of layers to process in each compute (e.g., 16,16)"),
|
format("number of layers to process in each compute (e.g., 16,16)"),
|
||||||
|
|
|
@@ -2032,6 +2032,8 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
||||||
}
|
}
|
||||||
cparams.master_ip = new char[params.master_ip.length() + 1];
|
cparams.master_ip = new char[params.master_ip.length() + 1];
|
||||||
std::strcpy(cparams.master_ip, params.master_ip.c_str());
|
std::strcpy(cparams.master_ip, params.master_ip.c_str());
|
||||||
|
cparams.data_port = params.data_port;
|
||||||
|
cparams.signal_port = params.signal_port;
|
||||||
|
|
||||||
if (cparams.next_node_ip != nullptr) {
|
if (cparams.next_node_ip != nullptr) {
|
||||||
delete[] cparams.next_node_ip;
|
delete[] cparams.next_node_ip;
|
||||||
|
|
|
@@ -145,8 +145,10 @@ struct gpt_params {
|
||||||
int32_t n_world = 1; // number of devices to use
|
int32_t n_world = 1; // number of devices to use
|
||||||
int32_t rank = 0; // my rank for distributed inference
|
int32_t rank = 0; // my rank for distributed inference
|
||||||
uint32_t n_layer_window[32] = {0}; // layer window size on each node
|
uint32_t n_layer_window[32] = {0}; // layer window size on each node
|
||||||
std::string master_ip = "localhost"; // ip address of the master node
|
std::string master_ip = "127.0.0.1"; // ip address of the master node
|
||||||
std::string next_node_ip = "localhost"; // ip address of my next node
|
std::string next_node_ip = "127.0.0.1"; // ip address of my next node
|
||||||
|
uint32_t data_port = 9000; // data port for distributed inference
|
||||||
|
uint32_t signal_port = 10000; // signal port for distributed inference
|
||||||
bool prefetch = false; // prefetch layer weights
|
bool prefetch = false; // prefetch layer weights
|
||||||
bool keep_out_in_metal = true; // whether to keep output weights in metal memory, true by default
|
bool keep_out_in_metal = true; // whether to keep output weights in metal memory, true by default
|
||||||
bool force = false; // force to start prefetching after computation
|
bool force = false; // force to start prefetching after computation
|
||||||
|
|
|
@@ -330,6 +330,8 @@ extern "C" {
|
||||||
bool keep_out_in_metal; // whether to keep output weights in metal memory
|
bool keep_out_in_metal; // whether to keep output weights in metal memory
|
||||||
char * master_ip; // ip address of the master node
|
char * master_ip; // ip address of the master node
|
||||||
char * next_node_ip; // ip address of the next node
|
char * next_node_ip; // ip address of the next node
|
||||||
|
uint32_t data_port; // data port for distributed inference
|
||||||
|
uint32_t signal_port; // signal port for distributed inference
|
||||||
uint32_t n_ctx; // text context, 0 = from model
|
uint32_t n_ctx; // text context, 0 = from model
|
||||||
uint32_t n_predict; // number of tokens to predict
|
uint32_t n_predict; // number of tokens to predict
|
||||||
uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode
|
uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode
|
||||||
|
|
|
@@ -20266,6 +20266,8 @@ struct llama_context_params llama_context_default_params() {
|
||||||
/*.keep_out_in_metal =*/ true,
|
/*.keep_out_in_metal =*/ true,
|
||||||
/*.master_ip =*/ nullptr,
|
/*.master_ip =*/ nullptr,
|
||||||
/*.next_node_ip =*/ nullptr,
|
/*.next_node_ip =*/ nullptr,
|
||||||
|
/*.data_port =*/ 9000,
|
||||||
|
/*.signal_port =*/ 10000,
|
||||||
/*.n_ctx =*/ 512,
|
/*.n_ctx =*/ 512,
|
||||||
/*.n_predict =*/ 512,
|
/*.n_predict =*/ 512,
|
||||||
/*.n_batch =*/ 2048,
|
/*.n_batch =*/ 2048,
|
||||||
|
@@ -20896,6 +20898,8 @@ struct llama_context * llama_new_context_with_model(
|
||||||
|
|
||||||
ctx->master_ip = params.master_ip;
|
ctx->master_ip = params.master_ip;
|
||||||
ctx->next_node_ip = params.next_node_ip;
|
ctx->next_node_ip = params.next_node_ip;
|
||||||
|
ctx->data_port = params.data_port;
|
||||||
|
ctx->signal_port = params.signal_port;
|
||||||
ctx->cparams.n_world = params.n_world;
|
ctx->cparams.n_world = params.n_world;
|
||||||
ctx->cparams.rank = params.rank;
|
ctx->cparams.rank = params.rank;
|
||||||
ctx->cparams.force = params.force;
|
ctx->cparams.force = params.force;
|
||||||
|
|
Loading…
Add table
Reference in a new issue