diff --git a/common/arg.cpp b/common/arg.cpp
index 47d3c5e6..e282c80d 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -675,6 +675,20 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.rank = value;
         }
     ).set_env("LLAMA_ARG_RANK"));
+    add_opt(llama_arg(
+        {"--data-port"}, "N",
+        format("data port for distributed inference (default: %d)", params.data_port),
+        [](gpt_params & params, int value) {
+            params.data_port = value;
+        }
+    ).set_env("LLAMA_ARG_DATA_PORT"));
+    add_opt(llama_arg(
+        {"--signal-port"}, "N",
+        format("signal port for distributed inference (default: %d)", params.signal_port),
+        [](gpt_params & params, int value) {
+            params.signal_port = value;
+        }
+    ).set_env("LLAMA_ARG_SIGNAL_PORT"));
     add_opt(llama_arg(
         {"-lw", "--layer-window", "--n-layer-window"}, "N",
         format("number of layers to process in each compute (e.g., 16,16)"),
diff --git a/common/common.cpp b/common/common.cpp
index 374ae1f8..38828be3 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -2032,6 +2032,8 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     }
     cparams.master_ip = new char[params.master_ip.length() + 1];
     std::strcpy(cparams.master_ip, params.master_ip.c_str());
+    cparams.data_port   = params.data_port;
+    cparams.signal_port = params.signal_port;
 
     if (cparams.next_node_ip != nullptr) {
         delete[] cparams.next_node_ip;
diff --git a/common/common.h b/common/common.h
index 0a679213..c6ffe136 100644
--- a/common/common.h
+++ b/common/common.h
@@ -145,8 +145,10 @@ struct gpt_params {
     int32_t n_world = 1;               // number of devices to use
     int32_t rank = 0;                  // my rank for distributed inference
     uint32_t n_layer_window[32] = {0}; // layer window size on each node
-    std::string master_ip    = "localhost"; // ip address of the master node
-    std::string next_node_ip = "localhost"; // ip address of my next node
+    std::string master_ip    = "127.0.0.1"; // ip address of the master node
+    std::string next_node_ip = "127.0.0.1"; // ip address of my next node
+    uint32_t data_port   = 9000;  // data port for distributed inference
+    uint32_t signal_port = 10000; // signal port for distributed inference
     bool prefetch = false;             // prefetch layer weights
     bool keep_out_in_metal = true;     // whether to keep output weights in metal memory, true by default
     bool force = false;                // force to start prefetching after computation
diff --git a/include/llama.h b/include/llama.h
index 4d6dd80d..3c220562 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -330,6 +330,8 @@ extern "C" {
         bool keep_out_in_metal; // whether to keep output weights in metal memory
         char * master_ip;       // ip address of the master node
         char * next_node_ip;    // ip address of the next node
+        uint32_t data_port;     // data port for distributed inference
+        uint32_t signal_port;   // signal port for distributed inference
         uint32_t n_ctx;         // text context, 0 = from model
         uint32_t n_predict;     // number of tokens to predict
         uint32_t n_batch;       // logical maximum batch size that can be submitted to llama_decode
diff --git a/src/llama.cpp b/src/llama.cpp
index c6f6d3b5..8b5af567 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -20266,6 +20266,8 @@ struct llama_context_params llama_context_default_params() {
         /*.keep_out_in_metal =*/ true,
         /*.master_ip =*/ nullptr,
         /*.next_node_ip =*/ nullptr,
+        /*.data_port =*/ 9000,
+        /*.signal_port =*/ 10000,
         /*.n_ctx =*/ 512,
         /*.n_predict =*/ 512,
         /*.n_batch =*/ 2048,
@@ -20896,6 +20898,8 @@ struct llama_context * llama_new_context_with_model(
 
     ctx->master_ip    = params.master_ip;
     ctx->next_node_ip = params.next_node_ip;
+    ctx->data_port    = params.data_port;
+    ctx->signal_port  = params.signal_port;
     ctx->cparams.n_world = params.n_world;
     ctx->cparams.rank    = params.rank;
     ctx->cparams.force   = params.force;
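
A minimal sketch of how the two new context parameters are meant to be used from the public API, assuming the usual llama.h setup/teardown flow. The model path "model.gguf" and the port values 9100/10100 are placeholders, not part of this patch:

// Sketch only: overrides the new data_port/signal_port defaults (9000/10000)
// before creating a context, e.g. when several nodes share one host.
#include "llama.h"

int main(void) {
    llama_backend_init();

    struct llama_model_params mparams = llama_model_default_params();
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    struct llama_context_params cparams = llama_context_default_params();
    cparams.data_port   = 9100;  // port for tensor data between nodes
    cparams.signal_port = 10100; // port for control/signal messages

    struct llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == NULL) {
        llama_free_model(model);
        llama_backend_free();
        return 1;
    }

    // ... run distributed inference ...

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}

On the command line, the same settings are exposed through the new --data-port and --signal-port flags, or the LLAMA_ARG_DATA_PORT and LLAMA_ARG_SIGNAL_PORT environment variables registered in arg.cpp above.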