add arg --cuda-mem

2025-09-16 01:19:40 +00:00 · 2025-01-16 09:15:34 +04:00 · 2025-01-16 09:15:34 +04:00 · 46e99218b4
commit 46e99218b4
parent dab6b2e1c2
5 changed files with 19 additions and 3 deletions
--- a/common/common.h
+++ b/common/common.h
@ -148,6 +148,7 @@ struct gpt_params {
    std::string master_ip         = "localhost"; // ip address of the master node
    std::string next_node_ip      = "localhost"; // ip address of my next node
    bool    unload                = false; // unload layer weights after use or not
+    int32_t cuda_mem              =   999; // cuda memory to use, in whole GiB
    int32_t n_predict             =    -1; // new tokens to predict
    int32_t n_ctx                 =     0; // context size
    int32_t n_batch               =  2048; // logical batch size for prompt processing (must be >=32 to use BLAS)