add metal mem limit

2025-09-09 23:44:33 +00:00 · 2025-01-23 16:08:52 +04:00 · 2025-01-23 16:08:52 +04:00 · 78a544d716
commit 78a544d716
parent 33429ec4e1
5 changed files with 102 additions and 67 deletions
--- a/common/common.h
+++ b/common/common.h
@@ -149,7 +149,7 @@ struct gpt_params {
    std::string next_node_ip      = "localhost"; // ip address of my next node
    bool    unload                = false; // unload layer weights after use or not
    bool    keep_inp_out_in_metal = false; // whether to keep input/output weight in metal, not by default
-    int32_t cuda_mem              = 999.0; // cuda memory to use, in GiB
+    int32_t gpu_mem               = 999.0; // gpu memory to use, in GiB
    int32_t n_predict             =    -1; // new tokens to predict
    int32_t n_ctx                 =     0; // context size
    int32_t n_batch               =  2048; // logical batch size for prompt processing (must be >=32 to use BLAS)