add option --keep-inp-out-in-metal

Zonghang Li 2025-01-22 11:25:09 +04:00
parent facb4ea736
commit 33429ec4e1
2 changed files with 3 additions and 1 deletion


@@ -737,7 +737,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.cuda_mem = value; // in GiB
         }
     ).set_env("LLAMA_ARG_CUDA_MEM"));
-    // "--keep-inp-out-in-metal" is a temporary option to keep the input and output in metal
+#ifdef GGML_USE_METAL
     add_opt(llama_arg(
         {"--keep-inp-out-in-metal"},
         format("whether to keep input and output weight in metal (default: %s)", params.keep_inp_out_in_metal ? "true" : "false"),
@@ -745,6 +745,7 @@ gpt_params_context gpt_params_parser_init(gpt_params & params, llama_example ex,
             params.keep_inp_out_in_metal = true;
         }
     ).set_env("LLAMA_ARG_KEEP_INP_OUT_IN_METAL"));
+#endif
     add_opt(llama_arg(
         {"-n", "--predict", "--n-predict"}, "N",
         format("number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict),


@@ -19773,6 +19773,7 @@ struct llama_model_params llama_model_default_params() {
         /*.use_mmap                   =*/ true,
         /*.use_mlock                  =*/ false,
         /*.check_tensors              =*/ false,
+        /*.keep_inp_out_in_metal      =*/ false,
     };
 #ifdef GGML_USE_METAL
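
A minimal caller-side sketch of the new field, assuming this fork keeps the upstream llama.h entry points (llama_model_default_params, llama_load_model_from_file, llama_free_model); this is illustrative and not part of the commit:

    #include "llama.h"

    int main(void) {
        llama_model_params mparams = llama_model_default_params();
        mparams.keep_inp_out_in_metal = true; // opt in; the default added above is false
        // "model.gguf" is a placeholder path for illustration
        llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            return 1; // load failed
        }
        llama_free_model(model);
        return 0;
    }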