Mirror of https://github.com/Lizonghang/prima.cpp.git, synced 2025-09-12 09:49:40 +00:00
speedup: add arg --keep-out-in-cuda to run the output layer on CUDA
commit 1ea2d61a97
parent e8d3e5a631
6 changed files with 66 additions and 16 deletions
@@ -325,6 +325,7 @@ extern "C" {
         bool use_mlock;         // force system to keep model in RAM
         bool check_tensors;     // validate model tensor data
         bool keep_out_in_metal; // whether to keep output weights in metal memory
+        bool keep_out_in_cuda;  // whether to run the output layer on CUDA
     };

     // NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
@@ -339,6 +340,7 @@ extern "C" {
         bool force;             // force to start prefetching after computation
         float master_priority;  // priority to assign workload to the master (set 1.01 to use master first, and 0.99 to offload to other devices)
         bool keep_out_in_metal; // whether to keep output weights in metal memory
+        bool keep_out_in_cuda;  // whether to run the output layer on CUDA
         char * master_ip;       // ip address of the master node
         char * next_node_ip;    // ip address of the next node
         uint32_t data_port;     // data port for distributed inference
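For context, here is a minimal sketch of how the new argument could flow from the command line into the params struct added above. Only the field names and comments come from the diff; the struct is a local stand-in and the parser loop is a simplified, hypothetical version of prima.cpp's real argument handling.

// Hypothetical wiring of --keep-out-in-cuda into the model params.
// The fields mirror the diff above; parse_args is illustrative only.
#include <cstring>

struct llama_model_params {
    bool use_mlock         = false; // force system to keep model in RAM
    bool check_tensors     = false; // validate model tensor data
    bool keep_out_in_metal = false; // keep output weights in metal memory
    bool keep_out_in_cuda  = false; // run the output layer on CUDA
};

static void parse_args(int argc, char ** argv, llama_model_params & mparams) {
    for (int i = 1; i < argc; i++) {
        if (std::strcmp(argv[i], "--keep-out-in-cuda") == 0) {
            mparams.keep_out_in_cuda = true; // opt in to CUDA for the output layer
        }
    }
}

With wiring like this, passing --keep-out-in-cuda on the command line sets the field before the model is loaded, and the loader can then place the output layer accordingly.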
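As for the placement decision itself: in llama.cpp-derived code, pinning a graph node to a particular device is typically done through ggml's backend scheduler. Whether prima.cpp uses exactly this call is an assumption on my part; the sketch below shows the general mechanism, not the commit's actual code.

// Plausible use of ggml's backend scheduler to honor keep_out_in_cuda:
// pin the output-layer result tensor to the CUDA backend so the final
// matmul is not scheduled on the CPU. Illustrative only.
#include "ggml-backend.h"

static void pin_output_layer(ggml_backend_sched_t sched,
                             struct ggml_tensor * result_output, // lm_head output node
                             ggml_backend_t       cuda_backend,  // null if CUDA is unavailable
                             bool                 keep_out_in_cuda) {
    if (keep_out_in_cuda && cuda_backend != nullptr) {
        // Force this node onto CUDA; the scheduler inserts whatever
        // host/device copies its inputs require.
        ggml_backend_sched_set_tensor_backend(sched, result_output, cuda_backend);
    }
}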