Mirror of https://github.com/Lizonghang/prima.cpp.git, synced 2025-09-06 21:49:02 +00:00

fix: reset -ngl to 0 when GPU is not used and reformat code

commit 2fbc0c8da3, parent b44187e3af
4 changed files with 12 additions and 6 deletions
@@ -1527,6 +1527,12 @@ static bool assign_layers_to_device(
 
 struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
+
+#if !(defined(GGML_USE_METAL) || defined(GGML_USE_CUDA))
+    // reset n_gpu_layers to 0 if GPU is not used
+    params.n_gpu_layers = 0;
+#endif
+
     llama_init_result iparams;
     auto mparams = llama_model_params_from_gpt_params(params);
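Note: with neither GGML_USE_METAL nor GGML_USE_CUDA defined, the new guard makes a CPU-only build quietly ignore any -ngl value the user passes. A minimal standalone sketch of the same pattern (gpt_params reduced to the one field that matters here; the helper name is hypothetical):

#include <cstdio>

struct gpt_params { int n_gpu_layers = 0; };

static void sanitize_gpu_layers(gpt_params & params) {
#if !(defined(GGML_USE_METAL) || defined(GGML_USE_CUDA))
    // No GPU backend was compiled in, so offloading layers is impossible:
    // clamp the requested count back to 0 instead of failing later.
    params.n_gpu_layers = 0;
#endif
}

int main() {
    gpt_params p;
    p.n_gpu_layers = 32;  // as if the user passed -ngl 32
    sanitize_gpu_layers(p);
    printf("n_gpu_layers = %d\n", p.n_gpu_layers);  // prints 0 on a CPU-only build
    return 0;
}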
@@ -1582,6 +1588,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
 
     if (n_world == 1) {
         uint32_t n_layers = llama_model_n_layers(model);
+        // assign all layers to this device
         params.n_layer_window[0]  = n_layers;
         cparams.n_layer_window[0] = n_layers;
         mparams.n_layer_window[0] = n_layers;
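The single-device fast path above hands every layer to device 0. For context, a toy illustration of the shape of n_layer_window (prima.cpp derives the real split from device profiles during scheduling; the naive even split below is only an assumption for illustration):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy split of n_layers across n_world devices; window[i] is how many
// consecutive layers device i serves. Real windows come from the scheduler.
static std::vector<uint32_t> split_layers(uint32_t n_layers, uint32_t n_world) {
    std::vector<uint32_t> window(n_world, n_layers / n_world);
    for (uint32_t i = 0; i < n_layers % n_world; ++i) {
        window[i]++;  // spread the remainder over the first devices
    }
    return window;
}

int main() {
    for (uint32_t w : split_layers(32, 1)) printf("%u ", w);  // 32 (the n_world == 1 case)
    printf("\n");
    for (uint32_t w : split_layers(32, 3)) printf("%u ", w);  // 11 11 10
    printf("\n");
    return 0;
}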
@@ -1594,7 +1601,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
 
     // broadcast startup args
     struct startup_args args;
-    if (my_rank==0){
+    if (my_rank == 0){
         args.should_profile = auto_schedule;
     }
     llama_bcast_startup_args(lctx, my_rank, &args);
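Only rank 0 fills in args before the call; every other rank receives the values through the broadcast. The diff exposes a single field, so the struct presumably looks at least like this (any other members are not visible in this commit):

struct startup_args {
    bool should_profile;  // rank 0 sets this from auto_schedule, then broadcasts it
};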
@@ -350,7 +350,6 @@ float device_inp_embd_delay(struct llama_model * model, enum ggml_type src0t, in
         return 0.0f;
     }
 
-    size_t QK_K = 0;
     switch (src0t) {
         case GGML_TYPE_F32: {
             matrix_B = malloc(embd_size * sizeof(float));
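The removed size_t QK_K = 0; is an unused local: the hunk deletes it without replacement, and nothing else shown here references it. If what the surrounding code ultimately needs is the byte size of an embedding row for a given quantization type, ggml ships a helper that accounts for each type's block size internally, which is what a hand-maintained QK_K constant would be re-deriving. A sketch, assuming ggml.h is on the include path and libggml is linked:

#include "ggml.h"
#include <cstdint>
#include <cstdio>

int main() {
    const int64_t n_embd = 4096;
    // ggml_row_size() folds in the per-type block size (e.g. K-quants).
    printf("f32  row: %zu bytes\n", ggml_row_size(GGML_TYPE_F32,  n_embd));
    printf("q4_K row: %zu bytes\n", ggml_row_size(GGML_TYPE_Q4_K, n_embd));
    printf("q6_K row: %zu bytes\n", ggml_row_size(GGML_TYPE_Q6_K, n_embd));
    return 0;
}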
@@ -453,7 +453,7 @@ extern "C" {
     LLAMA_API void llama_free_sockets      (struct llama_context * ctx, char ** msg);
     LLAMA_API int  llama_gather_device_info(struct llama_context * ctx, struct device_info * dev_info_set);
     LLAMA_API int  llama_send_device_info  (struct llama_context * ctx, struct device_info * dev_info);
-    LLAMA_API int  llama_bcast_startup_args(struct llama_context * ctx, uint32_t rank, struct startup_args * args);
+    LLAMA_API int  llama_bcast_startup_args(struct llama_context * ctx, uint32_t rank, struct startup_args * args);
     LLAMA_API int  llama_bcast_layer_setup (struct llama_context * ctx, uint32_t * n_layer_window, uint32_t * n_gpu_layers);
     LLAMA_API int  llama_recv_layer_setup  (struct llama_context * ctx, uint32_t * n_layer_window, uint32_t * n_gpu_layers);
@@ -20262,13 +20262,13 @@ int llama_send_device_info(struct llama_context * ctx, struct device_info * dev_info) {
     return 0;
 }
 
-LLAMA_API int llama_bcast_startup_args(llama_context *ctx, uint32_t rank, startup_args *args) {
+int llama_bcast_startup_args(llama_context * ctx, uint32_t rank, startup_args * args) {
     int32_t n_world = ctx->cparams.n_world;
     if (n_world == 1) {
         return 0;
     }
     GGML_ASSERT(ctx != nullptr && ctx->send_socket != nullptr);
-    if (rank==0){
+    if (rank == 0){
         // send
         try {
             std::vector<zmq::message_t> send_msgs;
@@ -20289,7 +20289,7 @@ LLAMA_API int llama_bcast_startup_args(llama_context *ctx, uint32_t rank, startup_args *args) {
         GGML_ASSERT(recv_msgs[1].size() == sizeof(bool));
         bool should_profile = *static_cast<bool*>(recv_msgs[1].data());
         args->should_profile = should_profile;
-        if (rank != n_world-1){
+        if ((int)rank != (int)n_world - 1){
             // send
             try {
                 zmq::send_multipart(*ctx->send_socket, recv_msgs);
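The two hunks above implement a chain broadcast rather than a fan-out: rank 0 pushes the args to its successor, and each receiver forwards the identical multipart message down the line until the last rank. A minimal sketch of that relay using cppzmq; the "args" tag frame, the exact two-frame layout, and the socket wiring are assumptions, since the diff only shows that frame 1 carries the bool:

#include <zmq.hpp>
#include <zmq_addon.hpp>
#include <cstdint>
#include <iterator>
#include <vector>

// Relay-style broadcast: rank 0 originates the message, every other rank
// receives it and, unless it is last in the chain, forwards it unchanged.
static void bcast_startup_flag(zmq::socket_t & send_sock,
                               zmq::socket_t & recv_sock,
                               uint32_t rank, int32_t n_world,
                               bool & should_profile) {
    if (n_world == 1) return;  // single device: nothing to broadcast

    if (rank == 0) {
        std::vector<zmq::message_t> msgs;
        msgs.emplace_back("args", 4);                      // tag frame (assumed layout)
        msgs.emplace_back(&should_profile, sizeof(bool));  // payload frame
        zmq::send_multipart(send_sock, msgs);
        return;
    }

    std::vector<zmq::message_t> msgs;
    zmq::recv_multipart(recv_sock, std::back_inserter(msgs));
    should_profile = *static_cast<bool *>(msgs[1].data());

    if ((int) rank != (int) n_world - 1) {
        zmq::send_multipart(send_sock, msgs);  // pass it down the chain
    }
}

The casts added in the last hunk matter because rank is uint32_t while n_world is int32_t: without them, n_world - 1 would be converted to unsigned for the comparison, which trips -Wsign-compare and would misbehave if n_world were ever 0 (0 - 1 wraps to UINT32_MAX).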