mirror of https://github.com/Lizonghang/prima.cpp.git
synced 2025-09-06 13:39:02 +00:00
fix: reset -ngl to 0 when GPU is not used and reformat code
This commit is contained in:
parent b44187e3af
commit 2fbc0c8da3

4 changed files with 12 additions and 6 deletions
@@ -1527,6 +1527,12 @@ static bool assign_layers_to_device(
 //
 
 struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
+#if !(defined(GGML_USE_METAL) || defined(GGML_USE_CUDA))
+    // reset n_gpu_layers to 0 if GPU is not used
+    params.n_gpu_layers = 0;
+#endif
+
     llama_init_result iparams;
     auto mparams = llama_model_params_from_gpt_params(params);
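For context, the guard behaves like this minimal CPU-only sketch (gpt_params_sketch is a hypothetical stand-in for the real gpt_params, not the project's type): when neither GGML_USE_METAL nor GGML_USE_CUDA is defined at build time, whatever -ngl requested is forced back to 0 so no layer offload is attempted.

    // ngl_guard_sketch.cpp -- build CPU-only: g++ -std=c++11 ngl_guard_sketch.cpp
    #include <cstdio>

    struct gpt_params_sketch {   // hypothetical stand-in for gpt_params
        int n_gpu_layers = 99;   // what a user's "-ngl 99" would have requested
    };

    int main() {
        gpt_params_sketch params;
    #if !(defined(GGML_USE_METAL) || defined(GGML_USE_CUDA))
        // same guard as the hunk above: CPU-only build, ignore the request
        params.n_gpu_layers = 0;
    #endif
        std::printf("effective n_gpu_layers: %d\n", params.n_gpu_layers);
        return 0;
    }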
@@ -1582,6 +1588,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
 
     if (n_world == 1) {
         uint32_t n_layers = llama_model_n_layers(model);
+        // assign all layers to this device
         params.n_layer_window[0]  = n_layers;
         cparams.n_layer_window[0] = n_layers;
         mparams.n_layer_window[0] = n_layers;
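The added comment documents the single-device shortcut; a small sketch of what it implies, under the assumption that n_layer_window[i] holds how many model layers device i serves:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t n_world  = 1;   // only this device participates
        const uint32_t n_layers = 32;  // e.g. what llama_model_n_layers(model) returns
        uint32_t n_layer_window[8] = {0};

        if (n_world == 1) {
            n_layer_window[0] = n_layers;  // assign all layers to this device
        }
        std::printf("device 0 serves %u of %u layers\n", n_layer_window[0], n_layers);
        return 0;
    }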
@@ -350,7 +350,6 @@ float device_inp_embd_delay(struct llama_model * model, enum ggml_type src0t, in
         return 0.0f;
     }
 
-    size_t QK_K = 0;
     switch (src0t) {
         case GGML_TYPE_F32: {
             matrix_B = malloc(embd_size * sizeof(float));
@@ -20262,7 +20262,7 @@ int llama_send_device_info(struct llama_context * ctx, struct device_info * dev_
     return 0;
 }
 
-LLAMA_API int llama_bcast_startup_args(llama_context *ctx, uint32_t rank, startup_args *args) {
+int llama_bcast_startup_args(llama_context * ctx, uint32_t rank, startup_args * args) {
     int32_t n_world = ctx->cparams.n_world;
     if (n_world == 1) {
         return 0;
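The only change here is the definition dropping its LLAMA_API qualifier (plus pointer-spacing cleanup, the "reformat code" half of the commit message). The usual convention is that the export/visibility macro lives on the declaration in the public header, and the definition repeats just the plain signature. A self-contained sketch with a hypothetical EXAMPLE_API macro, not the real llama.h setup:

    // visibility_sketch.cpp -- g++ visibility_sketch.cpp
    #include <cstdio>

    #if defined(_WIN32)
    #  define EXAMPLE_API __declspec(dllexport)
    #else
    #  define EXAMPLE_API __attribute__((visibility("default")))
    #endif

    // header-style declaration: carries the export attribute once
    EXAMPLE_API int add(int a, int b);

    // definition: no macro repeated; it inherits it from the declaration
    int add(int a, int b) {
        return a + b;
    }

    int main() {
        std::printf("%d\n", add(2, 3));
        return 0;
    }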
@@ -20289,7 +20289,7 @@ LLAMA_API int llama_bcast_startup_args(llama_context *ctx, uint32_t rank, startu
     GGML_ASSERT(recv_msgs[1].size() == sizeof(bool));
     bool should_profile = *static_cast<bool*>(recv_msgs[1].data());
     args->should_profile = should_profile;
-    if (rank != n_world-1){
+    if ((int)rank != (int)n_world - 1){
        // send
        try {
            zmq::send_multipart(*ctx->send_socket, recv_msgs);
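The casts read like a signed/unsigned comparison fix: rank is uint32_t while n_world is int32_t, so the uncast comparison converts n_world - 1 to unsigned and trips -Wsign-compare. A sketch of both forms (compile with g++ -Wsign-compare):

    #include <cstdint>
    #include <cstdio>

    int main() {
        uint32_t rank   = 0;
        int32_t n_world = 1;

        if (rank != n_world - 1) {              // warns: operands differ in signedness
            std::puts("rank is not the tail");
        }
        if ((int)rank != (int)n_world - 1) {    // the committed form: both sides signed
            std::puts("rank is not the tail");
        }
        return 0;
    }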