diff --git a/common/common.cpp b/common/common.cpp
index 76b95d15..d60c8220 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1440,6 +1440,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     params.n_gpu_layers = n_gpu_layers[my_rank];
     cparams.n_gpu_layers = n_gpu_layers[my_rank];
     mparams.n_gpu_layers = n_gpu_layers[my_rank];
+    llama_model_set_n_gpu_layers(model, n_gpu_layers[my_rank]);
 
 #ifdef LLAMA_DEBUG
     device_print_props(dev_info_set, n_world, model, cparams);
diff --git a/src/llama.cpp b/src/llama.cpp
index 5b7fb7a0..3126651f 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -20705,6 +20705,14 @@ uint32_t llama_model_n_layers(const struct llama_model * model) {
     return model->hparams.n_layer;
 }
 
+uint32_t llama_model_n_gpu_layers(const struct llama_model * model) {
+    return model->n_gpu_layers;
+}
+
+void llama_model_set_n_gpu_layers(struct llama_model * model, uint32_t value) {
+    model->n_gpu_layers = value;
+}
+
 uint64_t llama_model_n_params(const struct llama_model * model) {
     uint64_t nparams = 0;
     for (const auto & it : model->tensors_by_name) {