Mirror of https://github.com/Lizonghang/prima.cpp.git (synced 2025-09-10 12:24:34 +00:00)
add automatic layer window size assignment workflow
parent: 53cb3a6069
commit: 2bd4d03aa8
6 changed files with 241 additions and 110 deletions
@@ -28,6 +28,8 @@
 #include <vector>
 #include <thread>
 
+#define DEFAULT_N_LAYER_WINDOW 4
+
 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
 #include <sys/sysctl.h>
@@ -362,6 +364,11 @@ bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]) {
     return true;
 }
 
+template <size_t N>
+void copy_n_layer_window(const uint32_t (&source)[N], uint32_t * destination) {
+    std::copy(std::begin(source), std::end(source), destination);
+}
+
 void gpt_init() {
     llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) {
         if (LOG_DEFAULT_LLAMA <= gpt_log_verbosity_thold) {
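The copy_n_layer_window helper added in this hunk takes the source as a reference to a fixed-size array, so the element count N is deduced at compile time and the copy length always matches the source. Below is a small standalone sketch of how it behaves; the main function and the array contents are illustrative only, not part of the commit.

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iterator>

// same shape as the helper added above: N is deduced from the array reference
template <std::size_t N>
void copy_n_layer_window(const uint32_t (&source)[N], uint32_t * destination) {
    std::copy(std::begin(source), std::end(source), destination);
}

int main() {
    uint32_t src[4] = {16, 8, 4, 4};   // hypothetical per-device layer windows
    uint32_t dst[4] = {0};
    copy_n_layer_window(src, dst);     // copies all 4 elements, no length argument needed
    std::printf("%u %u %u %u\n", dst[0], dst[1], dst[2], dst[3]);  // prints: 16 8 4 4
    return 0;
}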
@@ -819,6 +826,24 @@ std::string fs_get_cache_file(const std::string & filename) {
 //
 // Model utils
 //
+static void llama_assign_n_layer_window(
+        uint32_t             n_world,
+        uint32_t             my_rank,
+        const device_info  * dev_info_set,
+        uint32_t           * n_layer_window,
+        struct llama_model * model) {
+    GGML_ASSERT(dev_info_set != nullptr);
+    GGML_ASSERT(n_layer_window != nullptr);
+
+    uint32_t n_layer = llama_model_n_layers(model);
+    if (n_world == 1) {
+        n_layer_window[0] = n_layer;
+        return;
+    }
+
+    std::fill_n(n_layer_window, n_world, DEFAULT_N_LAYER_WINDOW);
+}
+
 struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     llama_init_result iparams;
     auto mparams = llama_model_params_from_gpt_params(params);
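As added here, llama_assign_n_layer_window does not yet consult dev_info_set: a single-device run is given all of the model's layers, and any multi-device run falls back to a uniform DEFAULT_N_LAYER_WINDOW of 4 layers per device. A standalone sketch of that rule follows; the free function assign_n_layer_window, the 32-layer example and main are illustrative, not prima.cpp code.

#include <algorithm>
#include <cstdint>
#include <cstdio>

#define DEFAULT_N_LAYER_WINDOW 4

// mirrors the rule above, with the device_info/model parameters dropped
static void assign_n_layer_window(uint32_t n_world, uint32_t n_layer, uint32_t * n_layer_window) {
    if (n_world == 1) {
        n_layer_window[0] = n_layer;                               // one device keeps every layer
        return;
    }
    std::fill_n(n_layer_window, n_world, DEFAULT_N_LAYER_WINDOW);  // uniform default otherwise
}

int main() {
    uint32_t window[32] = {0};
    assign_n_layer_window(/*n_world=*/3, /*n_layer=*/32, window);
    std::printf("%u %u %u\n", window[0], window[1], window[2]);    // prints: 4 4 4

    assign_n_layer_window(/*n_world=*/1, /*n_layer=*/32, window);
    std::printf("%u\n", window[0]);                                // prints: 32
    return 0;
}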
@@ -838,6 +863,8 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
         return iparams;
     }
 
+    llama_model_loader * ml = llama_model_load(params.model.c_str(), model, &mparams);
+
     if (params.reranking) {
         bool ok = true;
 
@@ -871,21 +898,49 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
     struct llama_context_params cparams = llama_context_params_from_gpt_params(params);
     llama_context * lctx = llama_new_context_with_model(model, cparams);
 
+    uint32_t n_world = cparams.n_world;
+    uint32_t my_rank = cparams.rank;
+
     // initialize sockets
-    llama_init_sockets(lctx, cparams.n_world, cparams.rank);
+    llama_init_sockets(lctx, n_world, my_rank);
 
     // sychronize device profile to the master node
     struct device_info * dev_info_set = nullptr;
-    if (params.rank == 0) {
-        dev_info_set = (struct device_info *)malloc(cparams.n_world * sizeof(struct device_info));
+    if (my_rank == 0) {
+        dev_info_set = (struct device_info *)malloc(n_world * sizeof(struct device_info));
         dev_info_set[0] = dev_info;
-        llama_collect_device_info(dev_info_set, lctx);
-        device_print_props(dev_info_set, cparams.n_world);
+        llama_gather_device_info(lctx, dev_info_set);
+        device_print_props(dev_info_set, n_world);
     } else {
-        llama_send_device_info(&dev_info, lctx);
+        llama_send_device_info(lctx, &dev_info);
     }
 
-    if (llama_context_setup_backend(lctx) == nullptr) {
+    uint32_t n_layer_window[32] = {0};
+    if (my_rank == 0) {
+        if (n_world == 1 || params.n_layer_window[0] == 0) {
+            llama_assign_n_layer_window(n_world, my_rank, dev_info_set, n_layer_window, model);
+        } else {
+            copy_n_layer_window(params.n_layer_window, n_layer_window);
+        }
+
+        // synchronize the new n_layer_window to other nodes
+        llama_broadcast_n_layer_window(lctx, n_layer_window);
+    } else {
+        llama_recv_n_layer_window(lctx, n_layer_window);
+    }
+
+    // update n_layer_window
+    copy_n_layer_window(n_layer_window, params.n_layer_window);
+    copy_n_layer_window(n_layer_window, cparams.n_layer_window);
+    copy_n_layer_window(n_layer_window, mparams.n_layer_window);
+    copy_n_layer_window(n_layer_window, llama_context_n_layer_window(lctx));
+
+    if (!mparams.vocab_only && llm_load_tensors(ml, model, mparams) < 0) {
+        LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str());
+        return iparams;
+    }
+
+    if (llama_context_setup_backend(model, cparams, lctx) == nullptr) {
         LOG_ERR("%s: failed to setup context with model '%s'\n", __func__, params.model.c_str());
         llama_free_model(model);
         return iparams;
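Taken together, the last hunk wires the assignment into a small master/worker workflow: rank 0 either computes the windows automatically or reuses the values already present in params.n_layer_window, broadcasts the result with llama_broadcast_n_layer_window, and every other rank receives it with llama_recv_n_layer_window before the table is copied into params, cparams, mparams and the context. The sketch below condenses that control flow into one standalone function; the networking is stubbed with a shared buffer, and decide_layer_windows, broadcast, receive and main are illustrative names, not prima.cpp APIs.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <iterator>

#define DEFAULT_N_LAYER_WINDOW 4

static uint32_t g_wire[32];  // stand-in for the socket-based broadcast/recv used in the commit

static void broadcast(const uint32_t * w) { std::memcpy(g_wire, w, sizeof(g_wire)); }
static void receive  (uint32_t * w)       { std::memcpy(w, g_wire, sizeof(g_wire)); }

static void decide_layer_windows(uint32_t n_world, uint32_t my_rank, uint32_t n_layer,
                                 const uint32_t (&user_window)[32],
                                 uint32_t (&n_layer_window)[32]) {
    if (my_rank == 0) {
        if (n_world == 1 || user_window[0] == 0) {
            // automatic assignment, same rule as llama_assign_n_layer_window
            if (n_world == 1) {
                n_layer_window[0] = n_layer;
            } else {
                std::fill_n(n_layer_window, n_world, DEFAULT_N_LAYER_WINDOW);
            }
        } else {
            // the user already provided explicit windows; just reuse them
            std::copy(std::begin(user_window), std::end(user_window), std::begin(n_layer_window));
        }
        broadcast(n_layer_window);   // push the decision to the other ranks
    } else {
        receive(n_layer_window);     // workers take whatever rank 0 decided
    }
}

int main() {
    uint32_t user[32]  = {0};        // all zero: let rank 0 decide automatically
    uint32_t rank0[32] = {0};
    uint32_t rank1[32] = {0};
    decide_layer_windows(/*n_world=*/2, /*my_rank=*/0, /*n_layer=*/32, user, rank0);
    decide_layer_windows(/*n_world=*/2, /*my_rank=*/1, /*n_layer=*/32, user, rank1);
    std::printf("rank0: %u %u  rank1: %u %u\n", rank0[0], rank0[1], rank1[0], rank1[1]);
    // prints: rank0: 4 4  rank1: 4 4
    return 0;
}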