llama : add llama_model_load_from_splits (#11255)

* llama : add `llama_model_load_from_splits`

* update
This commit is contained in:
parent c67cc9837d
commit 681149ced2
5 changed files with 116 additions and 24 deletions
@@ -31,7 +31,7 @@
 #endif
 
 // Returns 0 on success, -1 on error, and -2 on cancellation via llama_progress_callback
-static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
+static int llama_model_load(const std::string & fname, std::vector<std::string> & splits, llama_model & model, llama_model_params & params) {
     // loading time will be recalculated after the first eval, so
     // we take page faults deferred by mmap() into consideration
     model.t_load_us = 0;
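The -2 status mentioned in the comment above is produced when the progress callback installed in llama_model_params returns false, which aborts the load. A minimal caller-side sketch of that cancellation path (the helper names and the 50% threshold are hypothetical; `progress_callback` and `llama_model_default_params` are the existing llama.h API):

    #include "llama.h"

    // Returning false from the progress callback aborts the load;
    // llama_model_load then reports -2 and the caller receives a nullptr.
    static bool abort_halfway(float progress, void * /* user_data */) {
        return progress < 0.5f;  // keep loading until 50%, then cancel
    }

    // hypothetical helper wiring the callback into a load
    static llama_model * try_load_cancellable(const char * path) {
        llama_model_params params = llama_model_default_params();
        params.progress_callback = abort_halfway;
        return llama_model_load_from_file(path, params);
    }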
@@ -40,7 +40,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llama_model_params & params) {
     model.t_start_us = tm.t_start_us;
 
     try {
-        llama_model_loader ml(fname, params.use_mmap, params.check_tensors, params.kv_overrides);
+        llama_model_loader ml(fname, splits, params.use_mmap, params.check_tensors, params.kv_overrides);
 
         ml.print_info();
 
@@ -9374,14 +9374,9 @@ int64_t llama_time_us(void) {
     return ggml_time_us();
 }
 
-struct llama_model * llama_load_model_from_file(
-        const char * path_model,
-        struct llama_model_params params) {
-    return llama_model_load_from_file(path_model, params);
-}
-
-struct llama_model * llama_model_load_from_file(
-        const char * path_model,
+static struct llama_model * llama_model_load_from_file_impl(
+        const std::string & path_model,
+        std::vector<std::string> & splits,
         struct llama_model_params params) {
     ggml_time_init();
 
@@ -9485,7 +9480,7 @@ struct llama_model * llama_model_load_from_file(
         LLAMA_LOG_INFO("%s: using device %s (%s) - %zu MiB free\n", __func__, ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), free/1024/1024);
     }
 
-    const int status = llama_model_load(path_model, *model, params);
+    const int status = llama_model_load(path_model, splits, *model, params);
     GGML_ASSERT(status <= 0);
     if (status < 0) {
         if (status == -1) {
@@ -9501,6 +9496,35 @@ struct llama_model * llama_model_load_from_file(
     return model;
 }
 
+// deprecated
+struct llama_model * llama_load_model_from_file(
+        const char * path_model,
+        struct llama_model_params params) {
+    return llama_model_load_from_file(path_model, params);
+}
+
+struct llama_model * llama_model_load_from_file(
+        const char * path_model,
+        struct llama_model_params params) {
+    std::vector<std::string> splits = {};
+    return llama_model_load_from_file_impl(path_model, splits, params);
+}
+
+struct llama_model * llama_model_load_from_splits(
+        const char ** paths,
+        size_t n_paths,
+        struct llama_model_params params) {
+    std::vector<std::string> splits;
+    if (n_paths == 0) {
+        LLAMA_LOG_ERROR("%s: list of splits is empty\n", __func__);
+        return nullptr;
+    }
+    for (size_t i = 0; i < n_paths; ++i) {
+        splits.push_back(paths[i]);
+    }
+    return llama_model_load_from_file_impl(splits.front(), splits, params);
+}
+
 struct llama_context * llama_init_from_model(
         struct llama_model * model,
         struct llama_context_params params) {
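The new entry point lets a caller pass the list of split GGUF files explicitly rather than a single path; per the code above, an empty list yields a nullptr and the first split is passed as the primary file. A minimal usage sketch (the shard file names are hypothetical, following the pattern produced by gguf-split):

    #include "llama.h"

    int main() {
        // hypothetical shard names as produced by gguf-split
        const char * paths[] = {
            "my-model-00001-of-00002.gguf",
            "my-model-00002-of-00002.gguf",
        };

        llama_model_params params = llama_model_default_params();
        llama_model * model = llama_model_load_from_splits(paths, 2, params);
        if (model == NULL) {
            return 1;  // empty list or failed load
        }

        // ... create a context and run inference here ...

        llama_model_free(model);
        return 0;
    }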