Mirror of https://github.com/LostRuins/koboldcpp.git
Merge branch 'upstream' into concedo_experimental
# Conflicts:
#	.devops/llama-server.Dockerfile
#	README.md
#	flake.lock
#	ggml/src/ggml-vulkan.cpp
#	ggml/src/vulkan-shaders/concat.comp
#	ggml/src/vulkan-shaders/pad.comp
#	ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp
#	scripts/sync-ggml-am.sh
#	scripts/sync-ggml.last
#	src/llama.cpp
#	tests/test-backend-ops.cpp
Commit e1f97f7fb5
55 changed files with 112612 additions and 111077 deletions
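The hunks shown below replace the old std::tuple return value of llama_init_from_gpt_params with a small named struct. The declaration of that struct is not part of this excerpt, so the following is only a sketch inferred from the diff: the member names model and context come from the final hunk, while the header it lives in and the nullptr default initializers are assumptions.

    // Assumed shape of the new return type (declared elsewhere, e.g. in common.h).
    // Member names are taken from the final hunk of this diff; the default
    // initializers are an assumption, needed so that the early `return iparams;`
    // failure paths hand back null pointers.
    struct llama_init_result {
        struct llama_model   * model   = nullptr;
        struct llama_context * context = nullptr;
    };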
@@ -2040,8 +2040,8 @@ std::string fs_get_cache_file(const std::string & filename) {
 //
 // Model utils
 //
 
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params) {
+struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
+    llama_init_result iparams;
     auto mparams = llama_model_params_from_gpt_params(params);
 
     llama_model * model = nullptr;
@@ -2056,7 +2056,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
 
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
-        return std::make_tuple(nullptr, nullptr);
+        return iparams;
     }
 
     auto cparams = llama_context_params_from_gpt_params(params);
@@ -2065,7 +2065,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     if (lctx == NULL) {
         fprintf(stderr, "%s: error: failed to create context with model '%s'\n", __func__, params.model.c_str());
         llama_free_model(model);
-        return std::make_tuple(nullptr, nullptr);
+        return iparams;
     }
 
     if (!params.control_vectors.empty()) {
@@ -2076,7 +2076,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         if (cvec.n_embd == -1) {
             llama_free(lctx);
             llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            return iparams;
         }
 
         int err = llama_control_vector_apply(lctx,
@@ -2088,7 +2088,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         if (err) {
             llama_free(lctx);
             llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            return iparams;
         }
     }
 
@@ -2100,7 +2100,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
             fprintf(stderr, "%s: error: failed to apply lora adapter\n", __func__);
             llama_free(lctx);
             llama_free_model(model);
-            return std::make_tuple(nullptr, nullptr);
+            return iparams;
         }
         llama_lora_adapter_set(lctx, adapter, lora_scale);
     }
@@ -2136,7 +2136,9 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         llama_reset_timings(lctx);
     }
 
-    return std::make_tuple(model, lctx);
+    iparams.model = model;
+    iparams.context = lctx;
+    return iparams;
 }
 
 struct llama_model_params llama_model_params_from_gpt_params(const gpt_params & params) {
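The diff only changes the callee; any code that previously unpacked the returned std::tuple has to be updated alongside it. Below is a minimal caller-side sketch of that migration, assuming llama_init_from_gpt_params is declared in common.h as in upstream llama.cpp and that the llama_init_result layout matches the sketch above; the model path and the omitted inference code are placeholders.

    #include "common.h"
    #include "llama.h"

    int main() {
        gpt_params params;
        params.model = "models/model.gguf";   // placeholder path

        llama_backend_init();

        // Old style (removed by this diff):
        //     llama_model * model; llama_context * lctx;
        //     std::tie(model, lctx) = llama_init_from_gpt_params(params);

        // New style: read the members of the returned struct.
        llama_init_result init = llama_init_from_gpt_params(params);
        if (init.model == nullptr || init.context == nullptr) {
            // On failure the function now returns the default-initialized
            // struct instead of a tuple of two nullptrs.
            llama_backend_free();
            return 1;
        }

        // ... run inference with init.model / init.context ...

        llama_free(init.context);
        llama_free_model(init.model);
        llama_backend_free();
        return 0;
    }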