mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-08 01:41:37 +00:00
Merge commit '12280ae905' into concedo_experimental
# Conflicts:
#	.github/workflows/build.yml
#	common/CMakeLists.txt
#	docs/docker.md
#	examples/model-conversion/scripts/causal/compare-logits.py
#	ggml/src/ggml-hexagon/htp/rope-ops.c
#	tests/test-backend-ops.cpp
#	tests/test-barrier.cpp
#	tools/server/CMakeLists.txt
#	tools/server/README.md
This commit is contained in:
commit
e88bf41fdc
49 changed files with 1380 additions and 583 deletions
|
|
@ -49,6 +49,7 @@
|
|||
#define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083
|
||||
|
||||
using json = nlohmann::ordered_json;
|
||||
using namespace common_arg_utils;
|
||||
|
||||
static std::initializer_list<enum llama_example> mmproj_examples = {
|
||||
LLAMA_EXAMPLE_MTMD,
|
||||
|
|
@ -66,6 +67,15 @@ static std::string read_file(const std::string & fname) {
|
|||
return content;
|
||||
}
|
||||
|
||||
static const std::vector<common_arg> & get_common_arg_defs() {
|
||||
static const std::vector<common_arg> options = [] {
|
||||
common_params params;
|
||||
auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_SERVER, nullptr);
|
||||
return ctx.options;
|
||||
}();
|
||||
return options;
|
||||
}
|
||||
|
||||
common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
|
||||
this->examples = examples;
|
||||
return *this;
|
||||
|
|
@ -136,7 +146,7 @@ static std::vector<std::string> break_str_into_lines(std::string input, size_t m
|
|||
return result;
|
||||
}
|
||||
|
||||
std::string common_arg::to_string() {
|
||||
std::string common_arg::to_string() const {
|
||||
// params for printing to console
|
||||
const static int n_leading_spaces = 40;
|
||||
const static int n_char_per_line_help = 70; // TODO: detect this based on current console
|
||||
|
|
@ -649,6 +659,53 @@ static void add_rpc_devices(const std::string & servers) {
|
|||
}
|
||||
}
|
||||
|
||||
bool common_params_parse(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map) {
|
||||
common_params dummy_params;
|
||||
common_params_context ctx_arg = common_params_parser_init(dummy_params, ex, nullptr);
|
||||
|
||||
std::unordered_map<std::string, common_arg *> arg_to_options;
|
||||
for (auto & opt : ctx_arg.options) {
|
||||
for (const auto & arg : opt.args) {
|
||||
arg_to_options[arg] = &opt;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO @ngxson : find a way to deduplicate this code
|
||||
|
||||
// handle command line arguments
|
||||
auto check_arg = [&](int i) {
|
||||
if (i+1 >= argc) {
|
||||
throw std::invalid_argument("expected value for argument");
|
||||
}
|
||||
};
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
const std::string arg_prefix = "--";
|
||||
|
||||
std::string arg = argv[i];
|
||||
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
if (arg_to_options.find(arg) == arg_to_options.end()) {
|
||||
throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
|
||||
}
|
||||
auto opt = *arg_to_options[arg];
|
||||
std::string val;
|
||||
if (opt.value_hint != nullptr) {
|
||||
// arg with single value
|
||||
check_arg(i);
|
||||
val = argv[++i];
|
||||
}
|
||||
if (opt.value_hint_2 != nullptr) {
|
||||
// TODO: support arg with 2 values
|
||||
throw std::invalid_argument("error: argument with 2 values is not yet supported\n");
|
||||
}
|
||||
out_map[opt] = val;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
||||
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
|
||||
const common_params params_org = ctx_arg.params; // the example can modify the default params
|
||||
|
|
@ -694,25 +751,19 @@ static std::string list_builtin_chat_templates() {
|
|||
return msg.str();
|
||||
}
|
||||
|
||||
static bool is_truthy(const std::string & value) {
|
||||
bool common_arg_utils::is_truthy(const std::string & value) {
|
||||
return value == "on" || value == "enabled" || value == "1";
|
||||
}
|
||||
|
||||
static bool is_falsey(const std::string & value) {
|
||||
bool common_arg_utils::is_falsey(const std::string & value) {
|
||||
return value == "off" || value == "disabled" || value == "0";
|
||||
}
|
||||
|
||||
static bool is_autoy(const std::string & value) {
|
||||
bool common_arg_utils::is_autoy(const std::string & value) {
|
||||
return value == "auto" || value == "-1";
|
||||
}
|
||||
|
||||
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
||||
// default values specific to example
|
||||
// note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
|
||||
if (ex == LLAMA_EXAMPLE_SERVER) {
|
||||
params.use_jinja = true;
|
||||
}
|
||||
|
||||
params.use_color = tty_can_use_colors();
|
||||
|
||||
// load dynamic backends
|
||||
|
|
@ -1807,7 +1858,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
|
||||
add_opt(common_arg(
|
||||
{"--mmproj"}, "FILE",
|
||||
{"-mm", "--mmproj"}, "FILE",
|
||||
"path to a multimodal projector file. see tools/mtmd/README.md\n"
|
||||
"note: if -hf is used, this argument can be omitted",
|
||||
[](common_params & params, const std::string & value) {
|
||||
|
|
@ -1815,7 +1866,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
}
|
||||
).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ"));
|
||||
add_opt(common_arg(
|
||||
{"--mmproj-url"}, "URL",
|
||||
{"-mmu", "--mmproj-url"}, "URL",
|
||||
"URL to a multimodal projector file. see tools/mtmd/README.md",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.mmproj.url = value;
|
||||
|
|
@ -2545,6 +2596,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
params.models_dir = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
|
||||
add_opt(common_arg(
|
||||
{"--models-preset"}, "PATH",
|
||||
"path to INI file containing model presets for the router server (default: disabled)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.models_preset = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_PRESET"));
|
||||
add_opt(common_arg(
|
||||
{"--models-max"}, "N",
|
||||
string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
|
||||
|
|
@ -2561,14 +2619,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
|||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
|
||||
add_opt(common_arg(
|
||||
{"--jinja"},
|
||||
string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
|
||||
string_format("use jinja template for chat (default: %s)", params.use_jinja ? "enabled" : "disabled"),
|
||||
[](common_params & params) {
|
||||
params.use_jinja = true;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
|
||||
add_opt(common_arg(
|
||||
{"--no-jinja"},
|
||||
string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
|
||||
string_format("disable jinja template for chat (default: %s)", params.use_jinja ? "disabled" : "enabled"),
|
||||
[](common_params & params) {
|
||||
params.use_jinja = false;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue