Merge commit '12280ae905' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	common/CMakeLists.txt
#	docs/docker.md
#	examples/model-conversion/scripts/causal/compare-logits.py
#	ggml/src/ggml-hexagon/htp/rope-ops.c
#	tests/test-backend-ops.cpp
#	tests/test-barrier.cpp
#	tools/server/CMakeLists.txt
#	tools/server/README.md
Author: Concedo
Date:   2025-12-16 16:29:01 +08:00
Commit: e88bf41fdc

49 changed files with 1380 additions and 583 deletions


@@ -49,6 +49,7 @@
 #define LLAMA_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083

 using json = nlohmann::ordered_json;

+using namespace common_arg_utils;
 static std::initializer_list<enum llama_example> mmproj_examples = {
     LLAMA_EXAMPLE_MTMD,
@@ -66,6 +67,15 @@ static std::string read_file(const std::string & fname) {
     return content;
 }

+static const std::vector<common_arg> & get_common_arg_defs() {
+    static const std::vector<common_arg> options = [] {
+        common_params params;
+        auto ctx = common_params_parser_init(params, LLAMA_EXAMPLE_SERVER, nullptr);
+        return ctx.options;
+    }();
+    return options;
+}
+
 common_arg & common_arg::set_examples(std::initializer_list<enum llama_example> examples) {
     this->examples = examples;
     return *this;
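The new get_common_arg_defs() accessor builds the option list once via a function-local static initialized by an immediately invoked lambda; C++11 guarantees that initialization runs exactly once, even with concurrent callers. A standalone sketch of the same memoization pattern, using hypothetical names that are not from this commit:

#include <cstdio>
#include <vector>

// hypothetical stand-in for the expensive setup that
// common_params_parser_init() performs in the real code
static std::vector<int> build_defaults() {
    return {1, 2, 3};
}

static const std::vector<int> & get_defaults() {
    // constructed on first call only; later calls reuse the cached vector
    static const std::vector<int> defaults = [] {
        return build_defaults();
    }();
    return defaults;
}

int main() {
    std::printf("%zu defaults\n", get_defaults().size());
    return 0;
}

Note that get_common_arg_defs() is itself declared static, so it has internal linkage and is only callable from within this translation unit.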
@@ -136,7 +146,7 @@ static std::vector<std::string> break_str_into_lines(std::string input, size_t m
     return result;
 }

-std::string common_arg::to_string() {
+std::string common_arg::to_string() const {
     // params for printing to console
     const static int n_leading_spaces = 40;
     const static int n_char_per_line_help = 70; // TODO: detect this based on current console
@@ -649,6 +659,53 @@ static void add_rpc_devices(const std::string & servers) {
     }
 }

+bool common_params_parse(int argc, char ** argv, llama_example ex, std::map<common_arg, std::string> & out_map) {
+    common_params dummy_params;
+    common_params_context ctx_arg = common_params_parser_init(dummy_params, ex, nullptr);
+
+    std::unordered_map<std::string, common_arg *> arg_to_options;
+    for (auto & opt : ctx_arg.options) {
+        for (const auto & arg : opt.args) {
+            arg_to_options[arg] = &opt;
+        }
+    }
+
+    // TODO @ngxson : find a way to deduplicate this code
+    // handle command line arguments
+    auto check_arg = [&](int i) {
+        if (i+1 >= argc) {
+            throw std::invalid_argument("expected value for argument");
+        }
+    };
+
+    for (int i = 1; i < argc; i++) {
+        const std::string arg_prefix = "--";
+        std::string arg = argv[i];
+        if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
+            std::replace(arg.begin(), arg.end(), '_', '-');
+        }
+
+        if (arg_to_options.find(arg) == arg_to_options.end()) {
+            throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str()));
+        }
+
+        auto opt = *arg_to_options[arg];
+        std::string val;
+        if (opt.value_hint != nullptr) {
+            // arg with single value
+            check_arg(i);
+            val = argv[++i];
+        }
+        if (opt.value_hint_2 != nullptr) {
+            // TODO: support arg with 2 values
+            throw std::invalid_argument("error: argument with 2 values is not yet supported\n");
+        }
+        out_map[opt] = val;
+    }
+
+    return true;
+}
+
 bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
     auto ctx_arg = common_params_parser_init(params, ex, print_usage);
     const common_params params_org = ctx_arg.params; // the example can modify the default params
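A hedged usage sketch for the new map-based common_params_parse() overload. It assumes the overload is declared in common/arg.h and that common_arg gained the ordering operator a std::map key requires, neither of which is visible in this hunk:

#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>

#include "arg.h"   // assumed header declaring common_params_parse and common_arg

int main(int argc, char ** argv) {
    std::map<common_arg, std::string> parsed;
    try {
        // e.g. argv = { "prog", "--jinja", "--mmproj", "proj.gguf" }
        common_params_parse(argc, argv, LLAMA_EXAMPLE_SERVER, parsed);
    } catch (const std::invalid_argument & e) {
        std::fprintf(stderr, "%s\n", e.what());
        return 1;
    }
    for (const auto & [opt, val] : parsed) {
        // flag-style options (no value_hint) map to an empty string
        std::printf("%s = %s\n", std::string(opt.args[0]).c_str(), val.c_str());
    }
    return 0;
}

Because out_map is keyed by the option itself, a repeated flag overwrites its earlier value, and underscores in "--"-prefixed arguments are normalized to dashes before lookup, both of which follow from the loop above.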
@@ -694,25 +751,19 @@ static std::string list_builtin_chat_templates() {
     return msg.str();
 }

-static bool is_truthy(const std::string & value) {
+bool common_arg_utils::is_truthy(const std::string & value) {
     return value == "on" || value == "enabled" || value == "1";
 }

-static bool is_falsey(const std::string & value) {
+bool common_arg_utils::is_falsey(const std::string & value) {
     return value == "off" || value == "disabled" || value == "0";
 }

-static bool is_autoy(const std::string & value) {
+bool common_arg_utils::is_autoy(const std::string & value) {
     return value == "auto" || value == "-1";
 }

 common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
-    // default values specific to example
-    // note: we place it here instead of inside server.cpp to allow llama-gen-docs to pick it up
-    if (ex == LLAMA_EXAMPLE_SERVER) {
-        params.use_jinja = true;
-    }
-
     params.use_color = tty_can_use_colors();

     // load dynamic backends
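Moving is_truthy/is_falsey/is_autoy into the common_arg_utils namespace (paired with the new `using namespace common_arg_utils;` at the top of the file) makes them reusable from other translation units. Their semantics are exact string matches, as this quick sketch shows; the include path is an assumption:

#include <cassert>
#include "arg.h"   // assumed to declare the common_arg_utils helpers

int main() {
    using namespace common_arg_utils;
    assert(is_truthy("on")  && is_truthy("enabled")  && is_truthy("1"));
    assert(is_falsey("off") && is_falsey("disabled") && is_falsey("0"));
    assert(is_autoy("auto") && is_autoy("-1"));
    // anything else matches none of the three; e.g. "yes" is not truthy here
    assert(!is_truthy("yes") && !is_falsey("no"));
    return 0;
}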
@@ -1807,7 +1858,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
-        {"--mmproj"}, "FILE",
+        {"-mm", "--mmproj"}, "FILE",
         "path to a multimodal projector file. see tools/mtmd/README.md\n"
         "note: if -hf is used, this argument can be omitted",
         [](common_params & params, const std::string & value) {
@@ -1815,7 +1866,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ"));
     add_opt(common_arg(
-        {"--mmproj-url"}, "URL",
+        {"-mmu", "--mmproj-url"}, "URL",
         "URL to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
@@ -2545,6 +2596,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.models_dir = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_DIR"));
+    add_opt(common_arg(
+        {"--models-preset"}, "PATH",
+        "path to INI file containing model presets for the router server (default: disabled)",
+        [](common_params & params, const std::string & value) {
+            params.models_preset = value;
+        }
+    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODELS_PRESET"));
     add_opt(common_arg(
         {"--models-max"}, "N",
         string_format("for router server, maximum number of models to load simultaneously (default: %d, 0 = unlimited)", params.models_max),
@@ -2561,14 +2619,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_MODELS_AUTOLOAD"));
     add_opt(common_arg(
         {"--jinja"},
-        string_format("use jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
+        string_format("use jinja template for chat (default: %s)", params.use_jinja ? "enabled" : "disabled"),
         [](common_params & params) {
             params.use_jinja = true;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_COMPLETION, LLAMA_EXAMPLE_CLI, LLAMA_EXAMPLE_MTMD}).set_env("LLAMA_ARG_JINJA"));
     add_opt(common_arg(
         {"--no-jinja"},
-        string_format("disable jinja template for chat (default: %s)\n", params.use_jinja ? "enabled" : "disabled"),
+        string_format("disable jinja template for chat (default: %s)", params.use_jinja ? "disabled" : "enabled"),
         [](common_params & params) {
             params.use_jinja = false;
         }