koboldcpp/app/llama.cpp
Adrien Gallouët 98e480a32e
app : move licences to llama-app (#23824)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2026-05-29 07:46:11 +02:00

113 lines
3.7 KiB
C++

#include "build-info.h"
#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
// embedded data generated by cmake
extern const char * LICENSES[];
// visible
int llama_server(int argc, char ** argv);
int llama_cli(int argc, char ** argv);
// hidden
int llama_completion(int argc, char ** argv);
int llama_bench(int argc, char ** argv);
int llama_batched_bench(int argc, char ** argv);
int llama_fit_params(int argc, char ** argv);
int llama_quantize(int argc, char ** argv);
int llama_perplexity(int argc, char ** argv);
static const char * progname;
static int help(int argc, char ** argv);
static int version(int argc, char ** argv);
static int licenses(int argc, char ** argv);
struct command {
const char * name;
const char * desc;
std::vector<std::string> aliases;
bool hidden;
int (*func)(int, char **);
};
static const command cmds[] = {
{"serve", "HTTP API server", {"server"}, false, llama_server },
{"cli", "Command-line interactive interface", {"client"}, false, llama_cli },
{"completion", "Text completion", {"complete"}, true, llama_completion },
{"bench", "Benchmark prompt processing and text generation", {}, true, llama_bench },
{"batched-bench", "Benchmark batched decoding performance", {}, true, llama_batched_bench},
{"fit-params", "Compute parameters to fit a model in device memory", {}, true, llama_fit_params },
{"quantize", "Quantize a model", {}, true, llama_quantize },
{"perplexity", "Compute model perplexity and KL divergence", {}, true, llama_perplexity },
{"version", "Show version", {}, false, version },
{"licenses", "Show third-party licenses", {"credits"}, false, licenses },
{"help", "Show available commands", {}, false, help },
};
static int version(int argc, char ** argv) {
printf("%s\n", llama_build_info());
return 0;
}
static int licenses(int argc, char ** argv) {
for (int i = 0; LICENSES[i]; ++i) {
printf("%s\n", LICENSES[i]);
}
return 0;
}
static int help(int argc, char ** argv) {
const bool show_all = argc >= 2 && std::string(argv[1]) == "all";
printf("Usage: %s <command> [options]\n\nAvailable commands:\n", progname);
for (const auto & cmd : cmds) {
if (show_all || !cmd.hidden) {
printf(" %-15s %s\n", cmd.name, cmd.desc);
}
}
printf("\n");
if (!show_all) {
printf("Run '%s help all' to show additional commands.\n", progname);
}
printf("Run '%s <command> --help' for command-specific usage.\n", progname);
return 0;
}
static bool matches(const std::string & arg, const command & cmd) {
if (arg == cmd.name) {
return true;
}
for (const auto & alias : cmd.aliases) {
if (arg == alias) {
return true;
}
}
return false;
}
int main(int argc, char ** argv) {
progname = argv[0];
const std::string arg = argc >= 2 ? argv[1] : "help";
for (const auto & cmd : cmds) {
if (matches(arg, cmd)) {
// keep cmd.name so the router's child processes re-invoke correctly
#ifdef _WIN32
_putenv_s("LLAMA_APP_CMD", cmd.name);
#else
setenv("LLAMA_APP_CMD", cmd.name, 1);
#endif
return cmd.func(argc - 1, argv + 1);
}
}
fprintf(stderr, "error: unknown command '%s'\n", arg.c_str());
return 1;
}