mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 01:24:36 +00:00
server: Add ability to mount server at prefix (#14544)
* Add server_prefix
* Correct server path env
* Rename cli flag to --api-prefix
* Change all to api_prefix
This commit is contained in:
parent
8f22dc0a53
commit
17a1f0d2d4
3 changed files with 39 additions and 31 deletions
|
@ -2734,6 +2734,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||||
params.public_path = value;
|
params.public_path = value;
|
||||||
}
|
}
|
||||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH"));
|
||||||
|
add_opt(common_arg(
|
||||||
|
{"--api-prefix"}, "PREFIX",
|
||||||
|
string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()),
|
||||||
|
[](common_params & params, const std::string & value) {
|
||||||
|
params.api_prefix = value;
|
||||||
|
}
|
||||||
|
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
|
||||||
add_opt(common_arg(
|
add_opt(common_arg(
|
||||||
{"--no-webui"},
|
{"--no-webui"},
|
||||||
string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),
|
string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"),
|
||||||
|
|
|
@ -370,6 +370,7 @@ struct common_params {
|
||||||
|
|
||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
std::string public_path = ""; // NOLINT
|
std::string public_path = ""; // NOLINT
|
||||||
|
std::string api_prefix = ""; // NOLINT
|
||||||
std::string chat_template = ""; // NOLINT
|
std::string chat_template = ""; // NOLINT
|
||||||
bool use_jinja = false; // NOLINT
|
bool use_jinja = false; // NOLINT
|
||||||
bool enable_chat_template = true;
|
bool enable_chat_template = true;
|
||||||
|
|
|
@ -4806,14 +4806,14 @@ int main(int argc, char ** argv) {
|
||||||
// register static assets routes
|
// register static assets routes
|
||||||
if (!params.public_path.empty()) {
|
if (!params.public_path.empty()) {
|
||||||
// Set the base directory for serving static files
|
// Set the base directory for serving static files
|
||||||
bool is_found = svr->set_mount_point("/", params.public_path);
|
bool is_found = svr->set_mount_point(params.api_prefix + "/", params.public_path);
|
||||||
if (!is_found) {
|
if (!is_found) {
|
||||||
LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
|
LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str());
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// using embedded static index.html
|
// using embedded static index.html
|
||||||
svr->Get("/", [](const httplib::Request & req, httplib::Response & res) {
|
svr->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) {
|
||||||
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
|
if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) {
|
||||||
res.set_content("Error: gzip is not supported by this browser", "text/plain");
|
res.set_content("Error: gzip is not supported by this browser", "text/plain");
|
||||||
} else {
|
} else {
|
||||||
|
@ -4829,37 +4829,37 @@ int main(int argc, char ** argv) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// register API routes
|
// register API routes
|
||||||
svr->Get ("/health", handle_health); // public endpoint (no API key check)
|
svr->Get (params.api_prefix + "/health", handle_health); // public endpoint (no API key check)
|
||||||
svr->Get ("/metrics", handle_metrics);
|
svr->Get (params.api_prefix + "/metrics", handle_metrics);
|
||||||
svr->Get ("/props", handle_props);
|
svr->Get (params.api_prefix + "/props", handle_props);
|
||||||
svr->Post("/props", handle_props_change);
|
svr->Post(params.api_prefix + "/props", handle_props_change);
|
||||||
svr->Post("/api/show", handle_api_show);
|
svr->Post(params.api_prefix + "/api/show", handle_api_show);
|
||||||
svr->Get ("/models", handle_models); // public endpoint (no API key check)
|
svr->Get (params.api_prefix + "/models", handle_models); // public endpoint (no API key check)
|
||||||
svr->Get ("/v1/models", handle_models); // public endpoint (no API key check)
|
svr->Get (params.api_prefix + "/v1/models", handle_models); // public endpoint (no API key check)
|
||||||
svr->Get ("/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check)
|
svr->Get (params.api_prefix + "/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check)
|
||||||
svr->Post("/completion", handle_completions); // legacy
|
svr->Post(params.api_prefix + "/completion", handle_completions); // legacy
|
||||||
svr->Post("/completions", handle_completions);
|
svr->Post(params.api_prefix + "/completions", handle_completions);
|
||||||
svr->Post("/v1/completions", handle_completions_oai);
|
svr->Post(params.api_prefix + "/v1/completions", handle_completions_oai);
|
||||||
svr->Post("/chat/completions", handle_chat_completions);
|
svr->Post(params.api_prefix + "/chat/completions", handle_chat_completions);
|
||||||
svr->Post("/v1/chat/completions", handle_chat_completions);
|
svr->Post(params.api_prefix + "/v1/chat/completions", handle_chat_completions);
|
||||||
svr->Post("/api/chat", handle_chat_completions); // ollama specific endpoint
|
svr->Post(params.api_prefix + "/api/chat", handle_chat_completions); // ollama specific endpoint
|
||||||
svr->Post("/infill", handle_infill);
|
svr->Post(params.api_prefix + "/infill", handle_infill);
|
||||||
svr->Post("/embedding", handle_embeddings); // legacy
|
svr->Post(params.api_prefix + "/embedding", handle_embeddings); // legacy
|
||||||
svr->Post("/embeddings", handle_embeddings);
|
svr->Post(params.api_prefix + "/embeddings", handle_embeddings);
|
||||||
svr->Post("/v1/embeddings", handle_embeddings_oai);
|
svr->Post(params.api_prefix + "/v1/embeddings", handle_embeddings_oai);
|
||||||
svr->Post("/rerank", handle_rerank);
|
svr->Post(params.api_prefix + "/rerank", handle_rerank);
|
||||||
svr->Post("/reranking", handle_rerank);
|
svr->Post(params.api_prefix + "/reranking", handle_rerank);
|
||||||
svr->Post("/v1/rerank", handle_rerank);
|
svr->Post(params.api_prefix + "/v1/rerank", handle_rerank);
|
||||||
svr->Post("/v1/reranking", handle_rerank);
|
svr->Post(params.api_prefix + "/v1/reranking", handle_rerank);
|
||||||
svr->Post("/tokenize", handle_tokenize);
|
svr->Post(params.api_prefix + "/tokenize", handle_tokenize);
|
||||||
svr->Post("/detokenize", handle_detokenize);
|
svr->Post(params.api_prefix + "/detokenize", handle_detokenize);
|
||||||
svr->Post("/apply-template", handle_apply_template);
|
svr->Post(params.api_prefix + "/apply-template", handle_apply_template);
|
||||||
// LoRA adapters hotswap
|
// LoRA adapters hotswap
|
||||||
svr->Get ("/lora-adapters", handle_lora_adapters_list);
|
svr->Get (params.api_prefix + "/lora-adapters", handle_lora_adapters_list);
|
||||||
svr->Post("/lora-adapters", handle_lora_adapters_apply);
|
svr->Post(params.api_prefix + "/lora-adapters", handle_lora_adapters_apply);
|
||||||
// Save & load slots
|
// Save & load slots
|
||||||
svr->Get ("/slots", handle_slots);
|
svr->Get (params.api_prefix + "/slots", handle_slots);
|
||||||
svr->Post("/slots/:id_slot", handle_slots_action);
|
svr->Post(params.api_prefix + "/slots/:id_slot", handle_slots_action);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Start the server
|
// Start the server
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue