mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2026-05-05 23:41:45 +00:00
server: support OAI /v1/audio/transcriptions API (#21863)
* server: support OAI /v1/audio/transcriptions API * address autoreview comments * correct default response_format value
This commit is contained in:
parent
e21cdc11a0
commit
e489a5ca0e
9 changed files with 194 additions and 38 deletions
|
|
@ -1433,6 +1433,60 @@ json convert_responses_to_chatcmpl(const json & response_body) {
|
||||||
return chatcmpl_body;
|
return chatcmpl_body;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert an OpenAI /v1/audio/transcriptions request into an OpenAI Chat
// Completions request body.
//
// @param inp_body  parsed JSON of the request (text fields of the multipart
//                  form, or a plain JSON body)
// @param in_files  uploaded files keyed by form field name; the OAI API
//                  mandates a "file" field carrying the audio payload
// @param out_files receives the audio buffer to be forwarded to the
//                  multimodal (mtmd) pipeline
// @return chat-completions-compatible JSON body
// @throws std::invalid_argument on a missing file, an unsupported
//         response_format, or a malformed numeric field
json convert_transcriptions_to_chatcmpl(
        const json & inp_body,
        const std::map<std::string, raw_buffer> & in_files,
        std::vector<raw_buffer> & out_files) {
    // TODO @ngxson : this function may need to be improved in the future

    // handle input files
    out_files.clear();
    auto it = in_files.find("file");
    if (it != in_files.end()) {
        out_files.push_back(it->second);
    } else {
        throw std::invalid_argument("No input file found for transcription");
    }

    // handle input data
    std::string prompt          = json_value(inp_body, "prompt", std::string());
    std::string language        = json_value(inp_body, "language", std::string());
    std::string response_format = json_value(inp_body, "response_format", std::string("json"));
    if (response_format != "json") {
        throw std::invalid_argument("Only 'json' response_format is supported for transcription");
    }
    if (prompt.empty()) {
        prompt = "Transcribe audio to text";
    }
    if (!language.empty()) {
        prompt += string_format(" (language: %s)", language.c_str());
    }
    // the marker tells the chat pipeline where the audio is injected
    prompt += mtmd_default_marker();

    json chatcmpl_body = inp_body; // copy all fields
    chatcmpl_body["messages"] = json::array({
        {
            {"role",    "user"},
            {"content", prompt},
        },
    });

    // Multipart form-data delivers every field as a string, so the types must
    // be corrected here. However, the endpoint may also receive a plain JSON
    // body where these fields already have native types; in that case the
    // copied values are kept as-is instead of being mangled (a native boolean
    // "stream" must not be coerced through a string default, and a native
    // number must not go through .get<std::string>(), which throws).
    const json stream_v = inp_body.contains("stream") ? inp_body["stream"] : json("false");
    if (stream_v.is_string()) {
        chatcmpl_body["stream"] = stream_v.get<std::string>() == "true";
    }

    if (inp_body.contains("max_tokens") && inp_body["max_tokens"].is_string()) {
        // may throw std::invalid_argument / std::out_of_range on bad input
        chatcmpl_body["max_tokens"] = std::stoul(inp_body["max_tokens"].get<std::string>());
    }

    if (inp_body.contains("temperature") && inp_body["temperature"].is_string()) {
        chatcmpl_body["temperature"] = std::stof(inp_body["temperature"].get<std::string>());
    }

    return chatcmpl_body;
}
|
||||||
|
|
||||||
json convert_anthropic_to_oai(const json & body) {
|
json convert_anthropic_to_oai(const json & body) {
|
||||||
json oai_body;
|
json oai_body;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -305,6 +305,12 @@ json oaicompat_chat_params_parse(
|
||||||
// convert OpenAI Responses API format to OpenAI Chat Completions API format
|
// convert OpenAI Responses API format to OpenAI Chat Completions API format
|
||||||
json convert_responses_to_chatcmpl(const json & body);
|
json convert_responses_to_chatcmpl(const json & body);
|
||||||
|
|
||||||
|
// convert OpenAI transcriptions API format to OpenAI Chat Completions API format
|
||||||
|
json convert_transcriptions_to_chatcmpl(
|
||||||
|
const json & body,
|
||||||
|
const std::map<std::string, raw_buffer> & in_files,
|
||||||
|
std::vector<raw_buffer> & out_files);
|
||||||
|
|
||||||
// convert Anthropic Messages API format to OpenAI Chat Completions API format
|
// convert Anthropic Messages API format to OpenAI Chat Completions API format
|
||||||
json convert_anthropic_to_oai(const json & body);
|
json convert_anthropic_to_oai(const json & body);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3732,6 +3732,33 @@ void server_routes::init_routes() {
|
||||||
TASK_RESPONSE_TYPE_OAI_RESP);
|
TASK_RESPONSE_TYPE_OAI_RESP);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Handler for the OpenAI-compatible /v1/audio/transcriptions endpoint:
// rewrites the request as a chat completion and reuses the common path.
this->post_transcriptions_oai = [this](const server_http_req & req) {
    auto res = create_response();

    // transcription requires a multimodal projector with audio support
    if (!meta->has_mtmd || !meta->chat_params.allow_audio) {
        res->error(format_error_response("The current model does not support audio input.", ERROR_TYPE_NOT_SUPPORTED));
        return res;
    }

    // translate the transcription request (body + uploaded audio) into an
    // OpenAI Chat Completions request
    std::vector<raw_buffer> files;
    json body = convert_transcriptions_to_chatcmpl(json::parse(req.body), req.files, files);
    SRV_DBG("%s\n", "Request converted: OpenAI Transcriptions -> OpenAI Chat Completions");
    SRV_DBG("converted request: %s\n", body.dump().c_str());

    json body_parsed = oaicompat_chat_params_parse(body, meta->chat_params, files);

    // dispatch through the shared completions pipeline, tagged so the result
    // is serialized in the transcriptions (ASR) response format
    return handle_completions_impl(req, SERVER_TASK_TYPE_COMPLETION, body_parsed, files, TASK_RESPONSE_TYPE_OAI_ASR);
};
|
||||||
|
|
||||||
this->post_anthropic_messages = [this](const server_http_req & req) {
|
this->post_anthropic_messages = [this](const server_http_req & req) {
|
||||||
auto res = create_response();
|
auto res = create_response();
|
||||||
std::vector<raw_buffer> files;
|
std::vector<raw_buffer> files;
|
||||||
|
|
|
||||||
|
|
@ -111,6 +111,7 @@ struct server_routes {
|
||||||
server_http_context::handler_t post_completions_oai;
|
server_http_context::handler_t post_completions_oai;
|
||||||
server_http_context::handler_t post_chat_completions;
|
server_http_context::handler_t post_chat_completions;
|
||||||
server_http_context::handler_t post_responses_oai;
|
server_http_context::handler_t post_responses_oai;
|
||||||
|
server_http_context::handler_t post_transcriptions_oai;
|
||||||
server_http_context::handler_t post_anthropic_messages;
|
server_http_context::handler_t post_anthropic_messages;
|
||||||
server_http_context::handler_t post_anthropic_count_tokens;
|
server_http_context::handler_t post_anthropic_count_tokens;
|
||||||
server_http_context::handler_t post_apply_template;
|
server_http_context::handler_t post_apply_template;
|
||||||
|
|
|
||||||
|
|
@ -428,6 +428,7 @@ void server_http_context::get(const std::string & path, const server_http_contex
|
||||||
req.path,
|
req.path,
|
||||||
build_query_string(req),
|
build_query_string(req),
|
||||||
req.body,
|
req.body,
|
||||||
|
{},
|
||||||
req.is_connection_closed
|
req.is_connection_closed
|
||||||
});
|
});
|
||||||
server_http_res_ptr response = handler(*request);
|
server_http_res_ptr response = handler(*request);
|
||||||
|
|
@ -437,12 +438,39 @@ void server_http_context::get(const std::string & path, const server_http_contex
|
||||||
|
|
||||||
void server_http_context::post(const std::string & path, const server_http_context::handler_t & handler) const {
|
void server_http_context::post(const std::string & path, const server_http_context::handler_t & handler) const {
|
||||||
pimpl->srv->Post(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) {
|
pimpl->srv->Post(path_prefix + path, [handler](const httplib::Request & req, httplib::Response & res) {
|
||||||
|
std::string body = req.body;
|
||||||
|
std::map<std::string, raw_buffer> files;
|
||||||
|
|
||||||
|
if (req.is_multipart_form_data()) {
|
||||||
|
// translate text fields to a JSON object and use it as the body
|
||||||
|
json form_json = json::object();
|
||||||
|
for (const auto & [key, field] : req.form.fields) {
|
||||||
|
if (form_json.contains(key)) {
|
||||||
|
// if the key already exists, convert it to an array
|
||||||
|
if (!form_json[key].is_array()) {
|
||||||
|
json existing_value = form_json[key];
|
||||||
|
form_json[key] = json::array({existing_value});
|
||||||
|
}
|
||||||
|
form_json[key].push_back(field.content);
|
||||||
|
} else {
|
||||||
|
form_json[key] = field.content;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
body = form_json.dump();
|
||||||
|
|
||||||
|
// populate files from multipart form
|
||||||
|
for (const auto & [key, file] : req.form.files) {
|
||||||
|
files[key] = raw_buffer(file.content.begin(), file.content.end());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
server_http_req_ptr request = std::make_unique<server_http_req>(server_http_req{
|
server_http_req_ptr request = std::make_unique<server_http_req>(server_http_req{
|
||||||
get_params(req),
|
get_params(req),
|
||||||
get_headers(req),
|
get_headers(req),
|
||||||
req.path,
|
req.path,
|
||||||
build_query_string(req),
|
build_query_string(req),
|
||||||
req.body,
|
body,
|
||||||
|
std::move(files),
|
||||||
req.is_connection_closed
|
req.is_connection_closed
|
||||||
});
|
});
|
||||||
server_http_res_ptr response = handler(*request);
|
server_http_res_ptr response = handler(*request);
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,8 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
struct common_params;
|
struct common_params;
|
||||||
|
|
||||||
|
|
@ -32,6 +34,7 @@ struct server_http_res {
|
||||||
// unique pointer, used by set_chunked_content_provider
|
// unique pointer, used by set_chunked_content_provider
|
||||||
// httplib requires the stream provider to be stored in heap
|
// httplib requires the stream provider to be stored in heap
|
||||||
using server_http_res_ptr = std::unique_ptr<server_http_res>;
|
using server_http_res_ptr = std::unique_ptr<server_http_res>;
|
||||||
|
using raw_buffer = std::vector<uint8_t>;
|
||||||
|
|
||||||
struct server_http_req {
|
struct server_http_req {
|
||||||
std::map<std::string, std::string> params; // path_params + query_params
|
std::map<std::string, std::string> params; // path_params + query_params
|
||||||
|
|
@ -39,6 +42,7 @@ struct server_http_req {
|
||||||
std::string path;
|
std::string path;
|
||||||
std::string query_string; // query parameters string (e.g. "action=save")
|
std::string query_string; // query parameters string (e.g. "action=save")
|
||||||
std::string body;
|
std::string body;
|
||||||
|
std::map<std::string, raw_buffer> files; // used for file uploads (form data)
|
||||||
const std::function<bool()> & should_stop;
|
const std::function<bool()> & should_stop;
|
||||||
|
|
||||||
std::string get_param(const std::string & key, const std::string & def = "") const {
|
std::string get_param(const std::string & key, const std::string & def = "") const {
|
||||||
|
|
|
||||||
|
|
@ -725,6 +725,8 @@ json server_task_result_cmpl_final::to_json() {
|
||||||
return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
|
return stream ? to_json_oaicompat_chat_stream() : to_json_oaicompat_chat();
|
||||||
case TASK_RESPONSE_TYPE_OAI_RESP:
|
case TASK_RESPONSE_TYPE_OAI_RESP:
|
||||||
return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
|
return stream ? to_json_oaicompat_resp_stream() : to_json_oaicompat_resp();
|
||||||
|
case TASK_RESPONSE_TYPE_OAI_ASR:
|
||||||
|
return to_json_oaicompat_asr();
|
||||||
case TASK_RESPONSE_TYPE_ANTHROPIC:
|
case TASK_RESPONSE_TYPE_ANTHROPIC:
|
||||||
return stream ? to_json_anthropic_stream() : to_json_anthropic();
|
return stream ? to_json_anthropic_stream() : to_json_anthropic();
|
||||||
default:
|
default:
|
||||||
|
|
@ -1102,6 +1104,21 @@ json server_task_result_cmpl_final::to_json_oaicompat_resp_stream() {
|
||||||
return server_sent_events;
|
return server_sent_events;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Final (non-streaming) response for the OAI transcriptions API: a single
// "transcript.text.done" event carrying the full transcript and token usage.
json server_task_result_cmpl_final::to_json_oaicompat_asr() {
    return json {
        {"type", "transcript.text.done"},
        {"text", content},
        {"usage", json {
            {"type",          "tokens"},
            {"input_tokens",  n_prompt_tokens},
            {"output_tokens", n_decoded},
            {"total_tokens",  n_decoded + n_prompt_tokens},
            {"input_tokens_details", json { {"cached_tokens", n_prompt_tokens_cache} }},
        }},
    };
}
|
||||||
|
|
||||||
json server_task_result_cmpl_final::to_json_anthropic() {
|
json server_task_result_cmpl_final::to_json_anthropic() {
|
||||||
std::string stop_reason = "max_tokens";
|
std::string stop_reason = "max_tokens";
|
||||||
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
|
if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
|
||||||
|
|
@ -1400,6 +1417,8 @@ json server_task_result_cmpl_partial::to_json() {
|
||||||
return to_json_oaicompat_chat();
|
return to_json_oaicompat_chat();
|
||||||
case TASK_RESPONSE_TYPE_OAI_RESP:
|
case TASK_RESPONSE_TYPE_OAI_RESP:
|
||||||
return to_json_oaicompat_resp();
|
return to_json_oaicompat_resp();
|
||||||
|
case TASK_RESPONSE_TYPE_OAI_ASR:
|
||||||
|
return to_json_oaicompat_asr();
|
||||||
case TASK_RESPONSE_TYPE_ANTHROPIC:
|
case TASK_RESPONSE_TYPE_ANTHROPIC:
|
||||||
return to_json_anthropic();
|
return to_json_anthropic();
|
||||||
default:
|
default:
|
||||||
|
|
@ -1650,6 +1669,14 @@ json server_task_result_cmpl_partial::to_json_oaicompat_resp() {
|
||||||
return events;
|
return events;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Streaming delta event for the OAI transcriptions API: one
// "transcript.text.delta" chunk per partial result.
json server_task_result_cmpl_partial::to_json_oaicompat_asr() {
    return json {
        {"type",  "transcript.text.delta"},
        {"delta", content},
    };
}
|
||||||
|
|
||||||
json server_task_result_cmpl_partial::to_json_anthropic() {
|
json server_task_result_cmpl_partial::to_json_anthropic() {
|
||||||
json events = json::array();
|
json events = json::array();
|
||||||
bool first = (n_decoded == 1);
|
bool first = (n_decoded == 1);
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ enum task_response_type {
|
||||||
TASK_RESPONSE_TYPE_OAI_CHAT,
|
TASK_RESPONSE_TYPE_OAI_CHAT,
|
||||||
TASK_RESPONSE_TYPE_OAI_CMPL,
|
TASK_RESPONSE_TYPE_OAI_CMPL,
|
||||||
TASK_RESPONSE_TYPE_OAI_RESP,
|
TASK_RESPONSE_TYPE_OAI_RESP,
|
||||||
|
TASK_RESPONSE_TYPE_OAI_ASR, // transcriptions API
|
||||||
TASK_RESPONSE_TYPE_OAI_EMBD,
|
TASK_RESPONSE_TYPE_OAI_EMBD,
|
||||||
TASK_RESPONSE_TYPE_ANTHROPIC,
|
TASK_RESPONSE_TYPE_ANTHROPIC,
|
||||||
};
|
};
|
||||||
|
|
@ -401,6 +402,8 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||||
|
|
||||||
json to_json_oaicompat_resp_stream();
|
json to_json_oaicompat_resp_stream();
|
||||||
|
|
||||||
|
json to_json_oaicompat_asr();
|
||||||
|
|
||||||
json to_json_anthropic();
|
json to_json_anthropic();
|
||||||
|
|
||||||
json to_json_anthropic_stream();
|
json to_json_anthropic_stream();
|
||||||
|
|
@ -457,6 +460,8 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||||
|
|
||||||
json to_json_oaicompat_resp();
|
json to_json_oaicompat_resp();
|
||||||
|
|
||||||
|
json to_json_oaicompat_asr();
|
||||||
|
|
||||||
json to_json_anthropic();
|
json to_json_anthropic();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -145,6 +145,7 @@ int main(int argc, char ** argv) {
|
||||||
routes.post_completions_oai = models_routes->proxy_post;
|
routes.post_completions_oai = models_routes->proxy_post;
|
||||||
routes.post_chat_completions = models_routes->proxy_post;
|
routes.post_chat_completions = models_routes->proxy_post;
|
||||||
routes.post_responses_oai = models_routes->proxy_post;
|
routes.post_responses_oai = models_routes->proxy_post;
|
||||||
|
routes.post_transcriptions_oai = models_routes->proxy_post;
|
||||||
routes.post_anthropic_messages = models_routes->proxy_post;
|
routes.post_anthropic_messages = models_routes->proxy_post;
|
||||||
routes.post_anthropic_count_tokens = models_routes->proxy_post;
|
routes.post_anthropic_count_tokens = models_routes->proxy_post;
|
||||||
routes.post_infill = models_routes->proxy_post;
|
routes.post_infill = models_routes->proxy_post;
|
||||||
|
|
@ -160,48 +161,51 @@ int main(int argc, char ** argv) {
|
||||||
routes.post_slots = models_routes->proxy_post;
|
routes.post_slots = models_routes->proxy_post;
|
||||||
|
|
||||||
// custom routes for router
|
// custom routes for router
|
||||||
routes.get_props = models_routes->get_router_props;
|
routes.get_props = models_routes->get_router_props;
|
||||||
routes.get_models = models_routes->get_router_models;
|
routes.get_models = models_routes->get_router_models;
|
||||||
ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load));
|
|
||||||
ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
|
ctx_http.post("/models/load", ex_wrapper(models_routes->post_router_models_load));
|
||||||
|
ctx_http.post("/models/unload", ex_wrapper(models_routes->post_router_models_unload));
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
|
ctx_http.get ("/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
|
||||||
ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
|
ctx_http.get ("/v1/health", ex_wrapper(routes.get_health)); // public endpoint (no API key check)
|
||||||
ctx_http.get ("/metrics", ex_wrapper(routes.get_metrics));
|
ctx_http.get ("/metrics", ex_wrapper(routes.get_metrics));
|
||||||
ctx_http.get ("/props", ex_wrapper(routes.get_props));
|
ctx_http.get ("/props", ex_wrapper(routes.get_props));
|
||||||
ctx_http.post("/props", ex_wrapper(routes.post_props));
|
ctx_http.post("/props", ex_wrapper(routes.post_props));
|
||||||
ctx_http.post("/api/show", ex_wrapper(routes.get_api_show));
|
ctx_http.post("/api/show", ex_wrapper(routes.get_api_show));
|
||||||
ctx_http.get ("/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
|
ctx_http.get ("/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
|
||||||
ctx_http.get ("/v1/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
|
ctx_http.get ("/v1/models", ex_wrapper(routes.get_models)); // public endpoint (no API key check)
|
||||||
ctx_http.get ("/api/tags", ex_wrapper(routes.get_models)); // ollama specific endpoint. public endpoint (no API key check)
|
ctx_http.get ("/api/tags", ex_wrapper(routes.get_models)); // ollama specific endpoint. public endpoint (no API key check)
|
||||||
ctx_http.post("/completion", ex_wrapper(routes.post_completions)); // legacy
|
ctx_http.post("/completion", ex_wrapper(routes.post_completions)); // legacy
|
||||||
ctx_http.post("/completions", ex_wrapper(routes.post_completions));
|
ctx_http.post("/completions", ex_wrapper(routes.post_completions));
|
||||||
ctx_http.post("/v1/completions", ex_wrapper(routes.post_completions_oai));
|
ctx_http.post("/v1/completions", ex_wrapper(routes.post_completions_oai));
|
||||||
ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
|
ctx_http.post("/chat/completions", ex_wrapper(routes.post_chat_completions));
|
||||||
ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
|
ctx_http.post("/v1/chat/completions", ex_wrapper(routes.post_chat_completions));
|
||||||
ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
|
ctx_http.post("/api/chat", ex_wrapper(routes.post_chat_completions)); // ollama specific endpoint
|
||||||
ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
|
ctx_http.post("/v1/responses", ex_wrapper(routes.post_responses_oai));
|
||||||
ctx_http.post("/responses", ex_wrapper(routes.post_responses_oai));
|
ctx_http.post("/responses", ex_wrapper(routes.post_responses_oai));
|
||||||
ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
|
ctx_http.post("/v1/audio/transcriptions", ex_wrapper(routes.post_transcriptions_oai));
|
||||||
|
ctx_http.post("/audio/transcriptions", ex_wrapper(routes.post_transcriptions_oai));
|
||||||
|
ctx_http.post("/v1/messages", ex_wrapper(routes.post_anthropic_messages)); // anthropic messages API
|
||||||
ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
|
ctx_http.post("/v1/messages/count_tokens", ex_wrapper(routes.post_anthropic_count_tokens)); // anthropic token counting
|
||||||
ctx_http.post("/infill", ex_wrapper(routes.post_infill));
|
ctx_http.post("/infill", ex_wrapper(routes.post_infill));
|
||||||
ctx_http.post("/embedding", ex_wrapper(routes.post_embeddings)); // legacy
|
ctx_http.post("/embedding", ex_wrapper(routes.post_embeddings)); // legacy
|
||||||
ctx_http.post("/embeddings", ex_wrapper(routes.post_embeddings));
|
ctx_http.post("/embeddings", ex_wrapper(routes.post_embeddings));
|
||||||
ctx_http.post("/v1/embeddings", ex_wrapper(routes.post_embeddings_oai));
|
ctx_http.post("/v1/embeddings", ex_wrapper(routes.post_embeddings_oai));
|
||||||
ctx_http.post("/rerank", ex_wrapper(routes.post_rerank));
|
ctx_http.post("/rerank", ex_wrapper(routes.post_rerank));
|
||||||
ctx_http.post("/reranking", ex_wrapper(routes.post_rerank));
|
ctx_http.post("/reranking", ex_wrapper(routes.post_rerank));
|
||||||
ctx_http.post("/v1/rerank", ex_wrapper(routes.post_rerank));
|
ctx_http.post("/v1/rerank", ex_wrapper(routes.post_rerank));
|
||||||
ctx_http.post("/v1/reranking", ex_wrapper(routes.post_rerank));
|
ctx_http.post("/v1/reranking", ex_wrapper(routes.post_rerank));
|
||||||
ctx_http.post("/tokenize", ex_wrapper(routes.post_tokenize));
|
ctx_http.post("/tokenize", ex_wrapper(routes.post_tokenize));
|
||||||
ctx_http.post("/detokenize", ex_wrapper(routes.post_detokenize));
|
ctx_http.post("/detokenize", ex_wrapper(routes.post_detokenize));
|
||||||
ctx_http.post("/apply-template", ex_wrapper(routes.post_apply_template));
|
ctx_http.post("/apply-template", ex_wrapper(routes.post_apply_template));
|
||||||
// LoRA adapters hotswap
|
// LoRA adapters hotswap
|
||||||
ctx_http.get ("/lora-adapters", ex_wrapper(routes.get_lora_adapters));
|
ctx_http.get ("/lora-adapters", ex_wrapper(routes.get_lora_adapters));
|
||||||
ctx_http.post("/lora-adapters", ex_wrapper(routes.post_lora_adapters));
|
ctx_http.post("/lora-adapters", ex_wrapper(routes.post_lora_adapters));
|
||||||
// Save & load slots
|
// Save & load slots
|
||||||
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
|
ctx_http.get ("/slots", ex_wrapper(routes.get_slots));
|
||||||
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
|
ctx_http.post("/slots/:id_slot", ex_wrapper(routes.post_slots));
|
||||||
// CORS proxy (EXPERIMENTAL, only used by the Web UI for MCP)
|
// CORS proxy (EXPERIMENTAL, only used by the Web UI for MCP)
|
||||||
if (params.webui_mcp_proxy) {
|
if (params.webui_mcp_proxy) {
|
||||||
SRV_WRN("%s", "-----------------\n");
|
SRV_WRN("%s", "-----------------\n");
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue