server : allow specifying reasoning_format in HTTP request (#15238)

2025-09-10 17:14:36 +00:00 · 2025-08-11 14:48:41 +02:00 · 2025-08-11 14:48:41 +02:00 · 53d0a12658
commit 53d0a12658
parent 27093afe78
7 changed files with 28 additions and 7 deletions
--- a/common/arg.cpp
+++ b/common/arg.cpp
@ -2949,11 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
        "- deepseek: puts thoughts in `message.reasoning_content` (except in streaming mode, which behaves as `none`)\n"
        "(default: auto)",
        [](common_params & params, const std::string & value) {
-            /**/ if (value == "deepseek") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; }
+            params.reasoning_format = common_reasoning_format_from_name(value);
            else if (value == "deepseek-legacy") { params.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY; }
            else if (value == "none") {     params.reasoning_format = COMMON_REASONING_FORMAT_NONE; }
            else if (value == "auto") {     params.reasoning_format = COMMON_REASONING_FORMAT_AUTO; }
            else { throw std::invalid_argument("invalid value"); }
        }
    ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_THINK"));
    add_opt(common_arg(
--- a/common/chat.cpp
+++ b/common/chat.cpp
@ -625,6 +625,19 @@ const char * common_reasoning_format_name(common_reasoning_format format) {
    }
 }
 // Translate a reasoning-format name into the matching common_reasoning_format value.
 // Recognized names: "none", "auto", "deepseek", "deepseek-legacy".
 // Throws std::runtime_error when the name is not one of the above.
 common_reasoning_format common_reasoning_format_from_name(const std::string & format) {
    if (format == "none") {
        return COMMON_REASONING_FORMAT_NONE;
    }
    if (format == "auto") {
        return COMMON_REASONING_FORMAT_AUTO;
    }
    if (format == "deepseek") {
        return COMMON_REASONING_FORMAT_DEEPSEEK;
    }
    if (format == "deepseek-legacy") {
        return COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY;
    }

    // No known name matched: report the offending value to the caller.
    throw std::runtime_error("Unknown reasoning format: " + format);
 }
 static std::string wrap_code_as_arguments(common_chat_msg_parser & builder, const std::string & code) {
    std::string arguments;
    if (builder.is_partial()) {
--- a/common/chat.h
+++ b/common/chat.h
@ -191,6 +191,7 @@ std::string common_chat_format_example(
 const char*               common_chat_format_name(common_chat_format format);
 const char*               common_reasoning_format_name(common_reasoning_format format);
 common_reasoning_format   common_reasoning_format_from_name(const std::string & format);
 common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);
 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);
--- a/tools/server/README.md
+++ b/tools/server/README.md
@ -1132,6 +1132,12 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":
 `chat_template_kwargs`: Allows sending additional parameters to the json templating system. For example: `{"enable_thinking": false}`
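 `reasoning_format`: The reasoning format used to parse the generated output for this request. Accepted values are `none`, `auto`, `deepseek` and `deepseek-legacy`; when omitted, the server's configured reasoning format is used. If set to `none`, it will output the raw generated text.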
 `thinking_forced_open`: Force a reasoning model to always output the reasoning. Only works on certain models.
 `parse_tool_calls`: Whether to parse the generated tool call.
 *Examples:*
 You can use either Python `openai` library with appropriate checkpoints:
--- a/tools/server/public/index.html.gz
+++ b/tools/server/public/index.html.gz
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@ -383,8 +383,12 @@ struct server_task {
            } else {
                params.oaicompat_chat_syntax.format = defaults.oaicompat_chat_syntax.format;
            }
-            params.oaicompat_chat_syntax.reasoning_format = params_base.reasoning_format;
+            common_reasoning_format reasoning_format = params_base.reasoning_format;
-            params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (params_base.reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
+            if (data.contains("reasoning_format")) {
                reasoning_format = common_reasoning_format_from_name(data.at("reasoning_format").get<std::string>());
            }
            params.oaicompat_chat_syntax.reasoning_format = reasoning_format;
            params.oaicompat_chat_syntax.reasoning_in_content = params.stream && (reasoning_format == COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY);
            params.oaicompat_chat_syntax.thinking_forced_open = json_value(data, "thinking_forced_open", false);
            params.oaicompat_chat_syntax.parse_tool_calls = json_value(data, "parse_tool_calls", false);
        }
--- a/tools/server/webui/src/utils/app.context.tsx
+++ b/tools/server/webui/src/utils/app.context.tsx
@ -209,6 +209,7 @@ export const AppContextProvider = ({
        messages,
        stream: true,
        cache_prompt: true,
        reasoning_format: 'none',
        samplers: config.samplers,
        temperature: config.temperature,
        dynatemp_range: config.dynatemp_range,