Merge commit '2f966b8ed8' into concedo_experimental

# Conflicts:
#	.github/workflows/release.yml
#	docs/docker.md
#	ggml/src/CMakeLists.txt
#	ggml/src/ggml-cpu/CMakeLists.txt
#	tests/test-backend-ops.cpp
#	tests/test-thread-safety.cpp
#	tools/batched-bench/batched-bench.cpp
#	tools/mtmd/clip.cpp
This commit is contained in:
LostRuins Concedo 2025-11-08 10:34:17 +08:00
commit fdcb281a3a
31 changed files with 655 additions and 165 deletions

View file

@ -313,7 +313,6 @@ json common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msg
}
if (!msg.reasoning_content.empty()) {
jmsg["reasoning_content"] = msg.reasoning_content;
jmsg["thinking"] = msg.reasoning_content; // gpt-oss
}
if (!msg.tool_name.empty()) {
jmsg["name"] = msg.tool_name;
@ -1810,7 +1809,23 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
common_chat_params data;
auto prompt = apply(tmpl, inputs);
// Copy reasoning to the "thinking" field as expected by the gpt-oss template
auto adjusted_messages = json::array();
for (const auto & msg : inputs.messages) {
auto has_reasoning_content = msg.contains("reasoning_content") && msg.at("reasoning_content").is_string();
auto has_tool_calls = msg.contains("tool_calls") && msg.at("tool_calls").is_array();
if (has_reasoning_content && has_tool_calls) {
auto adjusted_message = msg;
adjusted_message["thinking"] = msg.at("reasoning_content");
adjusted_messages.push_back(adjusted_message);
} else {
adjusted_messages.push_back(msg);
}
}
auto prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages);
// Check if we need to replace the return token with end token during
// inference and without generation prompt. For more details see: