Merge commit '190c4838bd' into concedo_experimental

# Conflicts:
#	.github/workflows/build.yml
#	.github/workflows/release.yml
#	.gitignore
#	CMakeLists.txt
#	CODEOWNERS
#	common/CMakeLists.txt
#	ggml/CMakeLists.txt
#	ggml/src/ggml-webgpu/CMakeLists.txt
#	ggml/src/ggml-webgpu/ggml-webgpu.cpp
#	tests/.gitignore
#	tests/CMakeLists.txt
#	tests/test-backend-ops.cpp
This commit is contained in:
Concedo 2025-12-04 16:50:15 +08:00
commit addf1d8785
23 changed files with 2817 additions and 45 deletions

View file

@ -85,29 +85,36 @@ json common_chat_msg::to_json_oaicompat() const
return message;
}
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg) {
std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const common_chat_msg & msg_prv, const common_chat_msg & msg_new) {
std::vector<common_chat_msg_diff> diffs;
if (previous_msg.reasoning_content != new_msg.reasoning_content) {
auto & diff = diffs.emplace_back();
diff.reasoning_content_delta = string_diff(previous_msg.reasoning_content, new_msg.reasoning_content);
}
if (previous_msg.content != new_msg.content) {
auto & diff = diffs.emplace_back();
diff.content_delta = string_diff(previous_msg.content, new_msg.content);
if (msg_new.tool_calls.size() > msg_prv.tool_calls.size()) {
diffs.reserve(msg_new.tool_calls.size() - msg_prv.tool_calls.size() + 3);
} else {
diffs.reserve(3);
}
if (new_msg.tool_calls.size() < previous_msg.tool_calls.size()) {
// TODO: these can become expensive for long messages - how to optimize?
if (msg_prv.reasoning_content != msg_new.reasoning_content) {
auto & diff = diffs.emplace_back();
diff.reasoning_content_delta = string_diff(msg_prv.reasoning_content, msg_new.reasoning_content);
}
if (msg_prv.content != msg_new.content) {
auto & diff = diffs.emplace_back();
diff.content_delta = string_diff(msg_prv.content, msg_new.content);
}
if (msg_new.tool_calls.size() < msg_prv.tool_calls.size()) {
throw std::runtime_error("Invalid diff: now finding less tool calls!");
}
if (!previous_msg.tool_calls.empty()) {
auto idx = previous_msg.tool_calls.size() - 1;
const auto & pref = previous_msg.tool_calls[idx];
const auto & newf = new_msg.tool_calls[idx];
if (!msg_prv.tool_calls.empty()) {
const auto idx = msg_prv.tool_calls.size() - 1;
const auto & pref = msg_prv.tool_calls[idx];
const auto & newf = msg_new.tool_calls[idx];
if (pref.name != newf.name) {
throw std::runtime_error("Invalid diff: tool call mismatch!");
}
auto args_diff = string_diff(pref.arguments, newf.arguments);
const auto args_diff = string_diff(pref.arguments, newf.arguments);
if (!args_diff.empty() || pref.id != newf.id) {
auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
@ -118,11 +125,12 @@ std::vector<common_chat_msg_diff> common_chat_msg_diff::compute_diffs(const comm
diff.tool_call_delta.arguments = args_diff;
}
}
for (size_t idx = previous_msg.tool_calls.size(); idx < new_msg.tool_calls.size(); ++idx) {
for (size_t idx = msg_prv.tool_calls.size(); idx < msg_new.tool_calls.size(); ++idx) {
auto & diff = diffs.emplace_back();
diff.tool_call_index = idx;
diff.tool_call_delta = new_msg.tool_calls[idx];
diff.tool_call_delta = msg_new.tool_calls[idx];
}
return diffs;
}
@ -434,6 +442,9 @@ template <> json common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_di
}
#include "chat-parser.cpp"
#include "common/unicode.cpp"
#include "peg-parser.cpp"
#include "chat-peg-parser.cpp"
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
if (use_jinja) {
@ -651,6 +662,9 @@ const char * common_chat_format_name(common_chat_format format) {
case COMMON_CHAT_FORMAT_QWEN3_CODER_XML: return "Qwen3 Coder";
case COMMON_CHAT_FORMAT_APRIEL_1_5: return "Apriel 1.5";
case COMMON_CHAT_FORMAT_XIAOMI_MIMO: return "Xiaomi MiMo";
case COMMON_CHAT_FORMAT_PEG_SIMPLE: return "peg-simple";
case COMMON_CHAT_FORMAT_PEG_NATIVE: return "peg-native";
case COMMON_CHAT_FORMAT_PEG_CONSTRUCTED: return "peg-constructed";
default:
throw std::runtime_error("Unknown chat format");
}