common : delegate assistant continuation to underlying template handlers (#23089)

* common : delegate assistant continuation to template handler

* server : implement echo parameter to exclude assistant prefill in the response

* server : fix tests for prefill

* server : use existing llama template

* cont : clean up
This commit is contained in:
Aldehir Rojas 2026-05-17 07:36:05 -04:00 committed by GitHub
parent a6d6183dbc
commit 39cf5d6191
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1112 additions and 191 deletions

View file

@ -144,6 +144,17 @@ json task_params::to_json(bool only_metrics) const {
//
// task_result_state
//
// Builds the per-task result state from the given chat parser parameters.
//
// The parameters are copied into the member of the same name, and the three
// OAI response ids are generated as a fixed prefix ("resp_", "rs_", "msg_")
// followed by the value returned by random_string().
task_result_state::task_result_state(const common_chat_parser_params & chat_parser_params)
    : chat_parser_params(chat_parser_params)
    , oai_resp_id("resp_" + random_string())
    , oai_resp_reasoning_id("rs_" + random_string())
    , oai_resp_message_id("msg_" + random_string()) {
    // NOTE: within this body `chat_parser_params` refers to the constructor
    // argument, which shadows the member it was just copied into.
    if (chat_parser_params.echo) {
        // echo is enabled: chat_msg is not assigned here
        return;
    }

    // echo is disabled: seed chat_msg with the result of parsing an empty
    // string, so that no delta containing the assistant prefill is emitted
    chat_msg = common_chat_parse("", true, chat_parser_params);
}
common_chat_msg task_result_state::update_chat_msg(
const std::string & text_added,
bool is_partial,
@ -421,6 +432,7 @@ task_params server_task::params_from_json_cmpl(
if (data.contains("chat_parser")) {
params.chat_parser_params.parser.load(data.at("chat_parser").get<std::string>());
}
params.chat_parser_params.echo = json_value(data, "echo", false);
}
{