server: fix checkpoints creation (#22929)

* common : add common_chat_split_by_role

* cont : fix spans to reach end of message

* server: fix checkpoints creation

- extract message_spans from chat templates
- find the prompt token position before the latest user message
- split prompt batching at that position
- create a context checkpoint before the latest user input
- avoid periodic mid-prompt checkpoints when that position is known
- handle multimodal prompts when mapping text/template positions to server prompt tokens
- add --checkpoint-min-step to control minimum spacing between checkpoints

* cont : clean-up

* Support autoparser detection for message barriers

* server: fix message span delimiter and update docs

---------

Co-authored-by: Alde Rojas <hello@alde.dev>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Piotr Wilkin <piotr.wilkin@syndatis.com>
This commit is contained in:
jacekpoplawski 2026-05-25 07:56:18 +02:00 committed by GitHub
parent 6d57c26ef8
commit e2ef8fe42c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 586 additions and 37 deletions

View file

@ -445,6 +445,27 @@ std::string string_strip(const std::string & str) {
return str.substr(start, end - start);
}
// Returns the longest common *substring* (a contiguous run of characters) shared by `a` and `b`.
// Note: despite the "lcs" name, this is not the longest common subsequence.
//
// - returns an empty string if either input is empty or if no character is shared
// - when several common substrings have the same maximal length, the one that ends
//   earliest in `a` is returned
//
// Time is O(|a| * |b|). Only two rows of the DP table are kept alive, so the extra
// memory is O(|b|) instead of O(|a| * |b|).
std::string string_lcs(std::string_view a, std::string_view b) {
    if (a.empty() || b.empty()) {
        return {};
    }

    // prev[j] / curr[j]: length of the common suffix of a[0..i-1) / a[0..i) and b[0..j)
    std::vector<size_t> prev(b.size() + 1, 0);
    std::vector<size_t> curr(b.size() + 1, 0);

    size_t best_len   = 0;
    size_t best_end_a = 0; // one-past-the-end index in `a` of the best match so far

    for (size_t i = 1; i <= a.size(); ++i) {
        for (size_t j = 1; j <= b.size(); ++j) {
            if (a[i - 1] == b[j - 1]) {
                curr[j] = prev[j - 1] + 1;
                // strict comparison keeps the earliest match among equally long ones
                if (curr[j] > best_len) {
                    best_len   = curr[j];
                    best_end_a = i;
                }
            } else {
                // rows are reused, so stale values must be cleared explicitly
                curr[j] = 0;
            }
        }
        // the row just computed becomes the "previous" row of the next iteration;
        // index 0 is never written and therefore stays 0 in both rows
        prev.swap(curr);
    }

    return std::string(a.substr(best_end_a - best_len, best_len));
}
std::string string_get_sortable_timestamp() {
using clock = std::chrono::system_clock;