server: fix OpenAI API compatibility for usage statistics in chat streams (#15444)

This commit is contained in:
teo 2025-08-21 07:10:08 +09:00 committed by GitHub
parent 13aeb7aef2
commit 1bc664a26a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 105 additions and 82 deletions

View file

@@ -911,6 +911,17 @@ struct server_task_result_cmpl_final : server_task_result {
{"model", oaicompat_model},
{"system_fingerprint", build_info},
{"object", "chat.completion.chunk"},
});
// OpenAI API spec for chat.completion.chunks specifies an empty `choices` array for the last chunk when including usage
// https://platform.openai.com/docs/api-reference/chat_streaming/streaming#chat_streaming/streaming-choices
deltas.push_back({
{"choices", json::array()},
{"created", t},
{"id", oaicompat_cmpl_id},
{"model", oaicompat_model},
{"system_fingerprint", build_info},
{"object", "chat.completion.chunk"},
{"usage", json {
{"completion_tokens", n_decoded},
{"prompt_tokens", n_prompt_tokens},