server: fix OpenAI API compatibility for usage statistics in chat streams (#15444)

teo 2025-08-21 07:10:08 +09:00 committed by GitHub
parent 13aeb7aef2
commit 1bc664a26a
3 changed files with 105 additions and 82 deletions
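
The behaviour being fixed is easiest to see from the client side. Below is a minimal sketch (not part of this commit) of how an OpenAI-style client would consume a llama-server chat stream and pick up the usage statistics; it assumes llama-server is running locally on its default port 8080 with the OpenAI-compatible /v1 endpoints, and the model name and API key are placeholders (llama-server serves whichever model it was launched with).

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")

stream = client.chat.completions.create(
    model="placeholder",  # ignored by llama-server; it serves the model it was launched with
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
    stream_options={"include_usage": True},  # standard OpenAI switch for the trailing usage chunk
)

usage = None
for chunk in stream:
    if chunk.choices:  # ordinary delta chunks carry exactly one choice
        print(chunk.choices[0].delta.content or "", end="")
    elif chunk.usage is not None:  # final chunk: empty `choices`, token counts attached
        usage = chunk.usage

print()
print(usage)  # prompt_tokens / completion_tokens / total_tokens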

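For reference, this is the shape of the last data chunk (the one immediately before `data: [DONE]`) that the updated test below expects when usage reporting is on. The token counts are invented for illustration, and the contents of `timings` (a llama-server extension) are omitted.

final_chunk = {
    "choices": [],  # empty: this chunk carries no delta, only statistics
    "usage": {
        "prompt_tokens": 21,
        "completion_tokens": 64,
        "total_tokens": 85,
    },
    "timings": {},  # llama-server extension with per-request timing stats (fields omitted here)
}

# Mirrors the assertions added in the diff below.
assert final_chunk["choices"] == []
assert "usage" in final_chunk and "timings" in final_chunk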

@@ -318,46 +318,53 @@ class ServerProcess:
             arguments_parts = 0
             for chunk in self.make_stream_request(method, path, data, headers):
-                assert len(chunk['choices']) == 1, f'Expected 1 choice, got {len(chunk["choices"])}'
-                choice = chunk['choices'][0]
-                if choice['delta'].get('content') is not None:
-                    assert len(choice['delta']['content']) > 0, f'Expected non empty content delta!'
-                    content.append(choice['delta']['content'])
-                    content_parts += 1
-                if choice['delta'].get('reasoning_content') is not None:
-                    assert len(choice['delta']['reasoning_content']) > 0, f'Expected non empty reasoning_content delta!'
-                    reasoning_content.append(choice['delta']['reasoning_content'])
-                    reasoning_content_parts += 1
-                if choice['delta'].get('finish_reason') is not None:
-                    finish_reason = choice['delta']['finish_reason']
-                for tc in choice['delta'].get('tool_calls', []):
-                    if 'function' not in tc:
-                        raise ValueError(f"Expected function type, got {tc['type']}")
-                    if tc['index'] >= len(tool_calls):
-                        assert 'id' in tc
-                        assert tc.get('type') == 'function'
-                        assert 'function' in tc and 'name' in tc['function'] and len(tc['function']['name']) > 0, \
-                            f"Expected function call with name, got {tc.get('function')}"
-                        tool_calls.append(dict(
-                            id="",
-                            type="function",
-                            function=dict(
-                                name="",
-                                arguments="",
-                            )
-                        ))
-                    tool_call = tool_calls[tc['index']]
-                    if tc.get('id') is not None:
-                        tool_call['id'] = tc['id']
-                    fct = tc['function']
-                    assert 'id' not in fct, f"Function call should not have id: {fct}"
-                    if fct.get('name') is not None:
-                        tool_call['function']['name'] = tool_call['function'].get('name', '') + fct['name']
-                    if fct.get('arguments') is not None:
-                        tool_call['function']['arguments'] += fct['arguments']
-                        arguments_parts += 1
-                    tool_call_parts += 1
+                if chunk['choices']:
+                    assert len(chunk['choices']) == 1, f'Expected 1 choice, got {len(chunk["choices"])}'
+                    choice = chunk['choices'][0]
+                    if choice['delta'].get('content') is not None:
+                        assert len(choice['delta']['content']) > 0, f'Expected non empty content delta!'
+                        content.append(choice['delta']['content'])
+                        content_parts += 1
+                    if choice['delta'].get('reasoning_content') is not None:
+                        assert len(choice['delta']['reasoning_content']) > 0, f'Expected non empty reasoning_content delta!'
+                        reasoning_content.append(choice['delta']['reasoning_content'])
+                        reasoning_content_parts += 1
+                    if choice['delta'].get('finish_reason') is not None:
+                        finish_reason = choice['delta']['finish_reason']
+                    for tc in choice['delta'].get('tool_calls', []):
+                        if 'function' not in tc:
+                            raise ValueError(f"Expected function type, got {tc['type']}")
+                        if tc['index'] >= len(tool_calls):
+                            assert 'id' in tc
+                            assert tc.get('type') == 'function'
+                            assert 'function' in tc and 'name' in tc['function'] and len(tc['function']['name']) > 0, \
+                                f"Expected function call with name, got {tc.get('function')}"
+                            tool_calls.append(dict(
+                                id="",
+                                type="function",
+                                function=dict(
+                                    name="",
+                                    arguments="",
+                                )
+                            ))
+                        tool_call = tool_calls[tc['index']]
+                        if tc.get('id') is not None:
+                            tool_call['id'] = tc['id']
+                        fct = tc['function']
+                        assert 'id' not in fct, f"Function call should not have id: {fct}"
+                        if fct.get('name') is not None:
+                            tool_call['function']['name'] = tool_call['function'].get('name', '') + fct['name']
+                        if fct.get('arguments') is not None:
+                            tool_call['function']['arguments'] += fct['arguments']
+                            arguments_parts += 1
+                        tool_call_parts += 1
+                else:
+                    # When `include_usage` is True (the default), we expect the last chunk of the stream
+                    # immediately preceding the `data: [DONE]` message to contain a `choices` field with an empty array
+                    # and a `usage` field containing the usage statistics (n.b., llama-server also returns `timings` in
+                    # the last chunk)
+                    assert 'usage' in chunk, f"Expected usage in chunk: {chunk}"
+                    assert 'timings' in chunk, f"Expected timings in chunk: {chunk}"
             print(f'Streamed response had {content_parts} content parts, {reasoning_content_parts} reasoning_content parts, {tool_call_parts} tool call parts incl. {arguments_parts} arguments parts')
             result = dict(
                 choices=[