mirror of
https://github.com/LostRuins/koboldcpp.git
synced 2025-09-11 09:34:37 +00:00
server: fix OpenAI API compatibility for usage statistics in chat streams (#15444)
This commit is contained in:
parent
13aeb7aef2
commit
1bc664a26a
3 changed files with 105 additions and 82 deletions
|
@ -318,46 +318,53 @@ class ServerProcess:
|
|||
arguments_parts = 0
|
||||
|
||||
for chunk in self.make_stream_request(method, path, data, headers):
|
||||
assert len(chunk['choices']) == 1, f'Expected 1 choice, got {len(chunk["choices"])}'
|
||||
choice = chunk['choices'][0]
|
||||
if choice['delta'].get('content') is not None:
|
||||
assert len(choice['delta']['content']) > 0, f'Expected non empty content delta!'
|
||||
content.append(choice['delta']['content'])
|
||||
content_parts += 1
|
||||
if choice['delta'].get('reasoning_content') is not None:
|
||||
assert len(choice['delta']['reasoning_content']) > 0, f'Expected non empty reasoning_content delta!'
|
||||
reasoning_content.append(choice['delta']['reasoning_content'])
|
||||
reasoning_content_parts += 1
|
||||
if choice['delta'].get('finish_reason') is not None:
|
||||
finish_reason = choice['delta']['finish_reason']
|
||||
for tc in choice['delta'].get('tool_calls', []):
|
||||
if 'function' not in tc:
|
||||
raise ValueError(f"Expected function type, got {tc['type']}")
|
||||
if tc['index'] >= len(tool_calls):
|
||||
assert 'id' in tc
|
||||
assert tc.get('type') == 'function'
|
||||
assert 'function' in tc and 'name' in tc['function'] and len(tc['function']['name']) > 0, \
|
||||
f"Expected function call with name, got {tc.get('function')}"
|
||||
tool_calls.append(dict(
|
||||
id="",
|
||||
type="function",
|
||||
function=dict(
|
||||
name="",
|
||||
arguments="",
|
||||
)
|
||||
))
|
||||
tool_call = tool_calls[tc['index']]
|
||||
if tc.get('id') is not None:
|
||||
tool_call['id'] = tc['id']
|
||||
fct = tc['function']
|
||||
assert 'id' not in fct, f"Function call should not have id: {fct}"
|
||||
if fct.get('name') is not None:
|
||||
tool_call['function']['name'] = tool_call['function'].get('name', '') + fct['name']
|
||||
if fct.get('arguments') is not None:
|
||||
tool_call['function']['arguments'] += fct['arguments']
|
||||
arguments_parts += 1
|
||||
tool_call_parts += 1
|
||||
|
||||
if chunk['choices']:
|
||||
assert len(chunk['choices']) == 1, f'Expected 1 choice, got {len(chunk["choices"])}'
|
||||
choice = chunk['choices'][0]
|
||||
if choice['delta'].get('content') is not None:
|
||||
assert len(choice['delta']['content']) > 0, f'Expected non empty content delta!'
|
||||
content.append(choice['delta']['content'])
|
||||
content_parts += 1
|
||||
if choice['delta'].get('reasoning_content') is not None:
|
||||
assert len(choice['delta']['reasoning_content']) > 0, f'Expected non empty reasoning_content delta!'
|
||||
reasoning_content.append(choice['delta']['reasoning_content'])
|
||||
reasoning_content_parts += 1
|
||||
if choice['delta'].get('finish_reason') is not None:
|
||||
finish_reason = choice['delta']['finish_reason']
|
||||
for tc in choice['delta'].get('tool_calls', []):
|
||||
if 'function' not in tc:
|
||||
raise ValueError(f"Expected function type, got {tc['type']}")
|
||||
if tc['index'] >= len(tool_calls):
|
||||
assert 'id' in tc
|
||||
assert tc.get('type') == 'function'
|
||||
assert 'function' in tc and 'name' in tc['function'] and len(tc['function']['name']) > 0, \
|
||||
f"Expected function call with name, got {tc.get('function')}"
|
||||
tool_calls.append(dict(
|
||||
id="",
|
||||
type="function",
|
||||
function=dict(
|
||||
name="",
|
||||
arguments="",
|
||||
)
|
||||
))
|
||||
tool_call = tool_calls[tc['index']]
|
||||
if tc.get('id') is not None:
|
||||
tool_call['id'] = tc['id']
|
||||
fct = tc['function']
|
||||
assert 'id' not in fct, f"Function call should not have id: {fct}"
|
||||
if fct.get('name') is not None:
|
||||
tool_call['function']['name'] = tool_call['function'].get('name', '') + fct['name']
|
||||
if fct.get('arguments') is not None:
|
||||
tool_call['function']['arguments'] += fct['arguments']
|
||||
arguments_parts += 1
|
||||
tool_call_parts += 1
|
||||
else:
|
||||
# When `include_usage` is True (the default), we expect the last chunk of the stream
|
||||
# immediately preceding the `data: [DONE]` message to contain a `choices` field with an empty array
|
||||
# and a `usage` field containing the usage statistics (n.b., llama-server also returns `timings` in
|
||||
# the last chunk)
|
||||
assert 'usage' in chunk, f"Expected finish_reason in chunk: {chunk}"
|
||||
assert 'timings' in chunk, f"Expected finish_reason in chunk: {chunk}"
|
||||
print(f'Streamed response had {content_parts} content parts, {reasoning_content_parts} reasoning_content parts, {tool_call_parts} tool call parts incl. {arguments_parts} arguments parts')
|
||||
result = dict(
|
||||
choices=[
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue