Mirror of https://github.com/kvcache-ai/ktransformers.git (synced 2025-09-07 13:09:50 +00:00)
Update completions.py
parent 52fa671c10
commit 84164f584c
1 changed file with 12 additions and 4 deletions
completions.py

@@ -85,7 +85,11 @@ async def generate(request: Request, input: OllamaGenerateCompletionRequest):
         return check_link_response(request, inner())
     else:
         complete_response = ""
-        async for token in interface.inference(input.prompt, id):
-            complete_response += token
+        async for res in interface.inference(input.prompt, id):
+            if isinstance(res, RawUsage):
+                raw_usage = res
+            else:
+                token, finish_reason = res
+                complete_response += token
         response = OllamaGenerationResponse(
             model=config.model_name,
@@ -187,7 +191,11 @@ async def chat(request: Request, input: OllamaChatCompletionRequest):
         complete_response = ""
         eval_count = 0

-        async for token in interface.inference(prompt, id):
-            complete_response += token
+        async for res in interface.inference(prompt, id):
+            if isinstance(res, RawUsage):
+                raw_usage = res
+            else:
+                token, finish_reason = res
+                complete_response += token
             eval_count += 1

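Both hunks make the same change: interface.inference now yields two kinds of items, (token, finish_reason) tuples while decoding plus a RawUsage object for usage accounting, and the handlers tell them apart with isinstance before accumulating the response text. Below is a minimal, runnable sketch of that consumption pattern; fake_inference and the RawUsage fields (prefill_count, decode_count) are illustrative stand-ins, not the actual ktransformers API.

import asyncio
from dataclasses import dataclass

@dataclass
class RawUsage:
    # Stand-in for ktransformers' RawUsage; field names are assumed for illustration.
    prefill_count: int
    decode_count: int

async def fake_inference(prompt: str, req_id: str):
    # Stand-in for interface.inference(prompt, id): stream tokens, then a usage record.
    for tok in ("Hello", ",", " world"):
        yield tok, None                                  # intermediate tokens, no finish reason yet
    yield "!", "stop"                                    # final token carries the finish reason
    yield RawUsage(prefill_count=5, decode_count=4)      # usage record arrives last

async def main():
    complete_response = ""
    eval_count = 0
    raw_usage = None
    async for res in fake_inference("hi", "req-1"):
        if isinstance(res, RawUsage):                    # usage record, not a token
            raw_usage = res
        else:                                            # normal (token, finish_reason) pair
            token, finish_reason = res
            complete_response += token
            eval_count += 1
    print(complete_response, eval_count, raw_usage)

asyncio.run(main())

Running this prints "Hello, world! 4 RawUsage(prefill_count=5, decode_count=4)", mirroring how the updated endpoints accumulate complete_response (and eval_count in the chat handler) only for token results while keeping the usage record for response metadata.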