mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-06 20:49:55 +00:00
Update completions.py
This commit is contained in:
parent
52fa671c10
commit
84164f584c
1 changed file with 12 additions and 4 deletions
|
@@ -85,8 +85,12 @@ async def generate(request: Request, input: OllamaGenerateCompletionRequest):
|
|||
return check_link_response(request, inner())
|
||||
else:
|
||||
complete_response = ""
|
||||
async for token in interface.inference(input.prompt, id):
|
||||
complete_response += token
|
||||
async for res in interface.inference(input.prompt, id):
|
||||
if isinstance(res, RawUsage):
|
||||
raw_usage = res
|
||||
else:
|
||||
token, finish_reason = res
|
||||
complete_response += token
|
||||
response = OllamaGenerationResponse(
|
||||
model=config.model_name,
|
||||
created_at=str(datetime.now()),
|
||||
|
@@ -187,8 +191,12 @@ async def chat(request: Request, input: OllamaChatCompletionRequest):
|
|||
complete_response = ""
|
||||
eval_count = 0
|
||||
|
||||
async for token in interface.inference(prompt, id):
|
||||
complete_response += token
|
||||
async for res in interface.inference(prompt, id):
|
||||
if isinstance(res, RawUsage):
|
||||
raw_usage = res
|
||||
else:
|
||||
token, finish_reason = res
|
||||
complete_response += token
|
||||
eval_count += 1
|
||||
|
||||
end_time = time()
|
||||
|
|
Loading…
Add table
Reference in a new issue