mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-10 06:14:58 +00:00
Revert repetition_penalty as it is not in API spec
This commit is contained in:
parent
05ad288453
commit
76487c4dcb
5 changed files with 12 additions and 14 deletions
|
@@ -28,13 +28,13 @@ async def chat_completion(request:Request,create:ChatCompletionCreate):
|
|||
if create.stream:
|
||||
async def inner():
|
||||
chunk = ChatCompletionChunk(id=id,object='chat.completion.chunk',created=int(time()))
|
||||
-async for token in interface.inference(input_message,id,create.temperature,create.top_p,create.repetition_penalty):
|
||||
+async for token in interface.inference(input_message,id,create.temperature,create.top_p):
|
||||
chunk.set_token(token)
|
||||
yield chunk
|
||||
return chat_stream_response(request,inner())
|
||||
else:
|
||||
comp = ChatCompletionObject(id=id,object='chat.completion',created=int(time()))
|
||||
comp.usage = Usage(completion_tokens=1, prompt_tokens=1, total_tokens=2)
|
||||
-async for token in interface.inference(input_message,id,create.temperature,create.top_p,create.repetition_penalty):
|
||||
+async for token in interface.inference(input_message,id,create.temperature,create.top_p):
|
||||
comp.append_token(token)
|
||||
return comp
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue