Revert repetition_penalty as it is not in API spec

This commit is contained in:
lazymio 2025-02-24 21:30:03 +08:00
parent 05ad288453
commit 76487c4dcb
No known key found for this signature in database
GPG key ID: DFF27E34A47CB873
5 changed files with 12 additions and 14 deletions

View file

@@ -28,13 +28,13 @@ async def chat_completion(request:Request,create:ChatCompletionCreate):
if create.stream:
async def inner():
chunk = ChatCompletionChunk(id=id,object='chat.completion.chunk',created=int(time()))
async for token in interface.inference(input_message,id,create.temperature,create.top_p,create.repetition_penalty):
async for token in interface.inference(input_message,id,create.temperature,create.top_p):
chunk.set_token(token)
yield chunk
return chat_stream_response(request,inner())
else:
comp = ChatCompletionObject(id=id,object='chat.completion',created=int(time()))
comp.usage = Usage(completion_tokens=1, prompt_tokens=1, total_tokens=2)
async for token in interface.inference(input_message,id,create.temperature,create.top_p,create.repetition_penalty):
async for token in interface.inference(input_message,id,create.temperature,create.top_p):
comp.append_token(token)
return comp

View file

@@ -20,7 +20,7 @@ async def create_completion(request:Request,create:CompletionCreate):
if create.stream:
async def inner():
async for token in interface.inference(create.prompt,id,create.temperature,create.top_p,create.repetition_penalty):
async for token in interface.inference(create.prompt,id,create.temperature,create.top_p):
d = {'choices':[{'delta':{'content':token}}]}
yield f"data:{json.dumps(d)}\n\n"
d = {'choices':[{'delta':{'content':''},'finish_reason':''}]}
@@ -28,6 +28,6 @@ async def create_completion(request:Request,create:CompletionCreate):
return stream_response(request,inner())
else:
comp = CompletionObject(id=id,object='text_completion',created=int(time()))
async for token in interface.inference(create.prompt,id,create.temperature,create.top_p,create.repetition_penalty):
async for token in interface.inference(create.prompt,id,create.temperature,create.top_p):
comp.append_token(token)
return comp