mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2025-09-06 04:30:03 +00:00
Added a simple /models endpoint to work with frontends that don't allow bypassing the model-list check, such as Open WebUI
This commit is contained in:
parent
de7e892f72
commit
b1bff2a405
1 changed files with 11 additions and 3 deletions
|
@ -10,8 +10,16 @@ from ktransformers.server.backend.base import BackendInterfaceBase
|
||||||
|
|
||||||
# Router exposing the OpenAI-compatible HTTP endpoints for this backend.
router = APIRouter()

# Static model catalogue served by GET /models.  Certain frontends
# (e.g. Open WebUI) refuse to connect unless this endpoint responds,
# so a single placeholder entry is advertised here.
models = [
    {"id": "0", "name": "ktranformers-model"},
]


@router.get('/models', tags=['openai'])
async def list_models():
    """Return the static list of models this server advertises.

    NOTE(review): this returns a bare JSON array; the OpenAI API wraps
    the entries as ``{"object": "list", "data": [...]}`` — confirm which
    shape the targeted frontends expect before changing it.
    """
    return models
|
||||||
|
|
||||||
|
|
||||||
|
@router.post('/chat/completions', tags=['openai'])
|
||||||
async def chat_completion(request:Request,create:ChatCompletionCreate):
|
async def chat_completion(request:Request,create:ChatCompletionCreate):
|
||||||
id = str(uuid4())
|
id = str(uuid4())
|
||||||
|
|
||||||
|
@ -23,12 +31,12 @@ async def chat_completion(request:Request,create:ChatCompletionCreate):
|
||||||
if create.stream:
|
if create.stream:
|
||||||
async def inner():
|
async def inner():
|
||||||
chunk = ChatCompletionChunk(id=id,object='chat.completion.chunk',created=int(time()))
|
chunk = ChatCompletionChunk(id=id,object='chat.completion.chunk',created=int(time()))
|
||||||
async for token in interface.inference(input_message,id):
|
async for token in interface.inference(input_message,id):
|
||||||
chunk.set_token(token)
|
chunk.set_token(token)
|
||||||
yield chunk
|
yield chunk
|
||||||
return chat_stream_response(request,inner())
|
return chat_stream_response(request,inner())
|
||||||
else:
|
else:
|
||||||
comp = ChatCompletionObject(id=id,object='chat.completion.chunk',created=int(time()))
|
comp = ChatCompletionObject(id=id,object='chat.completion.chunk',created=int(time()))
|
||||||
async for token in interface.inference(input_message,id):
|
async for token in interface.inference(input_message,id):
|
||||||
comp.append_token(token)
|
comp.append_token(token)
|
||||||
return comp
|
return comp
|
||||||
|
|
Loading…
Add table
Reference in a new issue