From b1bff2a4054a223e7631f6fbfc2279e91fc96c53 Mon Sep 17 00:00:00 2001
From: RodriMora
Date: Fri, 7 Feb 2025 10:30:39 +0100
Subject: [PATCH] Added simple /models endpoint to work with frontends that don't allow bypass check like Openweb-ui

---
 ktransformers/server/api/openai/endpoints/chat.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/ktransformers/server/api/openai/endpoints/chat.py b/ktransformers/server/api/openai/endpoints/chat.py
index 8fb7de9..4da3bc9 100644
--- a/ktransformers/server/api/openai/endpoints/chat.py
+++ b/ktransformers/server/api/openai/endpoints/chat.py
@@ -10,8 +10,16 @@ from ktransformers.server.backend.base import BackendInterfaceBase
 
 router = APIRouter()
 
+models = [
+    {"id": "0", "name": "ktranformers-model"},
+]
 
-@router.post('/chat/completions',tags=['openai'])
+@router.get('/models', tags=['openai'])
+async def list_models():
+    return models
+
+
+@router.post('/chat/completions', tags=['openai'])
 async def chat_completion(request:Request,create:ChatCompletionCreate):
     id = str(uuid4())
 
@@ -23,12 +31,12 @@ async def chat_completion(request:Request,create:ChatCompletionCreate):
     if create.stream:
         async def inner():
             chunk = ChatCompletionChunk(id=id,object='chat.completion.chunk',created=int(time()))
-            async for token in interface.inference(input_message,id):
+            async for token in interface.inference(input_message,id):
                 chunk.set_token(token)
                 yield chunk
         return chat_stream_response(request,inner())
     else:
         comp = ChatCompletionObject(id=id,object='chat.completion.chunk',created=int(time()))
-        async for token in interface.inference(input_message,id):
+        async for token in interface.inference(input_message,id):
             comp.append_token(token)
         return comp