Rudimentary Responses API support; not usable yet

This commit is contained in:
Concedo 2026-03-27 23:38:08 +08:00
parent 81cebb6179
commit 326542f480
2 changed files with 43 additions and 2 deletions

View file

@ -2798,6 +2798,28 @@
"responses": {"default": {"description": ""}}
}
},
"/v1/responses": {
"post": {
"summary": "Emulates OpenAI Responses API. Very experimental, not recommended (use chat completions instead). Please refer to OpenAI documentation",
"description": "Emulates OpenAI Responses API.\n\n This is an OpenAI compatibility endpoint.\n\n Please refer to OpenAI documentation at [https://developers.openai.com/api/reference/resources/responses](https://developers.openai.com/api/reference/resources/responses). All KoboldCpp samplers are supported, please refer to /api/v1/generate for more details.",
"requestBody": {
"content": {
"application/json": {
"example": {"model":"kcpp","input": "Tell me a joke about Kobolds."},
"schema": {
"properties": {},
"type": "object"
}
}
},
"required": true
},
"tags": [
"v1"
],
"responses": {"default": {"description": ""}}
}
},
"/v1/models": {
"get": {
"summary": "List and describe the various models available in the API. Please refer to OpenAI documentation",

View file

@ -3636,6 +3636,20 @@ ws ::= | " " | "\n" [ \t]{0,20}
genparams["ollamasysprompt"] = ollamasysprompt
genparams["ollamabodyprompt"] = ollamabodyprompt
genparams["prompt"] = ollamasysprompt + ollamabodyprompt
elif api_format==8: # OpenAI Responses API, oai-responses
raw_input = genparams.get('input', '')
raw_instructions = genparams.get('instructions', '')
if isinstance(raw_input, str):
genparams['messages'] = [{"role": "user", "content": raw_input}]
elif isinstance(raw_input, list):
genparams['messages'] = raw_input
else:
genparams['messages'] = []
if raw_instructions and isinstance(raw_instructions, str) and raw_instructions!="":
genparams['messages'].insert(0, {"role": "system", "content": raw_instructions})
genparams['stream'] = False
transform_genparams(genparams, 4, use_jinja) # Delegate to the chat-completions transform by re-running as format 4
return genparams
#final transformations (universal template replace)
replace_instruct_placeholders = genparams.get('replace_instruct_placeholders', True)
@ -3788,7 +3802,7 @@ class KcppProxyHandler(http.server.BaseHTTPRequestHandler):
is_chat_completions_path = (self.path.endswith('/v1/chat/completions') or self.path=='/chat/completions')
#any requests to the following endpoints is capable of waking the server
wake_requests = ["/api/extra/generate/stream","/api/extra/tokencount","/api/v1/generate","/sdapi/v1/interrogate","/v1/completions","/v1/chat/completions","/api/extra/transcribe","/v1/audio/transcriptions","/api/extra/tts","/v1/audio/speech","/api/extra/embeddings","/v1/embeddings","/api/extra/music/prepare","/api/extra/music/generate","/sdapi/v1/txt2img","/sdapi/v1/img2img","/sdapi/v1/upscale"]
wake_requests = ["/api/extra/generate/stream","/api/extra/tokencount","/api/v1/generate","/sdapi/v1/interrogate","/v1/completions","/v1/chat/completions","/v1/responses","/api/extra/transcribe","/v1/audio/transcriptions","/api/extra/tts","/v1/audio/speech","/api/extra/embeddings","/v1/embeddings","/api/extra/music/prepare","/api/extra/music/generate","/sdapi/v1/txt2img","/sdapi/v1/img2img","/sdapi/v1/upscale"]
is_wake_request = self.path in wake_requests
if is_post and (is_completions_path or is_chat_completions_path or is_wake_request):
@ -4096,6 +4110,9 @@ class KcppServerRequestHandler(http.server.SimpleHTTPRequestHandler):
res = {"model": friendlymodelname,"created_at": str(datetime.now(timezone.utc).isoformat()),"response":recvtxt,"done": True,"done_reason":currfinishreason,"context": tokarr,"total_duration": 1,"load_duration": 1,"prompt_eval_count": prompttokens,"prompt_eval_duration": 1,"eval_count": comptokens,"eval_duration": 1}
elif api_format == 7:
res = {"model": friendlymodelname,"created_at": str(datetime.now(timezone.utc).isoformat()),"message":{"role":"assistant","content":recvtxt},"done": True,"done_reason":currfinishreason,"total_duration": 1,"load_duration": 1,"prompt_eval_count": prompttokens,"prompt_eval_duration": 1,"eval_count": comptokens,"eval_duration": 1}
elif api_format == 8:
resp_id = f"resp-A{genparams.get('oai_uniqueid', 1)}"
res = {"id": resp_id, "object": "response", "created_at": int(time.time()), "model": friendlymodelname, "output": [ { "type": "message", "role": "assistant", "content": [ {"type": "output_text", "text": recvtxt} ] } ], "usage": { "input_tokens": prompttokens, "output_tokens": comptokens, "total_tokens": prompttokens + comptokens } }
else: #kcpp format
res = {"results": [{"text": recvtxt, "tool_calls": tool_calls, "finish_reason": currfinishreason, "logprobs":logprobsdict, "prompt_tokens": prompttokens, "completion_tokens": comptokens}]}
@ -5325,7 +5342,7 @@ Change Mode<br>
# handle endpoints that require mutex locking and handle actual gens
try:
sse_stream_flag = False
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat
api_format = 0 #1=basic,2=kai,3=oai,4=oai-chat,5=interrogate,6=ollama,7=ollamachat,8=oai-responses
is_imggen = False
is_comfyui_imggen = False
is_oai_imggen = False
@ -5418,6 +5435,8 @@ Change Mode<br>
api_format = 6
elif self.path.endswith('/api/chat'): #ollama
api_format = 7
elif self.path.endswith('/v1/responses') or self.path=='/responses': #oai-responses
api_format = 8
elif self.path.endswith('/sdapi/v1/extra-single-image') or self.path.endswith('/sdapi/v1/upscale'):
is_img_upscale = True
elif self.path=="/prompt" or self.path=="/images/generations" or self.path.endswith('/v1/images/generations') or self.path.endswith('/sdapi/v1/txt2img') or self.path.endswith('/sdapi/v1/img2img'):