From b7f8d0fe2b0b97d2fe54286c7ef42120ac802f08 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Mon, 14 Jul 2025 22:17:18 +0800
Subject: [PATCH] handle inconsistent final message content being sent with
 finish_reason

---
 koboldcpp.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/koboldcpp.py b/koboldcpp.py
index 3f2c612b1..acae9fb99 100644
--- a/koboldcpp.py
+++ b/koboldcpp.py
@@ -2830,6 +2830,18 @@ class KcppServerRequestHandler(http.server.SimpleHTTPRequestHandler):
                     tokenStr = tokenStr[:sindex]

                 if tokenStr!="" or streamDone:
+                    need_split_final_msg = True if (currfinishreason is not None and streamDone and tokenStr!="") else False
+                    if need_split_final_msg: #we need to send one message without the finish reason, then send a finish reason with no msg to follow standards
+                        if api_format == 4: # if oai chat, set format to expected openai streaming response
+                            event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":int(time.time()),"model":friendlymodelname,"choices":[{"index":0,"finish_reason":None,"delta":{'role':'assistant','content':tokenStr}}]})
+                            await self.send_oai_sse_event(event_str)
+                        elif api_format == 3: # non chat completions
+                            event_str = json.dumps({"id":"koboldcpp","object":"text_completion","created":int(time.time()),"model":friendlymodelname,"choices":[{"index":0,"finish_reason":None,"text":tokenStr}]})
+                            await self.send_oai_sse_event(event_str)
+                        else:
+                            event_str = json.dumps({"token": tokenStr, "finish_reason":None})
+                            await self.send_kai_sse_event(event_str)
+                        tokenStr = "" # now the final finish reason can be sent alone
                     if api_format == 4: # if oai chat, set format to expected openai streaming response
                         event_str = json.dumps({"id":"koboldcpp","object":"chat.completion.chunk","created":int(time.time()),"model":friendlymodelname,"choices":[{"index":0,"finish_reason":currfinishreason,"delta":{'role':'assistant','content':tokenStr}}]})
                         await self.send_oai_sse_event(event_str)
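
For illustration, a minimal standalone sketch (not part of the patch) of the SSE event sequence the change above produces for the OpenAI chat-completions streaming case (api_format == 4): any leftover content goes out first in a chunk whose finish_reason is null, and the finish_reason then follows alone in a contentless chunk, which is the split OpenAI-compatible clients expect. The emit_final_chunks helper and the print-based transport are hypothetical, used only to demonstrate the sequence.

    import json
    import time

    def emit_final_chunks(token_str, finish_reason, model="koboldcpp"):
        # Hypothetical helper: yields the JSON payloads a client would see
        # when the stream ends with both leftover content and a finish_reason.
        base = {"id": "koboldcpp", "object": "chat.completion.chunk",
                "created": int(time.time()), "model": model}
        if token_str:
            # First chunk: the remaining content, with finish_reason left null.
            yield json.dumps({**base, "choices": [{"index": 0, "finish_reason": None,
                              "delta": {"role": "assistant", "content": token_str}}]})
        # Final chunk: the finish_reason alone, with empty content.
        yield json.dumps({**base, "choices": [{"index": 0, "finish_reason": finish_reason,
                          "delta": {"role": "assistant", "content": ""}}]})

    for payload in emit_final_chunks("world!", "stop"):
        print(f"data: {payload}\n")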