From d3d50a7b3c67340c8d5171e34349299ff4cbfcd2 Mon Sep 17 00:00:00 2001 From: Concedo <39025047+LostRuins@users.noreply.github.com> Date: Sat, 4 Apr 2026 23:03:33 +0800 Subject: [PATCH] fixed reasoning content response in fakestreaming tools --- koboldcpp.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/koboldcpp.py b/koboldcpp.py index fbb8e991a..7d5eceba4 100755 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -6076,6 +6076,26 @@ Change Mode
# Send content if present if content_text: + reasoning_txt = "" + thinkstrips = [""] + thinksplitters = [""] + for tsp in thinksplitters: + if tsp in content_text: + parts = content_text.split(tsp, 1) + reasoning_txt = parts[0] + content_text = parts[1] + for ts in thinkstrips: + reasoning_txt = reasoning_txt.replace(ts, "") + if reasoning_txt: + chunk_content = json.dumps({ + "id": "koboldcpp", + "object": "chat.completion.chunk", + "created": int(time.time()), + "model": modelNameToReturn, + "choices": [{"index": 0, "finish_reason": None, "delta": {"reasoning_content": reasoning_txt}}] + }) + self.wfile.write(f"data: {chunk_content}\n\n".encode()) + self.wfile.flush() chunk_content = json.dumps({ "id": "koboldcpp", "object": "chat.completion.chunk",