fix(ai): preserve requiresThinkingAsText replay semantics (closes #3387)

Mario Zechner 2026-04-20 19:44:38 +02:00
parent b73212616d
commit 1d488626d9
3 changed files with 277 additions and 33 deletions


@@ -4,6 +4,7 @@
### Fixed
- Fixed `openai-completions` `compat.requiresThinkingAsText` assistant replay to preserve text-part serialization and avoid same-model crashes when prior assistant messages contain both thinking and text ([#3387](https://github.com/badlogic/pi-mono/issues/3387))
- Fixed non-vision model requests to replace user and tool-result image blocks with explicit text placeholders instead of silently dropping them during provider payload conversion ([#3429](https://github.com/badlogic/pi-mono/issues/3429))
- Fixed OpenRouter Meta tests by switching `meta-llama/llama-4-maverick` to `meta-llama/llama-4-scout` to avoid type-check failures from model-catalog drift
- Fixed direct OpenAI Chat Completions requests to map `sessionId` and `cacheRetention` to OpenAI prompt caching fields, sending `prompt_cache_key` when caching is enabled and `prompt_cache_retention: "24h"` for direct `api.openai.com` requests with long retention ([#3426](https://github.com/badlogic/pi-mono/issues/3426))
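
To make the non-vision placeholder entry (#3429) concrete, here is a minimal sketch of the substitution it describes; the helper name, block shapes, and placeholder wording are illustrative assumptions, not the package's actual code:

```ts
// Hedged sketch: replace image blocks with explicit text placeholders for
// non-vision models instead of silently dropping them. All names assumed.
type Block =
  | { type: "text"; text: string }
  | { type: "image"; mimeType: string; data: string };

function replaceImagesForNonVisionModel(blocks: Block[]): Block[] {
  return blocks.map((block) =>
    block.type === "image"
      ? { type: "text", text: `[image omitted: model does not accept ${block.mimeType} input]` }
      : block,
  );
}
```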
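Likewise, a hedged sketch of the prompt-caching mapping from the #3426 entry; only `prompt_cache_key` and `prompt_cache_retention: "24h"` come from the entry itself, while the option shape and retention values are assumptions:

```ts
// Sketch of the #3426 mapping under assumed option names and values.
interface CachingOptions {
  sessionId?: string;
  cacheRetention?: "short" | "long"; // variants assumed
}

function applyPromptCaching(
  body: Record<string, unknown>,
  opts: CachingOptions,
  baseUrl: string,
): void {
  if (!opts.sessionId) return; // caching not enabled for this request
  body.prompt_cache_key = opts.sessionId; // stable key routes to a warm prompt cache
  if (opts.cacheRetention === "long" && baseUrl.startsWith("https://api.openai.com")) {
    body.prompt_cache_retention = "24h"; // only for direct api.openai.com requests
  }
}
```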


@@ -16,6 +16,7 @@ import type {
AssistantMessage,
CacheRetention,
Context,
ImageContent,
Message,
Model,
OpenAICompletionsCompat,
@@ -56,6 +57,22 @@ function hasToolHistory(messages: Message[]): boolean {
return false;
}
function isTextContentBlock(block: { type: string }): block is TextContent {
return block.type === "text";
}
function isThinkingContentBlock(block: { type: string }): block is ThinkingContent {
return block.type === "thinking";
}
function isToolCallBlock(block: { type: string }): block is ToolCall {
return block.type === "toolCall";
}
function isImageContentBlock(block: { type: string }): block is ImageContent {
return block.type === "image";
}
export interface OpenAICompletionsOptions extends StreamOptions {
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -716,42 +733,54 @@ export function convertMessages(
content: compat.requiresAssistantAfterToolResult ? "" : null,
};
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
// Filter out empty text blocks to avoid API validation errors
const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
if (nonEmptyTextBlocks.length > 0) {
const assistantTextParts = msg.content
.filter(isTextContentBlock)
.filter((block) => block.text.trim().length > 0)
.map(
(block) =>
({
type: "text",
text: sanitizeSurrogates(block.text),
}) satisfies ChatCompletionContentPartText,
);
const assistantText = assistantTextParts.map((part) => part.text).join("");
const nonEmptyThinkingBlocks = msg.content
.filter(isThinkingContentBlock)
.filter((block) => block.thinking.trim().length > 0);
if (nonEmptyThinkingBlocks.length > 0) {
if (compat.requiresThinkingAsText) {
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
const thinkingText = nonEmptyThinkingBlocks
.map((block) => sanitizeSurrogates(block.thinking))
.join("\n\n");
assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
} else {
// Always send assistant content as a plain string (OpenAI Chat Completions
// API standard format). Sending as an array of {type:"text", text:"..."}
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
// NVIDIA NIM) to mirror the content-block structure literally in their
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
if (assistantText.length > 0) {
assistantMsg.content = assistantText;
}
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
}
}
} else if (assistantText.length > 0) {
// Always send assistant content as a plain string (OpenAI Chat Completions
// API standard format). Sending as an array of {type:"text", text:"..."}
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
// NVIDIA NIM) to mirror the content-block structure literally in their
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
assistantMsg.content = assistantText;
}
// Handle thinking blocks
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
// Filter out empty thinking blocks to avoid API validation errors
const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
if (nonEmptyThinkingBlocks.length > 0) {
if (compat.requiresThinkingAsText) {
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
if (textContent) {
textContent.unshift({ type: "text", text: thinkingText });
} else {
assistantMsg.content = [{ type: "text", text: thinkingText }];
}
} else {
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
}
}
}
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
const toolCalls = msg.content.filter(isToolCallBlock);
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
@@ -797,8 +826,8 @@ export function convertMessages(
// Extract text and image content
const textResult = toolMsg.content
.filter((c) => c.type === "text")
.map((c) => (c as any).text)
.filter(isTextContentBlock)
.map((block) => block.text)
.join("\n");
const hasImages = toolMsg.content.some((c) => c.type === "image");
@@ -817,11 +846,11 @@
if (hasImages && model.input.includes("image")) {
for (const block of toolMsg.content) {
if (block.type === "image") {
if (isImageContentBlock(block)) {
imageBlocks.push({
type: "image_url",
image_url: {
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
url: `data:${block.mimeType};base64,${block.data}`,
},
});
}
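
The crash referenced in the changelog hides in the removed lines above: the plain-string branch had already set `assistantMsg.content` to a string, after which the old thinking handler cast that content to an array and called `unshift` on it. A standalone reproduction of the pitfall (names illustrative):

```ts
// A TypeScript `as` cast changes nothing at runtime, so when the content is
// actually a string, the array method call below throws.
function getReplayedContent(): string | Array<{ type: "text"; text: string }> {
  return "visible answer"; // the plain-string branch has already run
}

const textContent = getReplayedContent() as Array<{ type: "text"; text: string }>;
textContent.unshift({ type: "text", text: "internal reasoning" });
// => TypeError: textContent.unshift is not a function
```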


@@ -0,0 +1,214 @@
import { once } from "node:events";
import http from "node:http";
import type { AddressInfo } from "node:net";
import { afterEach, describe, expect, it } from "vitest";
import { convertMessages, streamOpenAICompletions } from "../src/providers/openai-completions.js";
import type {
AssistantMessage,
AssistantMessageEvent,
Context,
Model,
OpenAICompletionsCompat,
Usage,
} from "../src/types.js";
const emptyUsage: Usage = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
const compat = {
supportsStore: true,
supportsDeveloperRole: true,
supportsReasoningEffort: true,
reasoningEffortMap: {},
supportsUsageInStreaming: true,
maxTokensField: "max_completion_tokens",
requiresToolResultName: false,
requiresAssistantAfterToolResult: false,
requiresThinkingAsText: true,
thinkingFormat: "openai",
openRouterRouting: {},
vercelGatewayRouting: {},
zaiToolStream: false,
supportsStrictMode: true,
cacheControlFormat: undefined,
sendSessionAffinityHeaders: false,
} satisfies Required<Omit<OpenAICompletionsCompat, "cacheControlFormat">> & {
cacheControlFormat?: OpenAICompletionsCompat["cacheControlFormat"];
};
function buildModel(baseUrl = "http://127.0.0.1:1"): Model<"openai-completions"> {
return {
id: "repro-model",
name: "Repro Model",
api: "openai-completions",
provider: "repro-provider",
baseUrl,
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 4096,
compat,
};
}
function buildAssistant(content: AssistantMessage["content"]): AssistantMessage {
return {
role: "assistant",
content,
api: "openai-completions",
provider: "repro-provider",
model: "repro-model",
usage: emptyUsage,
stopReason: "stop",
timestamp: 2,
};
}
function buildContext(assistant: AssistantMessage): Context {
return {
messages: [
{ role: "user", content: "hello", timestamp: 1 },
assistant,
{ role: "user", content: "continue", timestamp: 3 },
],
};
}
async function collectEvents(stream: AsyncIterable<AssistantMessageEvent>): Promise<AssistantMessageEvent[]> {
const events: AssistantMessageEvent[] = [];
for await (const event of stream) {
events.push(event);
}
return events;
}
interface ChatCompletionsRequestBody {
model: string;
messages: Array<{ role: string; content?: unknown }>;
stream: boolean;
stream_options?: { include_usage?: boolean };
}
describe("openai-completions thinking-as-text replay", () => {
afterEach(() => {
delete process.env.OPENAI_API_KEY;
});
it("serializes same-model thinking-plus-text replay as assistant text parts", () => {
const messages = convertMessages(
buildModel(),
buildContext(
buildAssistant([
{ type: "thinking", thinking: "internal reasoning" },
{ type: "text", text: "visible answer" },
]),
),
compat,
);
expect(messages[1]).toEqual({
role: "assistant",
content: [
{ type: "text", text: "internal reasoning" },
{ type: "text", text: "visible answer" },
],
});
});
it("serializes same-model thinking-only replay as assistant text parts", () => {
const messages = convertMessages(
buildModel(),
buildContext(buildAssistant([{ type: "thinking", thinking: "internal reasoning" }])),
compat,
);
expect(messages[1]).toEqual({
role: "assistant",
content: [{ type: "text", text: "internal reasoning" }],
});
});
it("reaches the endpoint when replay contains both thinking and text", async () => {
const requestBodies: ChatCompletionsRequestBody[] = [];
const server = http.createServer(async (req, res) => {
if (req.method !== "POST" || req.url !== "/chat/completions") {
res.writeHead(404).end();
return;
}
let body = "";
for await (const chunk of req) {
body += chunk.toString();
}
requestBodies.push(JSON.parse(body) as ChatCompletionsRequestBody);
res.writeHead(200, {
"content-type": "text/event-stream",
"cache-control": "no-cache",
connection: "keep-alive",
});
res.write(
`data: ${JSON.stringify({
id: "chatcmpl-repro",
object: "chat.completion.chunk",
created: 0,
model: "repro-model",
choices: [{ index: 0, delta: { role: "assistant", content: "ok" }, finish_reason: null }],
})}\n\n`,
);
res.write(
`data: ${JSON.stringify({
id: "chatcmpl-repro",
object: "chat.completion.chunk",
created: 0,
model: "repro-model",
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
usage: { prompt_tokens: 1, completion_tokens: 1 },
})}\n\n`,
);
res.write("data: [DONE]\n\n");
res.end();
});
server.listen(0, "127.0.0.1");
await once(server, "listening");
try {
const { port } = server.address() as AddressInfo;
const events = await collectEvents(
streamOpenAICompletions(
buildModel(`http://127.0.0.1:${port}`),
buildContext(
buildAssistant([
{ type: "thinking", thinking: "internal reasoning" },
{ type: "text", text: "visible answer" },
]),
),
{ apiKey: "test-key" },
),
);
expect(requestBodies).toHaveLength(1);
expect(requestBodies[0]?.messages[1]).toEqual({
role: "assistant",
content: [
{ type: "text", text: "internal reasoning" },
{ type: "text", text: "visible answer" },
],
});
const terminalEvent = events.at(-1);
expect(terminalEvent?.type).toBe("done");
} finally {
server.close();
await once(server, "close");
}
});
});
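
For contrast with the expectations above: when `requiresThinkingAsText` is `false`, the converter keeps assistant content as a plain string and attaches the joined thinking under the block's `thinkingSignature` key. A hedged sketch of the replayed message that path would produce, assuming a hypothetical signature of `reasoning_content` (this commit's tests do not exercise that path):

```ts
// Sketch only; the signature key is an assumption, not asserted by these tests.
const expectedSignatureReplay = {
  role: "assistant",
  content: "visible answer",
  reasoning_content: "internal reasoning",
};
```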