fix(ai): preserve requiresThinkingAsText replay semantics (closes #3387)

Mario Zechner 2026-04-20 19:44:38 +02:00
parent b73212616d
commit 1d488626d9
3 changed files with 277 additions and 33 deletions


@@ -4,6 +4,7 @@
### Fixed
- Fixed `openai-completions` `compat.requiresThinkingAsText` assistant replay to preserve text-part serialization and avoid same-model crashes when prior assistant messages contain both thinking and text ([#3387](https://github.com/badlogic/pi-mono/issues/3387))
- Fixed non-vision model requests to replace user and tool-result image blocks with explicit text placeholders instead of silently dropping them during provider payload conversion ([#3429](https://github.com/badlogic/pi-mono/issues/3429))
- Fixed OpenRouter Meta tests by switching `meta-llama/llama-4-maverick` to `meta-llama/llama-4-scout` to avoid type-check failures from model-catalog drift
- Fixed direct OpenAI Chat Completions requests to map `sessionId` and `cacheRetention` to OpenAI prompt caching fields, sending `prompt_cache_key` when caching is enabled and `prompt_cache_retention: "24h"` for direct `api.openai.com` requests with long retention ([#3426](https://github.com/badlogic/pi-mono/issues/3426))
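
To make the non-vision placeholder entry (#3429) concrete, here is a minimal sketch of the substitution it describes; the helper name, block shapes, and placeholder wording are illustrative assumptions, not the package's actual code:

```ts
// Hedged sketch: replace image blocks with explicit text placeholders for
// non-vision models instead of silently dropping them. All names assumed.
type Block =
  | { type: "text"; text: string }
  | { type: "image"; mimeType: string; data: string };

function replaceImagesForNonVisionModel(blocks: Block[]): Block[] {
  return blocks.map((block) =>
    block.type === "image"
      ? { type: "text", text: `[image omitted: model does not accept ${block.mimeType} input]` }
      : block,
  );
}
```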
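Likewise, a hedged sketch of the prompt-caching mapping from the #3426 entry; only `prompt_cache_key` and `prompt_cache_retention: "24h"` come from the entry itself, while the option shape and retention values are assumptions:

```ts
// Sketch of the #3426 mapping under assumed option names and values.
interface CachingOptions {
  sessionId?: string;
  cacheRetention?: "short" | "long"; // variants assumed
}

function applyPromptCaching(
  body: Record<string, unknown>,
  opts: CachingOptions,
  baseUrl: string,
): void {
  if (!opts.sessionId) return; // caching not enabled for this request
  body.prompt_cache_key = opts.sessionId; // stable key routes to a warm prompt cache
  if (opts.cacheRetention === "long" && baseUrl.startsWith("https://api.openai.com")) {
    body.prompt_cache_retention = "24h"; // only for direct api.openai.com requests
  }
}
```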


@@ -16,6 +16,7 @@ import type {
AssistantMessage,
CacheRetention,
Context,
ImageContent,
Message,
Model,
OpenAICompletionsCompat,
@@ -56,6 +57,22 @@ function hasToolHistory(messages: Message[]): boolean {
return false;
}
function isTextContentBlock(block: { type: string }): block is TextContent {
return block.type === "text";
}
function isThinkingContentBlock(block: { type: string }): block is ThinkingContent {
return block.type === "thinking";
}
function isToolCallBlock(block: { type: string }): block is ToolCall {
return block.type === "toolCall";
}
function isImageContentBlock(block: { type: string }): block is ImageContent {
return block.type === "image";
}
export interface OpenAICompletionsOptions extends StreamOptions {
toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
@@ -716,42 +733,54 @@ export function convertMessages(
content: compat.requiresAssistantAfterToolResult ? "" : null,
};
const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
// Filter out empty text blocks to avoid API validation errors
const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
if (nonEmptyTextBlocks.length > 0) {
const assistantTextParts = msg.content
.filter(isTextContentBlock)
.filter((block) => block.text.trim().length > 0)
.map(
(block) =>
({
type: "text",
text: sanitizeSurrogates(block.text),
}) satisfies ChatCompletionContentPartText,
);
const assistantText = assistantTextParts.map((part) => part.text).join("");
const nonEmptyThinkingBlocks = msg.content
.filter(isThinkingContentBlock)
.filter((block) => block.thinking.trim().length > 0);
if (nonEmptyThinkingBlocks.length > 0) {
if (compat.requiresThinkingAsText) {
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
const thinkingText = nonEmptyThinkingBlocks
.map((block) => sanitizeSurrogates(block.thinking))
.join("\n\n");
assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
} else {
// Always send assistant content as a plain string (OpenAI Chat Completions
// API standard format). Sending as an array of {type:"text", text:"..."}
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
// NVIDIA NIM) to mirror the content-block structure literally in their
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
if (assistantText.length > 0) {
assistantMsg.content = assistantText;
}
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
}
}
} else if (assistantText.length > 0) {
// Always send assistant content as a plain string (OpenAI Chat Completions
// API standard format). Sending as an array of {type:"text", text:"..."}
// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
// NVIDIA NIM) to mirror the content-block structure literally in their
// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
assistantMsg.content = assistantText;
}
// Handle thinking blocks
const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
// Filter out empty thinking blocks to avoid API validation errors
const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
if (nonEmptyThinkingBlocks.length > 0) {
if (compat.requiresThinkingAsText) {
// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
if (textContent) {
textContent.unshift({ type: "text", text: thinkingText });
} else {
assistantMsg.content = [{ type: "text", text: thinkingText }];
}
} else {
// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
if (signature && signature.length > 0) {
(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
}
}
}
const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
const toolCalls = msg.content.filter(isToolCallBlock);
if (toolCalls.length > 0) {
assistantMsg.tool_calls = toolCalls.map((tc) => ({
id: tc.id,
@@ -797,8 +826,8 @@ export function convertMessages(
// Extract text and image content
const textResult = toolMsg.content
.filter((c) => c.type === "text")
.map((c) => (c as any).text)
.filter(isTextContentBlock)
.map((block) => block.text)
.join("\n");
const hasImages = toolMsg.content.some((c) => c.type === "image");
@@ -817,11 +846,11 @@
if (hasImages && model.input.includes("image")) {
for (const block of toolMsg.content) {
if (block.type === "image") {
if (isImageContentBlock(block)) {
imageBlocks.push({
type: "image_url",
image_url: {
url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
url: `data:${block.mimeType};base64,${block.data}`,
},
});
}
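
The crash referenced in the changelog hides in the removed lines above: the plain-string branch had already set `assistantMsg.content` to a string, after which the old thinking handler cast that content to an array and called `unshift` on it. A standalone reproduction of the pitfall (names illustrative):

```ts
// A TypeScript `as` cast changes nothing at runtime, so when the content is
// actually a string, the array method call below throws.
function getReplayedContent(): string | Array<{ type: "text"; text: string }> {
  return "visible answer"; // the plain-string branch has already run
}

const textContent = getReplayedContent() as Array<{ type: "text"; text: string }>;
textContent.unshift({ type: "text", text: "internal reasoning" });
// => TypeError: textContent.unshift is not a function
```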


@@ -0,0 +1,214 @@
import { once } from "node:events";
import http from "node:http";
import type { AddressInfo } from "node:net";
import { afterEach, describe, expect, it } from "vitest";
import { convertMessages, streamOpenAICompletions } from "../src/providers/openai-completions.js";
import type {
AssistantMessage,
AssistantMessageEvent,
Context,
Model,
OpenAICompletionsCompat,
Usage,
} from "../src/types.js";
const emptyUsage: Usage = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
totalTokens: 0,
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};
const compat = {
supportsStore: true,
supportsDeveloperRole: true,
supportsReasoningEffort: true,
reasoningEffortMap: {},
supportsUsageInStreaming: true,
maxTokensField: "max_completion_tokens",
requiresToolResultName: false,
requiresAssistantAfterToolResult: false,
requiresThinkingAsText: true,
thinkingFormat: "openai",
openRouterRouting: {},
vercelGatewayRouting: {},
zaiToolStream: false,
supportsStrictMode: true,
cacheControlFormat: undefined,
sendSessionAffinityHeaders: false,
} satisfies Required<Omit<OpenAICompletionsCompat, "cacheControlFormat">> & {
cacheControlFormat?: OpenAICompletionsCompat["cacheControlFormat"];
};
function buildModel(baseUrl = "http://127.0.0.1:1"): Model<"openai-completions"> {
return {
id: "repro-model",
name: "Repro Model",
api: "openai-completions",
provider: "repro-provider",
baseUrl,
reasoning: true,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: 128000,
maxTokens: 4096,
compat,
};
}
function buildAssistant(content: AssistantMessage["content"]): AssistantMessage {
return {
role: "assistant",
content,
api: "openai-completions",
provider: "repro-provider",
model: "repro-model",
usage: emptyUsage,
stopReason: "stop",
timestamp: 2,
};
}
function buildContext(assistant: AssistantMessage): Context {
return {
messages: [
{ role: "user", content: "hello", timestamp: 1 },
assistant,
{ role: "user", content: "continue", timestamp: 3 },
],
};
}
async function collectEvents(stream: AsyncIterable<AssistantMessageEvent>): Promise<AssistantMessageEvent[]> {
const events: AssistantMessageEvent[] = [];
for await (const event of stream) {
events.push(event);
}
return events;
}
interface ChatCompletionsRequestBody {
model: string;
messages: Array<{ role: string; content?: unknown }>;
stream: boolean;
stream_options?: { include_usage?: boolean };
}
describe("openai-completions thinking-as-text replay", () => {
afterEach(() => {
delete process.env.OPENAI_API_KEY;
});
it("serializes same-model thinking-plus-text replay as assistant text parts", () => {
const messages = convertMessages(
buildModel(),
buildContext(
buildAssistant([
{ type: "thinking", thinking: "internal reasoning" },
{ type: "text", text: "visible answer" },
]),
),
compat,
);
expect(messages[1]).toEqual({
role: "assistant",
content: [
{ type: "text", text: "internal reasoning" },
{ type: "text", text: "visible answer" },
],
});
});
it("serializes same-model thinking-only replay as assistant text parts", () => {
const messages = convertMessages(
buildModel(),
buildContext(buildAssistant([{ type: "thinking", thinking: "internal reasoning" }])),
compat,
);
expect(messages[1]).toEqual({
role: "assistant",
content: [{ type: "text", text: "internal reasoning" }],
});
});
it("reaches the endpoint when replay contains both thinking and text", async () => {
const requestBodies: ChatCompletionsRequestBody[] = [];
const server = http.createServer(async (req, res) => {
if (req.method !== "POST" || req.url !== "/chat/completions") {
res.writeHead(404).end();
return;
}
let body = "";
for await (const chunk of req) {
body += chunk.toString();
}
requestBodies.push(JSON.parse(body) as ChatCompletionsRequestBody);
res.writeHead(200, {
"content-type": "text/event-stream",
"cache-control": "no-cache",
connection: "keep-alive",
});
res.write(
`data: ${JSON.stringify({
id: "chatcmpl-repro",
object: "chat.completion.chunk",
created: 0,
model: "repro-model",
choices: [{ index: 0, delta: { role: "assistant", content: "ok" }, finish_reason: null }],
})}\n\n`,
);
res.write(
`data: ${JSON.stringify({
id: "chatcmpl-repro",
object: "chat.completion.chunk",
created: 0,
model: "repro-model",
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
usage: { prompt_tokens: 1, completion_tokens: 1 },
})}\n\n`,
);
res.write("data: [DONE]\n\n");
res.end();
});
server.listen(0, "127.0.0.1");
await once(server, "listening");
try {
const { port } = server.address() as AddressInfo;
const events = await collectEvents(
streamOpenAICompletions(
buildModel(`http://127.0.0.1:${port}`),
buildContext(
buildAssistant([
{ type: "thinking", thinking: "internal reasoning" },
{ type: "text", text: "visible answer" },
]),
),
{ apiKey: "test-key" },
),
);
expect(requestBodies).toHaveLength(1);
expect(requestBodies[0]?.messages[1]).toEqual({
role: "assistant",
content: [
{ type: "text", text: "internal reasoning" },
{ type: "text", text: "visible answer" },
],
});
const terminalEvent = events.at(-1);
expect(terminalEvent?.type).toBe("done");
} finally {
server.close();
await once(server, "close");
}
});
});
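
For contrast with the expectations above: when `requiresThinkingAsText` is `false`, the converter keeps assistant content as a plain string and attaches the joined thinking under the block's `thinkingSignature` key. A hedged sketch of the replayed message that path would produce, assuming a hypothetical signature of `reasoning_content` (this commit's tests do not exercise that path):

```ts
// Sketch only; the signature key is an assumption, not asserted by these tests.
const expectedSignatureReplay = {
  role: "assistant",
  content: "visible answer",
  reasoning_content: "internal reasoning",
};
```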