Mirror of https://github.com/badlogic/pi-mono.git (synced 2026-04-28 06:19:43 +00:00)
fix(ai): preserve requiresThinkingAsText replay semantics closes #3387
Some checks are pending: CI / build-check-test (push) waiting to run
Parent: b73212616d
Commit: 1d488626d9
3 changed files with 277 additions and 33 deletions
@@ -4,6 +4,7 @@
 
 ### Fixed
 
+- Fixed `openai-completions` `compat.requiresThinkingAsText` assistant replay to preserve text-part serialization and avoid same-model crashes when prior assistant messages contain both thinking and text ([#3387](https://github.com/badlogic/pi-mono/issues/3387))
 - Fixed non-vision model requests to replace user and tool-result image blocks with explicit text placeholders instead of silently dropping them during provider payload conversion ([#3429](https://github.com/badlogic/pi-mono/issues/3429))
 - Fixed OpenRouter Meta tests by switching `meta-llama/llama-4-maverick` to `meta-llama/llama-4-scout` to avoid type-check failures from model-catalog drift.
 - Fixed direct OpenAI Chat Completions requests to map `sessionId` and `cacheRetention` to OpenAI prompt caching fields, sending `prompt_cache_key` when caching is enabled and `prompt_cache_retention: "24h"` for direct `api.openai.com` requests with long retention ([#3426](https://github.com/badlogic/pi-mono/issues/3426))
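A minimal sketch of the prompt-caching mapping described in the last changelog bullet above. The `buildCacheFields` helper, its parameter names, and the retention values are hypothetical; only the wire fields `prompt_cache_key` and `prompt_cache_retention: "24h"` and the direct-`api.openai.com` condition come from the changelog entry:

```typescript
// Hypothetical sketch of the #3426 mapping; helper name and signature are
// illustrative, not the repo's actual API.
interface CacheParams {
	sessionId?: string;
	cacheRetention?: "short" | "long";
}

function buildCacheFields(baseUrl: string, params: CacheParams): Record<string, string> {
	const fields: Record<string, string> = {};
	if (params.sessionId) {
		// Stable key so OpenAI can route repeat requests to a warm prompt cache.
		fields.prompt_cache_key = params.sessionId;
	}
	if (params.cacheRetention === "long" && baseUrl.includes("api.openai.com")) {
		// Extended retention is only sent on direct OpenAI requests.
		fields.prompt_cache_retention = "24h";
	}
	return fields;
}
```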
@@ -16,6 +16,7 @@ import type {
 	AssistantMessage,
 	CacheRetention,
 	Context,
+	ImageContent,
 	Message,
 	Model,
 	OpenAICompletionsCompat,
@@ -56,6 +57,22 @@ function hasToolHistory(messages: Message[]): boolean {
 	return false;
 }
 
+function isTextContentBlock(block: { type: string }): block is TextContent {
+	return block.type === "text";
+}
+
+function isThinkingContentBlock(block: { type: string }): block is ThinkingContent {
+	return block.type === "thinking";
+}
+
+function isToolCallBlock(block: { type: string }): block is ToolCall {
+	return block.type === "toolCall";
+}
+
+function isImageContentBlock(block: { type: string }): block is ImageContent {
+	return block.type === "image";
+}
+
 export interface OpenAICompletionsOptions extends StreamOptions {
 	toolChoice?: "auto" | "none" | "required" | { type: "function"; function: { name: string } };
 	reasoningEffort?: "minimal" | "low" | "medium" | "high" | "xhigh";
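The point of the new guards: a user-defined type predicate lets `Array.prototype.filter` narrow the element type, which is what lets the hunks below drop the `as TextContent[]` and `(c as any)` casts. A standalone illustration under simplified types (not the repo's actual types):

```typescript
// Standalone illustration of filter() narrowing via a type predicate.
type Block = { type: "text"; text: string } | { type: "image"; data: string };

function isText(block: Block): block is Extract<Block, { type: "text" }> {
	return block.type === "text";
}

const blocks: Block[] = [
	{ type: "text", text: "hello" },
	{ type: "image", data: "..." },
];

// texts is typed as { type: "text"; text: string }[], no cast required.
const texts = blocks.filter(isText);
console.log(texts.map((b) => b.text).join(" "));
```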
@@ -716,42 +733,54 @@ export function convertMessages(
 				content: compat.requiresAssistantAfterToolResult ? "" : null,
 			};
 
-			const textBlocks = msg.content.filter((b) => b.type === "text") as TextContent[];
-			// Filter out empty text blocks to avoid API validation errors
-			const nonEmptyTextBlocks = textBlocks.filter((b) => b.text && b.text.trim().length > 0);
-			if (nonEmptyTextBlocks.length > 0) {
+			const assistantTextParts = msg.content
+				.filter(isTextContentBlock)
+				.filter((block) => block.text.trim().length > 0)
+				.map(
+					(block) =>
+						({
+							type: "text",
+							text: sanitizeSurrogates(block.text),
+						}) satisfies ChatCompletionContentPartText,
+				);
+			const assistantText = assistantTextParts.map((part) => part.text).join("");
+
+			const nonEmptyThinkingBlocks = msg.content
+				.filter(isThinkingContentBlock)
+				.filter((block) => block.thinking.trim().length > 0);
+			if (nonEmptyThinkingBlocks.length > 0) {
+				if (compat.requiresThinkingAsText) {
+					// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
+					const thinkingText = nonEmptyThinkingBlocks
+						.map((block) => sanitizeSurrogates(block.thinking))
+						.join("\n\n");
+					assistantMsg.content = [{ type: "text", text: thinkingText }, ...assistantTextParts];
+				} else {
+					// Always send assistant content as a plain string (OpenAI Chat Completions
+					// API standard format). Sending as an array of {type:"text", text:"..."}
+					// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
+					// NVIDIA NIM) to mirror the content-block structure literally in their
+					// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
+					if (assistantText.length > 0) {
+						assistantMsg.content = assistantText;
+					}
+
+					// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
+					const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
+					if (signature && signature.length > 0) {
+						(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((block) => block.thinking).join("\n");
+					}
+				}
+			} else if (assistantText.length > 0) {
 				// Always send assistant content as a plain string (OpenAI Chat Completions
 				// API standard format). Sending as an array of {type:"text", text:"..."}
 				// objects is non-standard and causes some models (e.g. DeepSeek V3.2 via
 				// NVIDIA NIM) to mirror the content-block structure literally in their
 				// output, producing recursive nesting like [{'type':'text','text':'[{...}]'}].
-				assistantMsg.content = nonEmptyTextBlocks.map((b) => sanitizeSurrogates(b.text)).join("");
+				assistantMsg.content = assistantText;
 			}
 
-			// Handle thinking blocks
-			const thinkingBlocks = msg.content.filter((b) => b.type === "thinking") as ThinkingContent[];
-			// Filter out empty thinking blocks to avoid API validation errors
-			const nonEmptyThinkingBlocks = thinkingBlocks.filter((b) => b.thinking && b.thinking.trim().length > 0);
-			if (nonEmptyThinkingBlocks.length > 0) {
-				if (compat.requiresThinkingAsText) {
-					// Convert thinking blocks to plain text (no tags to avoid model mimicking them)
-					const thinkingText = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n\n");
-					const textContent = assistantMsg.content as Array<{ type: "text"; text: string }> | null;
-					if (textContent) {
-						textContent.unshift({ type: "text", text: thinkingText });
-					} else {
-						assistantMsg.content = [{ type: "text", text: thinkingText }];
-					}
-				} else {
-					// Use the signature from the first thinking block if available (for llama.cpp server + gpt-oss)
-					const signature = nonEmptyThinkingBlocks[0].thinkingSignature;
-					if (signature && signature.length > 0) {
-						(assistantMsg as any)[signature] = nonEmptyThinkingBlocks.map((b) => b.thinking).join("\n");
-					}
-				}
-			}
-
-			const toolCalls = msg.content.filter((b) => b.type === "toolCall") as ToolCall[];
+			const toolCalls = msg.content.filter(isToolCallBlock);
 			if (toolCalls.length > 0) {
 				assistantMsg.tool_calls = toolCalls.map((tc) => ({
 					id: tc.id,
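Why the removed path could crash, as the deleted lines above suggest: the old code first assigned the joined text blocks to `assistantMsg.content` as a plain string, and the later `requiresThinkingAsText` branch then cast that same field to an array and called `unshift` on it. A non-empty string passes the `if (textContent)` check, so replaying an assistant message that contained both thinking and text threw at runtime. A reduced, standalone sketch of that failure mode (not the repo's code):

```typescript
// Reduced version of the removed logic: content starts out as a string.
const content: string | Array<{ type: "text"; text: string }> | null = "visible answer";

// The old cast pretended content was already an array of text parts.
const textContent = content as Array<{ type: "text"; text: string }> | null;
if (textContent) {
	// Throws at runtime: strings have no unshift method,
	// i.e. "TypeError: textContent.unshift is not a function".
	textContent.unshift({ type: "text", text: "internal reasoning" });
}
```

The fix sidesteps the problem by computing `assistantTextParts` up front and handling the thinking branch before any string assignment, so `content` is only ever set to one representation.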
@@ -797,8 +826,8 @@ export function convertMessages(
 
 			// Extract text and image content
 			const textResult = toolMsg.content
-				.filter((c) => c.type === "text")
-				.map((c) => (c as any).text)
+				.filter(isTextContentBlock)
+				.map((block) => block.text)
 				.join("\n");
 			const hasImages = toolMsg.content.some((c) => c.type === "image");
 
@@ -817,11 +846,11 @@
 
 			if (hasImages && model.input.includes("image")) {
 				for (const block of toolMsg.content) {
-					if (block.type === "image") {
+					if (isImageContentBlock(block)) {
 						imageBlocks.push({
 							type: "image_url",
 							image_url: {
-								url: `data:${(block as any).mimeType};base64,${(block as any).data}`,
+								url: `data:${block.mimeType};base64,${block.data}`,
 							},
 						});
 					}
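The `model.input.includes("image")` guard above is where vision support is checked; per the #3429 changelog entry, non-vision models now receive an explicit text placeholder instead of having the image silently dropped. A hedged sketch of such a fallback, with placeholder wording and helper name that are assumptions, not the repo's actual strings:

```typescript
interface ImageBlock {
	type: "image";
	mimeType: string;
	data: string;
}

// Replace an image block with a text placeholder for non-vision models (#3429).
// Only the replace-rather-than-drop behavior comes from the changelog entry;
// the function name and placeholder text are illustrative.
function imagePlaceholder(block: ImageBlock): { type: "text"; text: string } {
	return {
		type: "text",
		text: `[Image (${block.mimeType}) omitted: model does not support image input]`,
	};
}
```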
packages/ai/test/openai-completions-thinking-as-text.test.ts (new file, 214 lines)

@@ -0,0 +1,214 @@
import { once } from "node:events";
import http from "node:http";
import type { AddressInfo } from "node:net";
import { afterEach, describe, expect, it } from "vitest";
import { convertMessages, streamOpenAICompletions } from "../src/providers/openai-completions.js";
import type {
	AssistantMessage,
	AssistantMessageEvent,
	Context,
	Model,
	OpenAICompletionsCompat,
	Usage,
} from "../src/types.js";

const emptyUsage: Usage = {
	input: 0,
	output: 0,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: 0,
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};

const compat = {
	supportsStore: true,
	supportsDeveloperRole: true,
	supportsReasoningEffort: true,
	reasoningEffortMap: {},
	supportsUsageInStreaming: true,
	maxTokensField: "max_completion_tokens",
	requiresToolResultName: false,
	requiresAssistantAfterToolResult: false,
	requiresThinkingAsText: true,
	thinkingFormat: "openai",
	openRouterRouting: {},
	vercelGatewayRouting: {},
	zaiToolStream: false,
	supportsStrictMode: true,
	cacheControlFormat: undefined,
	sendSessionAffinityHeaders: false,
} satisfies Required<Omit<OpenAICompletionsCompat, "cacheControlFormat">> & {
	cacheControlFormat?: OpenAICompletionsCompat["cacheControlFormat"];
};

function buildModel(baseUrl = "http://127.0.0.1:1"): Model<"openai-completions"> {
	return {
		id: "repro-model",
		name: "Repro Model",
		api: "openai-completions",
		provider: "repro-provider",
		baseUrl,
		reasoning: true,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 128000,
		maxTokens: 4096,
		compat,
	};
}

function buildAssistant(content: AssistantMessage["content"]): AssistantMessage {
	return {
		role: "assistant",
		content,
		api: "openai-completions",
		provider: "repro-provider",
		model: "repro-model",
		usage: emptyUsage,
		stopReason: "stop",
		timestamp: 2,
	};
}

function buildContext(assistant: AssistantMessage): Context {
	return {
		messages: [
			{ role: "user", content: "hello", timestamp: 1 },
			assistant,
			{ role: "user", content: "continue", timestamp: 3 },
		],
	};
}

async function collectEvents(stream: AsyncIterable<AssistantMessageEvent>): Promise<AssistantMessageEvent[]> {
	const events: AssistantMessageEvent[] = [];
	for await (const event of stream) {
		events.push(event);
	}
	return events;
}

interface ChatCompletionsRequestBody {
	model: string;
	messages: Array<{ role: string; content?: unknown }>;
	stream: boolean;
	stream_options?: { include_usage?: boolean };
}

describe("openai-completions thinking-as-text replay", () => {
	afterEach(() => {
		delete process.env.OPENAI_API_KEY;
	});

	it("serializes same-model thinking-plus-text replay as assistant text parts", () => {
		const messages = convertMessages(
			buildModel(),
			buildContext(
				buildAssistant([
					{ type: "thinking", thinking: "internal reasoning" },
					{ type: "text", text: "visible answer" },
				]),
			),
			compat,
		);

		expect(messages[1]).toEqual({
			role: "assistant",
			content: [
				{ type: "text", text: "internal reasoning" },
				{ type: "text", text: "visible answer" },
			],
		});
	});

	it("serializes same-model thinking-only replay as assistant text parts", () => {
		const messages = convertMessages(
			buildModel(),
			buildContext(buildAssistant([{ type: "thinking", thinking: "internal reasoning" }])),
			compat,
		);

		expect(messages[1]).toEqual({
			role: "assistant",
			content: [{ type: "text", text: "internal reasoning" }],
		});
	});

	it("reaches the endpoint when replay contains both thinking and text", async () => {
		const requestBodies: ChatCompletionsRequestBody[] = [];
		const server = http.createServer(async (req, res) => {
			if (req.method !== "POST" || req.url !== "/chat/completions") {
				res.writeHead(404).end();
				return;
			}

			let body = "";
			for await (const chunk of req) {
				body += chunk.toString();
			}
			requestBodies.push(JSON.parse(body) as ChatCompletionsRequestBody);

			res.writeHead(200, {
				"content-type": "text/event-stream",
				"cache-control": "no-cache",
				connection: "keep-alive",
			});
			res.write(
				`data: ${JSON.stringify({
					id: "chatcmpl-repro",
					object: "chat.completion.chunk",
					created: 0,
					model: "repro-model",
					choices: [{ index: 0, delta: { role: "assistant", content: "ok" }, finish_reason: null }],
				})}\n\n`,
			);
			res.write(
				`data: ${JSON.stringify({
					id: "chatcmpl-repro",
					object: "chat.completion.chunk",
					created: 0,
					model: "repro-model",
					choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
					usage: { prompt_tokens: 1, completion_tokens: 1 },
				})}\n\n`,
			);
			res.write("data: [DONE]\n\n");
			res.end();
		});

		server.listen(0, "127.0.0.1");
		await once(server, "listening");

		try {
			const { port } = server.address() as AddressInfo;
			const events = await collectEvents(
				streamOpenAICompletions(
					buildModel(`http://127.0.0.1:${port}`),
					buildContext(
						buildAssistant([
							{ type: "thinking", thinking: "internal reasoning" },
							{ type: "text", text: "visible answer" },
						]),
					),
					{ apiKey: "test-key" },
				),
			);

			expect(requestBodies).toHaveLength(1);
			expect(requestBodies[0]?.messages[1]).toEqual({
				role: "assistant",
				content: [
					{ type: "text", text: "internal reasoning" },
					{ type: "text", text: "visible answer" },
				],
			});

			const terminalEvent = events.at(-1);
			expect(terminalEvent?.type).toBe("done");
		} finally {
			server.close();
			await once(server, "close");
		}
	});
});
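One detail worth noting in the fixture above: the `satisfies Required<Omit<OpenAICompletionsCompat, "cacheControlFormat">>` clause forces the test to spell out every compat flag except `cacheControlFormat`, so any flag later added to the type fails type-checking here instead of silently defaulting. A reduced sketch of the pattern under made-up types:

```typescript
// Reduced illustration of the `satisfies Required<Omit<...>>` trick.
interface Flags {
	a?: boolean;
	b?: boolean;
	c?: string;
}

// Every key except `c` must be present: forgetting `b` is a compile error,
// while the object keeps its precise literal type.
const flags = {
	a: true,
	b: false,
	c: undefined,
} satisfies Required<Omit<Flags, "c">> & { c?: Flags["c"] };
```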