fix(ai): support xhigh for Codex GPT-5.5

Mario Zechner 2026-04-23 22:49:09 +02:00
parent 8700ac1f0e
commit bf4aa3a601
7 changed files with 80 additions and 12 deletions


@@ -225,7 +225,8 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 /**
  * Thinking/reasoning level for models that support it.
- * Note: "xhigh" is only supported by OpenAI gpt-5.1-codex-max, gpt-5.2, gpt-5.2-codex, gpt-5.3, and gpt-5.3-codex models.
+ * Note: "xhigh" is only supported by selected model families. Use supportsXhigh() from @mariozechner/pi-ai
+ * to detect support for a concrete model.
  */
 export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
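The new comment points callers at runtime detection instead of a hard-coded model list. A minimal sketch of that pattern, assuming the caller targets openai-codex models (clampCodexThinkingLevel is a hypothetical helper; supportsXhigh and getModel are the package exports exercised in the diffs below):

import { getModel, supportsXhigh } from "@mariozechner/pi-ai";

type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";

// Hypothetical helper: downgrade "xhigh" to "high" when the concrete model
// does not advertise xhigh support.
function clampCodexThinkingLevel(modelId: string, requested: ThinkingLevel): ThinkingLevel {
	if (requested !== "xhigh") return requested;
	const model = getModel("openai-codex", modelId);
	return model && supportsXhigh(model) ? "xhigh" : "high";
}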


@@ -8,6 +8,8 @@
 ### Fixed
 - Fixed `anthropic-messages` tool streaming compatibility by adding `compat.supportsEagerToolInputStreaming`, allowing Anthropic-compatible providers to omit per-tool `eager_input_streaming` and use the legacy fine-grained tool streaming beta header instead ([#3575](https://github.com/badlogic/pi-mono/issues/3575))
+- Fixed `supportsXhigh()` to recognize `openai-codex` `gpt-5.5`, preserving `xhigh` reasoning requests instead of clamping them to `high`.
 - Fixed `openai-completions` streamed tool-call assembly to coalesce deltas by stable tool index when OpenAI-compatible gateways mutate tool call IDs mid-stream, preventing malformed Kimi K2.6/OpenCode tool streams from splitting one call into multiple bogus tool calls ([#3576](https://github.com/badlogic/pi-mono/issues/3576))
 - Fixed `packages/ai` E2E coverage to use currently supported OpenAI Responses and OpenAI Codex models, and updated the Bedrock adaptive-thinking payload expectation to match the current `display: "summarized"` shape.
 - Fixed built-in `kimi-coding` model generation to attach `User-Agent: KimiCLI/1.5` to all generated Kimi models, overriding the Anthropic SDK default UA so direct Kimi Coding requests use the provider's expected client identity ([#3586](https://github.com/badlogic/pi-mono/issues/3586))


@@ -49,11 +49,16 @@ export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage
  * Check if a model supports xhigh thinking level.
  *
  * Supported today:
- * - GPT-5.2 / GPT-5.3 / GPT-5.4 model families
+ * - GPT-5.2 / GPT-5.3 / GPT-5.4 / GPT-5.5 model families
  * - Opus 4.6+ models (xhigh maps to adaptive effort "max" on Anthropic-compatible providers)
  */
 export function supportsXhigh<TApi extends Api>(model: Model<TApi>): boolean {
-	if (model.id.includes("gpt-5.2") || model.id.includes("gpt-5.3") || model.id.includes("gpt-5.4")) {
+	if (
+		model.id.includes("gpt-5.2") ||
+		model.id.includes("gpt-5.3") ||
+		model.id.includes("gpt-5.4") ||
+		model.id.includes("gpt-5.5")
+	) {
 		return true;
 	}
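Since the guard is a substring match on model.id, codex variants of a supported family (for example a hypothetical gpt-5.5-codex id) pass the same check. A small usage sketch; getModel may return undefined for ids missing from the registry:

import { getModel, supportsXhigh } from "@mariozechner/pi-ai";

for (const id of ["gpt-5.4", "gpt-5.5", "gpt-5.5-codex"]) {
	const model = getModel("openai-codex", id);
	// Each id contains a supported family substring, so supportsXhigh
	// returns true for every model the registry actually knows.
	if (model) console.log(id, supportsXhigh(model)); // -> true
}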


@@ -2,7 +2,10 @@ import { mkdtempSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, describe, expect, it, vi } from "vitest";
-import { streamOpenAICodexResponses } from "../src/providers/openai-codex-responses.js";
+import {
+	streamOpenAICodexResponses,
+	streamSimpleOpenAICodexResponses,
+} from "../src/providers/openai-codex-responses.js";
 import type { Context, Model } from "../src/types.js";

 const originalFetch = global.fetch;
@@ -403,6 +406,61 @@ describe("openai-codex streaming", () => {
 		await streamResult.result();
 	});

+	it("preserves gpt-5.5 xhigh reasoning effort from simple options", async () => {
+		const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-"));
+		process.env.PI_CODING_AGENT_DIR = tempDir;
+
+		const token = mockToken();
+		const sse = buildSSEPayload({ status: "completed" });
+		const encoder = new TextEncoder();
+		const stream = new ReadableStream<Uint8Array>({
+			start(controller) {
+				controller.enqueue(encoder.encode(sse));
+				controller.close();
+			},
+		});
+
+		let requestedReasoning: unknown;
+		global.fetch = vi.fn(async (input: string | URL, init?: RequestInit) => {
+			const url = typeof input === "string" ? input : input.toString();
+			if (url === "https://api.github.com/repos/openai/codex/releases/latest") {
+				return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), { status: 200 });
+			}
+			if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) {
+				return new Response("PROMPT", { status: 200, headers: { etag: '"etag"' } });
+			}
+			if (url === "https://chatgpt.com/backend-api/codex/responses") {
+				const body = typeof init?.body === "string" ? (JSON.parse(init.body) as Record<string, unknown>) : null;
+				requestedReasoning = body?.reasoning;
+				return new Response(stream, {
+					status: 200,
+					headers: { "content-type": "text/event-stream" },
+				});
+			}
+			return new Response("not found", { status: 404 });
+		}) as typeof fetch;
+
+		const model: Model<"openai-codex-responses"> = {
+			id: "gpt-5.5",
+			name: "GPT-5.5",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: "https://chatgpt.com/backend-api",
+			reasoning: true,
+			input: ["text"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 400000,
+			maxTokens: 128000,
+		};
+
+		const context: Context = {
+			systemPrompt: "You are a helpful assistant.",
+			messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
+		};
+
+		await streamSimpleOpenAICodexResponses(model, context, { apiKey: token, reasoning: "xhigh" }).result();
+		expect(requestedReasoning).toEqual({ effort: "xhigh", summary: "auto" });
+	});
+
 	it.each(["gpt-5.3-codex", "gpt-5.4"])("clamps %s minimal reasoning effort to low", async (modelId) => {
 		const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-"));
 		process.env.PI_CODING_AGENT_DIR = tempDir;
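The new test's capture technique generalizes beyond this provider: stub global.fetch with vi.fn, parse init.body, and assert on the recorded field. A standalone sketch of just that pattern, with the URL and payload shape taken from the test above (the real test drives streamSimpleOpenAICodexResponses rather than calling fetch directly):

import { expect, it, vi } from "vitest";

it("records the reasoning field sent to the Codex endpoint", async () => {
	let requestedReasoning: unknown;
	global.fetch = vi.fn(async (_input: string | URL, init?: RequestInit) => {
		const body = typeof init?.body === "string" ? (JSON.parse(init.body) as Record<string, unknown>) : null;
		requestedReasoning = body?.reasoning;
		return new Response("{}", { status: 200 });
	}) as typeof fetch;

	// Stand-in request; the payload mirrors what the provider is expected to send.
	await fetch("https://chatgpt.com/backend-api/codex/responses", {
		method: "POST",
		body: JSON.stringify({ reasoning: { effort: "xhigh", summary: "auto" } }),
	});

	expect(requestedReasoning).toEqual({ effort: "xhigh", summary: "auto" });
});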


@@ -1135,12 +1135,12 @@ describe("Generate E2E Tests", () => {
 		await handleStreaming(llm, { apiKey: openaiCodexToken });
 	});

-	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort high", { retry: 3 }, async () => {
-		await handleThinking(llm, { apiKey: openaiCodexToken, reasoningEffort: "high" });
+	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort xhigh", { retry: 3 }, async () => {
+		await handleThinking(llm, { apiKey: openaiCodexToken, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-		await multiTurn(llm, { apiKey: openaiCodexToken, reasoningEffort: "high" });
+		await multiTurn(llm, { apiKey: openaiCodexToken, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle image input", { retry: 3 }, async () => {
@@ -1164,12 +1164,12 @@
 		await handleStreaming(llm, wsOptions);
 	});

-	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort high", { retry: 3 }, async () => {
-		await handleThinking(llm, { ...wsOptions, reasoningEffort: "high" });
+	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort xhigh", { retry: 3 }, async () => {
+		await handleThinking(llm, { ...wsOptions, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-		await multiTurn(llm, { ...wsOptions, reasoningEffort: "high" });
+		await multiTurn(llm, { ...wsOptions, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle image input", { retry: 3 }, async () => {


@@ -20,8 +20,8 @@ describe("supportsXhigh", () => {
 		expect(supportsXhigh(model!)).toBe(false);
 	});

-	it("returns true for GPT-5.4 models", () => {
-		const model = getModel("openai-codex", "gpt-5.4");
+	it.each(["gpt-5.4", "gpt-5.5"] as const)("returns true for %s models", (modelId) => {
+		const model = getModel("openai-codex", modelId);
 		expect(model).toBeDefined();
 		expect(supportsXhigh(model!)).toBe(true);
 	});
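The as const tuple keeps the it.each callback parameter typed as the literal union "gpt-5.4" | "gpt-5.5" rather than plain string; the same pattern in isolation:

import { expect, it } from "vitest";

// `as const` narrows each entry, so modelId is a string-literal union here.
it.each(["gpt-5.4", "gpt-5.5"] as const)("runs once per id (%s)", (modelId) => {
	expect(modelId.startsWith("gpt-5.")).toBe(true);
});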


@@ -24,6 +24,7 @@
 ### Fixed
+- Fixed `--thinking xhigh` for `openai-codex` `gpt-5.5` so it is no longer downgraded to `high`.
 - Fixed git package installs with custom `npmCommand` values such as `pnpm` by avoiding npm-specific production flags in that compatibility path ([#3604](https://github.com/badlogic/pi-mono/issues/3604))
 - Fixed first user messages rendering without spacing after existing notices such as compaction summaries or status messages ([#3613](https://github.com/badlogic/pi-mono/issues/3613))
 - Fixed the handoff extension example to use the replacement-session context after creating a new session, avoiding stale `ctx` errors when it installs the generated prompt ([#3606](https://github.com/badlogic/pi-mono/issues/3606))
@@ -37,6 +38,7 @@
 - Fixed `ctx.ui.setWorkingMessage()` to persist across loader recreation, matching the behavior of `ctx.ui.setWorkingIndicator()` ([#3566](https://github.com/badlogic/pi-mono/issues/3566))
 - Fixed coding-agent `fs.watch` error handling for theme and git-footer watchers to retry after transient watcher failures such as `EMFILE`, avoiding startup crashes in large repos ([#3564](https://github.com/badlogic/pi-mono/issues/3564))
 - Fixed built-in `kimi-coding` model generation to attach the expected `User-Agent` header so direct Kimi Coding requests use the provider's expected client identity ([#3586](https://github.com/badlogic/pi-mono/issues/3586))
+- Fixed `models.json` Anthropic-compatible provider configuration to accept `compat.supportsEagerToolInputStreaming`, allowing proxies that reject per-tool `eager_input_streaming` to use the legacy fine-grained tool streaming beta header instead ([#3575](https://github.com/badlogic/pi-mono/issues/3575))

 ## [0.69.0] - 2026-04-22