fix(ai): support xhigh for Codex GPT-5.5

Mario Zechner 2026-04-23 22:49:09 +02:00
parent 8700ac1f0e
commit bf4aa3a601
7 changed files with 80 additions and 12 deletions


@@ -225,7 +225,8 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
 /**
  * Thinking/reasoning level for models that support it.
- * Note: "xhigh" is only supported by OpenAI gpt-5.1-codex-max, gpt-5.2, gpt-5.2-codex, gpt-5.3, and gpt-5.3-codex models.
+ * Note: "xhigh" is only supported by selected model families. Use supportsXhigh() from @mariozechner/pi-ai
+ * to detect support for a concrete model.
  */
 export type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";
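The new comment points callers at runtime detection instead of a hard-coded model list. A minimal sketch of that pattern, assuming the caller targets openai-codex models (clampCodexThinkingLevel is a hypothetical helper; supportsXhigh and getModel are the package exports exercised in the diffs below):

import { getModel, supportsXhigh } from "@mariozechner/pi-ai";

type ThinkingLevel = "off" | "minimal" | "low" | "medium" | "high" | "xhigh";

// Hypothetical helper: downgrade "xhigh" to "high" when the concrete model
// does not advertise xhigh support.
function clampCodexThinkingLevel(modelId: string, requested: ThinkingLevel): ThinkingLevel {
	if (requested !== "xhigh") return requested;
	const model = getModel("openai-codex", modelId);
	return model && supportsXhigh(model) ? "xhigh" : "high";
}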


@@ -8,6 +8,8 @@
 ### Fixed
 - Fixed `anthropic-messages` tool streaming compatibility by adding `compat.supportsEagerToolInputStreaming`, allowing Anthropic-compatible providers to omit per-tool `eager_input_streaming` and use the legacy fine-grained tool streaming beta header instead ([#3575](https://github.com/badlogic/pi-mono/issues/3575))
+- Fixed `supportsXhigh()` to recognize `openai-codex` `gpt-5.5`, preserving `xhigh` reasoning requests instead of clamping them to `high`.
 - Fixed `openai-completions` streamed tool-call assembly to coalesce deltas by stable tool index when OpenAI-compatible gateways mutate tool call IDs mid-stream, preventing malformed Kimi K2.6/OpenCode tool streams from splitting one call into multiple bogus tool calls ([#3576](https://github.com/badlogic/pi-mono/issues/3576))
 - Fixed `packages/ai` E2E coverage to use currently supported OpenAI Responses and OpenAI Codex models, and updated the Bedrock adaptive-thinking payload expectation to match the current `display: "summarized"` shape.
 - Fixed built-in `kimi-coding` model generation to attach `User-Agent: KimiCLI/1.5` to all generated Kimi models, overriding the Anthropic SDK default UA so direct Kimi Coding requests use the provider's expected client identity ([#3586](https://github.com/badlogic/pi-mono/issues/3586))


@@ -49,11 +49,16 @@ export function calculateCost<TApi extends Api>(model: Model<TApi>, usage: Usage
  * Check if a model supports xhigh thinking level.
  *
  * Supported today:
- * - GPT-5.2 / GPT-5.3 / GPT-5.4 model families
+ * - GPT-5.2 / GPT-5.3 / GPT-5.4 / GPT-5.5 model families
  * - Opus 4.6+ models (xhigh maps to adaptive effort "max" on Anthropic-compatible providers)
  */
 export function supportsXhigh<TApi extends Api>(model: Model<TApi>): boolean {
-	if (model.id.includes("gpt-5.2") || model.id.includes("gpt-5.3") || model.id.includes("gpt-5.4")) {
+	if (
+		model.id.includes("gpt-5.2") ||
+		model.id.includes("gpt-5.3") ||
+		model.id.includes("gpt-5.4") ||
+		model.id.includes("gpt-5.5")
+	) {
 		return true;
 	}
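Since the guard is a substring match on model.id, codex variants of a supported family (for example a hypothetical gpt-5.5-codex id) pass the same check. A small usage sketch; getModel may return undefined for ids missing from the registry:

import { getModel, supportsXhigh } from "@mariozechner/pi-ai";

for (const id of ["gpt-5.4", "gpt-5.5", "gpt-5.5-codex"]) {
	const model = getModel("openai-codex", id);
	// Each id contains a supported family substring, so supportsXhigh
	// returns true for every model the registry actually knows.
	if (model) console.log(id, supportsXhigh(model)); // -> true
}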


@@ -2,7 +2,10 @@ import { mkdtempSync } from "node:fs";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 import { afterEach, describe, expect, it, vi } from "vitest";
-import { streamOpenAICodexResponses } from "../src/providers/openai-codex-responses.js";
+import {
+	streamOpenAICodexResponses,
+	streamSimpleOpenAICodexResponses,
+} from "../src/providers/openai-codex-responses.js";
 import type { Context, Model } from "../src/types.js";

 const originalFetch = global.fetch;
@@ -403,6 +406,61 @@ describe("openai-codex streaming", () => {
 		await streamResult.result();
 	});

+	it("preserves gpt-5.5 xhigh reasoning effort from simple options", async () => {
+		const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-"));
+		process.env.PI_CODING_AGENT_DIR = tempDir;
+
+		const token = mockToken();
+		const sse = buildSSEPayload({ status: "completed" });
+		const encoder = new TextEncoder();
+		const stream = new ReadableStream<Uint8Array>({
+			start(controller) {
+				controller.enqueue(encoder.encode(sse));
+				controller.close();
+			},
+		});
+
+		let requestedReasoning: unknown;
+		global.fetch = vi.fn(async (input: string | URL, init?: RequestInit) => {
+			const url = typeof input === "string" ? input : input.toString();
+			if (url === "https://api.github.com/repos/openai/codex/releases/latest") {
+				return new Response(JSON.stringify({ tag_name: "rust-v0.0.0" }), { status: 200 });
+			}
+			if (url.startsWith("https://raw.githubusercontent.com/openai/codex/")) {
+				return new Response("PROMPT", { status: 200, headers: { etag: '"etag"' } });
+			}
+			if (url === "https://chatgpt.com/backend-api/codex/responses") {
+				const body = typeof init?.body === "string" ? (JSON.parse(init.body) as Record<string, unknown>) : null;
+				requestedReasoning = body?.reasoning;
+				return new Response(stream, {
+					status: 200,
+					headers: { "content-type": "text/event-stream" },
+				});
+			}
+			return new Response("not found", { status: 404 });
+		}) as typeof fetch;
+
+		const model: Model<"openai-codex-responses"> = {
+			id: "gpt-5.5",
+			name: "GPT-5.5",
+			api: "openai-codex-responses",
+			provider: "openai-codex",
+			baseUrl: "https://chatgpt.com/backend-api",
+			reasoning: true,
+			input: ["text"],
+			cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+			contextWindow: 400000,
+			maxTokens: 128000,
+		};
+
+		const context: Context = {
+			systemPrompt: "You are a helpful assistant.",
+			messages: [{ role: "user", content: "Say hello", timestamp: Date.now() }],
+		};
+
+		await streamSimpleOpenAICodexResponses(model, context, { apiKey: token, reasoning: "xhigh" }).result();
+		expect(requestedReasoning).toEqual({ effort: "xhigh", summary: "auto" });
+	});
+
 	it.each(["gpt-5.3-codex", "gpt-5.4"])("clamps %s minimal reasoning effort to low", async (modelId) => {
 		const tempDir = mkdtempSync(join(tmpdir(), "pi-codex-stream-"));
 		process.env.PI_CODING_AGENT_DIR = tempDir;
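The new test's capture technique generalizes beyond this provider: stub global.fetch with vi.fn, parse init.body, and assert on the recorded field. A standalone sketch of just that pattern, with the URL and payload shape taken from the test above (the real test drives streamSimpleOpenAICodexResponses rather than calling fetch directly):

import { expect, it, vi } from "vitest";

it("records the reasoning field sent to the Codex endpoint", async () => {
	let requestedReasoning: unknown;
	global.fetch = vi.fn(async (_input: string | URL, init?: RequestInit) => {
		const body = typeof init?.body === "string" ? (JSON.parse(init.body) as Record<string, unknown>) : null;
		requestedReasoning = body?.reasoning;
		return new Response("{}", { status: 200 });
	}) as typeof fetch;

	// Stand-in request; the payload mirrors what the provider is expected to send.
	await fetch("https://chatgpt.com/backend-api/codex/responses", {
		method: "POST",
		body: JSON.stringify({ reasoning: { effort: "xhigh", summary: "auto" } }),
	});

	expect(requestedReasoning).toEqual({ effort: "xhigh", summary: "auto" });
});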


@@ -1135,12 +1135,12 @@ describe("Generate E2E Tests", () => {
 		await handleStreaming(llm, { apiKey: openaiCodexToken });
 	});

-	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort high", { retry: 3 }, async () => {
-		await handleThinking(llm, { apiKey: openaiCodexToken, reasoningEffort: "high" });
+	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort xhigh", { retry: 3 }, async () => {
+		await handleThinking(llm, { apiKey: openaiCodexToken, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-		await multiTurn(llm, { apiKey: openaiCodexToken, reasoningEffort: "high" });
+		await multiTurn(llm, { apiKey: openaiCodexToken, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle image input", { retry: 3 }, async () => {
@@ -1164,12 +1164,12 @@
 		await handleStreaming(llm, wsOptions);
 	});

-	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort high", { retry: 3 }, async () => {
-		await handleThinking(llm, { ...wsOptions, reasoningEffort: "high" });
+	it.skipIf(!openaiCodexToken)("should handle thinking with reasoningEffort xhigh", { retry: 3 }, async () => {
+		await handleThinking(llm, { ...wsOptions, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle multi-turn with thinking and tools", { retry: 3 }, async () => {
-		await multiTurn(llm, { ...wsOptions, reasoningEffort: "high" });
+		await multiTurn(llm, { ...wsOptions, reasoningEffort: "xhigh" });
 	});

 	it.skipIf(!openaiCodexToken)("should handle image input", { retry: 3 }, async () => {


@@ -20,8 +20,8 @@ describe("supportsXhigh", () => {
 		expect(supportsXhigh(model!)).toBe(false);
 	});

-	it("returns true for GPT-5.4 models", () => {
-		const model = getModel("openai-codex", "gpt-5.4");
+	it.each(["gpt-5.4", "gpt-5.5"] as const)("returns true for %s models", (modelId) => {
+		const model = getModel("openai-codex", modelId);
 		expect(model).toBeDefined();
 		expect(supportsXhigh(model!)).toBe(true);
 	});
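The as const tuple keeps the it.each callback parameter typed as the literal union "gpt-5.4" | "gpt-5.5" rather than plain string; the same pattern in isolation:

import { expect, it } from "vitest";

// `as const` narrows each entry, so modelId is a string-literal union here.
it.each(["gpt-5.4", "gpt-5.5"] as const)("runs once per id (%s)", (modelId) => {
	expect(modelId.startsWith("gpt-5.")).toBe(true);
});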


@@ -24,6 +24,7 @@
 ### Fixed
+- Fixed `--thinking xhigh` for `openai-codex` `gpt-5.5` so it is no longer downgraded to `high`.
 - Fixed git package installs with custom `npmCommand` values such as `pnpm` by avoiding npm-specific production flags in that compatibility path ([#3604](https://github.com/badlogic/pi-mono/issues/3604))
 - Fixed first user messages rendering without spacing after existing notices such as compaction summaries or status messages ([#3613](https://github.com/badlogic/pi-mono/issues/3613))
 - Fixed the handoff extension example to use the replacement-session context after creating a new session, avoiding stale `ctx` errors when it installs the generated prompt ([#3606](https://github.com/badlogic/pi-mono/issues/3606))
@@ -37,6 +38,7 @@
 - Fixed `ctx.ui.setWorkingMessage()` to persist across loader recreation, matching the behavior of `ctx.ui.setWorkingIndicator()` ([#3566](https://github.com/badlogic/pi-mono/issues/3566))
 - Fixed coding-agent `fs.watch` error handling for theme and git-footer watchers to retry after transient watcher failures such as `EMFILE`, avoiding startup crashes in large repos ([#3564](https://github.com/badlogic/pi-mono/issues/3564))
 - Fixed built-in `kimi-coding` model generation to attach the expected `User-Agent` header so direct Kimi Coding requests use the provider's expected client identity ([#3586](https://github.com/badlogic/pi-mono/issues/3586))
+- Fixed `models.json` Anthropic-compatible provider configuration to accept `compat.supportsEagerToolInputStreaming`, allowing proxies that reject per-tool `eager_input_streaming` to use the legacy fine-grained tool streaming beta header instead ([#3575](https://github.com/badlogic/pi-mono/issues/3575))

 ## [0.69.0] - 2026-04-22