From 6c5fa1b51f424cd786e48196fffdff240bdde9c5 Mon Sep 17 00:00:00 2001 From: warkcod Date: Wed, 20 May 2026 16:52:35 +0800 Subject: [PATCH] Preserve Responses streaming usage for compaction Claude Code relies on assistant usage metadata as a token-count anchor when deciding whether to auto-compact resumed sessions. The Responses streaming transformer dropped usage from response.completed, leaving transcript input/output usage at zero and allowing long sessions to overrun provider context windows before auto-compact could trigger. Constraint: OpenAI Responses emits final usage on response.completed rather than ordinary text/tool deltas Rejected: Lower the local auto-compact window only | masks the bad usage metadata and still misleads session accounting Confidence: high Scope-risk: narrow Directive: Keep streaming usage mapped when changing Responses SSE conversion; Claude Code context management depends on it Tested: pnpm --filter @musistudio/llms tsx --test scripts/openaiResponsesTransformer.test.ts scripts/providerError.test.ts Tested: pnpm build --- .../openaiResponsesTransformer.test.ts | 67 +++++++++++++++++++ .../openai.responses.transformer.ts | 15 +++++ 2 files changed, 82 insertions(+) create mode 100644 packages/core/scripts/openaiResponsesTransformer.test.ts diff --git a/packages/core/scripts/openaiResponsesTransformer.test.ts b/packages/core/scripts/openaiResponsesTransformer.test.ts new file mode 100644 index 0000000..6e12cb4 --- /dev/null +++ b/packages/core/scripts/openaiResponsesTransformer.test.ts @@ -0,0 +1,67 @@ +import assert from "node:assert/strict"; +import { describe, it } from "node:test"; +import { OpenAIResponsesTransformer } from "../src/transformer/openai.responses.transformer"; + +const readStream = async (stream: ReadableStream) => { + const reader = stream.getReader(); + const decoder = new TextDecoder(); + let output = ""; + + while (true) { + const { done, value } = await reader.read(); + if (done) break; + output += decoder.decode(value, { stream: true }); + } + + output += decoder.decode(); + return output; +}; + +const parseSseData = (output: string) => + output + .split(/\r?\n/) + .filter((line) => line.startsWith("data: ")) + .map((line) => line.slice("data: ".length).trim()) + .filter((line) => line && line !== "[DONE]") + .map((line) => JSON.parse(line)); + +describe("OpenAIResponsesTransformer", () => { + it("preserves streaming usage from response.completed", async () => { + const upstream = [ + { + type: "response.completed", + response: { + id: "resp_123", + model: "gpt-5.5", + output: [{ type: "message" }], + usage: { + input_tokens: 12345, + output_tokens: 67, + total_tokens: 12412, + }, + }, + }, + ] + .map((event) => `data: ${JSON.stringify(event)}\n\n`) + .join(""); + + const response = new Response(upstream, { + headers: { "Content-Type": "text/event-stream" }, + }); + + const transformed = + await new OpenAIResponsesTransformer().transformResponseOut(response); + assert.ok(transformed.body); + + const chunks = parseSseData(await readStream(transformed.body)); + const doneChunk = chunks.find( + (chunk) => chunk.choices?.[0]?.finish_reason === "stop" + ); + + assert.deepEqual(doneChunk?.usage, { + prompt_tokens: 12345, + completion_tokens: 67, + total_tokens: 12412, + }); + }); +}); diff --git a/packages/core/src/transformer/openai.responses.transformer.ts b/packages/core/src/transformer/openai.responses.transformer.ts index a0a3857..6be9cd0 100644 --- a/packages/core/src/transformer/openai.responses.transformer.ts +++ b/packages/core/src/transformer/openai.responses.transformer.ts @@ -61,6 +61,11 @@ interface ResponsesStreamEvent { output?: Array<{ type: string; }>; + usage?: { + input_tokens?: number; + output_tokens?: number; + total_tokens?: number; + }; }; arguments?: string; reasoning_summary?: string; // 添加推理摘要支持 @@ -550,6 +555,16 @@ export class OpenAIResponsesTransformer implements Transformer { finish_reason: finishReason, }, ], + usage: data.response?.usage + ? { + prompt_tokens: + data.response.usage.input_tokens || 0, + completion_tokens: + data.response.usage.output_tokens || 0, + total_tokens: + data.response.usage.total_tokens || 0, + } + : undefined, }; controller.enqueue(