Preserve Responses streaming usage for compaction

Claude Code relies on assistant usage metadata as a token-count anchor when deciding whether to auto-compact resumed sessions. The Responses streaming transformer dropped usage from response.completed, leaving transcript input/output usage at zero and allowing long sessions to overrun provider context windows before auto-compact could trigger.

Constraint: OpenAI Responses emits final usage on response.completed rather than on ordinary text/tool deltas

Rejected: Lower the local auto-compact window only | masks the bad usage metadata and still misleads session accounting

Confidence: high

Scope-risk: narrow

Directive: Keep streaming usage mapped when changing Responses SSE conversion; Claude Code context management depends on it

Tested: pnpm --filter @musistudio/llms tsx --test scripts/openaiResponsesTransformer.test.ts scripts/providerError.test.ts

Tested: pnpm build
This commit is contained in:
warkcod 2026-05-20 16:52:35 +08:00
parent 8b7eba7440
commit 6c5fa1b51f
2 changed files with 82 additions and 0 deletions

View file

@ -0,0 +1,67 @@
import assert from "node:assert/strict";
import { describe, it } from "node:test";
import { OpenAIResponsesTransformer } from "../src/transformer/openai.responses.transformer";
/**
 * Drains a byte stream to completion and returns its contents decoded as text.
 *
 * Chunks are decoded in streaming mode so a multi-byte character split across
 * two chunks is still decoded correctly; a final flush emits anything the
 * decoder was still buffering.
 *
 * @param stream - The byte stream to consume. It is locked by the reader this
 *   function acquires and is fully read before the promise resolves.
 * @returns The concatenated decoded text of every chunk.
 */
const readStream = async (stream: ReadableStream<Uint8Array>) => {
  const textDecoder = new TextDecoder();
  const source = stream.getReader();
  const pieces: string[] = [];
  for (
    let step = await source.read();
    !step.done;
    step = await source.read()
  ) {
    pieces.push(textDecoder.decode(step.value, { stream: true }));
  }
  // Flush whatever the decoder is still holding from an incomplete sequence.
  pieces.push(textDecoder.decode());
  return pieces.join("");
};
/**
 * Extracts the JSON payloads carried on the `data: ` lines of an SSE transcript.
 *
 * Lines that do not start with the literal prefix `data: ` are ignored, as are
 * empty payloads and the `[DONE]` sentinel. Every remaining payload is parsed
 * with `JSON.parse`, so a malformed payload throws.
 *
 * @param output - Raw SSE text; both LF and CRLF line endings are accepted.
 * @returns The parsed payloads in the order they appear.
 */
const parseSseData = (output: string) => {
  const dataPrefix = "data: ";
  const events = [];
  for (const row of output.split(/\r?\n/)) {
    if (!row.startsWith(dataPrefix)) continue;
    const payload = row.slice(dataPrefix.length).trim();
    if (payload === "" || payload === "[DONE]") continue;
    events.push(JSON.parse(payload));
  }
  return events;
};
describe("OpenAIResponsesTransformer", () => {
  // Regression test: usage reported on the response.completed event must be
  // carried onto the transformed chunk whose finish_reason is "stop".
  it("preserves streaming usage from response.completed", async () => {
    const completedEvent = {
      type: "response.completed",
      response: {
        id: "resp_123",
        model: "gpt-5.5",
        output: [{ type: "message" }],
        usage: {
          input_tokens: 12345,
          output_tokens: 67,
          total_tokens: 12412,
        },
      },
    };
    // A single SSE frame: one data line terminated by a blank line.
    const ssePayload = `data: ${JSON.stringify(completedEvent)}\n\n`;
    const upstreamResponse = new Response(ssePayload, {
      headers: { "Content-Type": "text/event-stream" },
    });

    const transformer = new OpenAIResponsesTransformer();
    const transformed = await transformer.transformResponseOut(
      upstreamResponse
    );
    assert.ok(transformed.body);

    const sseText = await readStream(transformed.body);
    const stopChunk = parseSseData(sseText).find(
      (candidate) => candidate.choices?.[0]?.finish_reason === "stop"
    );
    // Responses-style token fields are expected under the
    // prompt/completion/total names on the output chunk.
    assert.deepEqual(stopChunk?.usage, {
      prompt_tokens: 12345,
      completion_tokens: 67,
      total_tokens: 12412,
    });
  });
});

View file

@ -61,6 +61,11 @@ interface ResponsesStreamEvent {
output?: Array<{
type: string;
}>;
usage?: {
input_tokens?: number;
output_tokens?: number;
total_tokens?: number;
};
};
arguments?: string;
reasoning_summary?: string; // 添加推理摘要支持
@ -550,6 +555,16 @@ export class OpenAIResponsesTransformer implements Transformer {
finish_reason: finishReason,
},
],
usage: data.response?.usage
? {
prompt_tokens:
data.response.usage.input_tokens || 0,
completion_tokens:
data.response.usage.output_tokens || 0,
total_tokens:
data.response.usage.total_tokens || 0,
}
: undefined,
};
controller.enqueue(