mirror of
https://github.com/musistudio/claude-code-router.git
synced 2026-05-22 19:33:52 +00:00
Preserve Responses streaming usage for compaction
Claude Code relies on assistant usage metadata as a token-count anchor when deciding whether to auto-compact resumed sessions. The Responses streaming transformer dropped usage from response.completed, leaving transcript input/output usage at zero and allowing long sessions to overrun provider context windows before auto-compact could trigger.

Constraint: OpenAI Responses emits final usage on response.completed rather than on ordinary text/tool deltas
Rejected: Lower the local auto-compact window only | masks the bad usage metadata and still misleads session accounting
Confidence: high
Scope-risk: narrow
Directive: Keep streaming usage mapped when changing Responses SSE conversion; Claude Code context management depends on it
Tested: pnpm --filter @musistudio/llms tsx --test scripts/openaiResponsesTransformer.test.ts scripts/providerError.test.ts
Tested: pnpm build
This commit is contained in:
parent
8b7eba7440
commit
6c5fa1b51f
2 changed files with 82 additions and 0 deletions
67
packages/core/scripts/openaiResponsesTransformer.test.ts
Normal file
67
packages/core/scripts/openaiResponsesTransformer.test.ts
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
import assert from "node:assert/strict";
|
||||
import { describe, it } from "node:test";
|
||||
import { OpenAIResponsesTransformer } from "../src/transformer/openai.responses.transformer";
|
||||
|
||||
/**
 * Drains a byte stream to completion and returns its contents decoded as text.
 *
 * Chunks are decoded in streaming mode so that a multi-byte character split
 * across two chunks is still decoded correctly; a final flush call emits
 * anything the decoder was still holding back.
 *
 * @param stream - Byte stream to consume; it is read until it reports `done`.
 * @returns The concatenated decoded text of every chunk.
 */
const readStream = async (stream: ReadableStream<Uint8Array>) => {
  const byteReader = stream.getReader();
  const textDecoder = new TextDecoder();
  const pieces: string[] = [];

  for (
    let next = await byteReader.read();
    !next.done;
    next = await byteReader.read()
  ) {
    pieces.push(textDecoder.decode(next.value, { stream: true }));
  }

  // Flush whatever the decoder buffered from the last chunk.
  pieces.push(textDecoder.decode());
  return pieces.join("");
};
|
||||
|
||||
/**
 * Extracts the JSON payloads carried by the `data` lines of a raw
 * Server-Sent Events transcript.
 *
 * Lines that are not `data` fields (event names, comments, blank separators)
 * are ignored, as are empty payloads and the `[DONE]` sentinel.
 *
 * @param output - Raw SSE text, with LF or CRLF line endings.
 * @returns The parsed JSON value of each remaining `data` line, in order.
 * @throws SyntaxError if a remaining payload is not valid JSON.
 */
const parseSseData = (output: string) =>
  output
    .split(/\r?\n/)
    // The SSE format makes the space after "data:" optional, so match on the
    // field name alone; trimming below removes the optional space.
    .filter((line) => line.startsWith("data:"))
    .map((line) => line.slice("data:".length).trim())
    .filter((payload) => payload && payload !== "[DONE]")
    .map((payload) => JSON.parse(payload));
|
||||
|
||||
describe("OpenAIResponsesTransformer", () => {
  // Feeds the transformer a stream containing only a `response.completed`
  // event and checks that the emitted chunk with finish_reason "stop" carries
  // the upstream usage renamed to prompt/completion/total token counts.
  it("preserves streaming usage from response.completed", async () => {
    const completedEvent = {
      type: "response.completed",
      response: {
        id: "resp_123",
        model: "gpt-5.5",
        output: [{ type: "message" }],
        usage: {
          input_tokens: 12345,
          output_tokens: 67,
          total_tokens: 12412,
        },
      },
    };

    // One SSE frame: a data line followed by the blank-line terminator.
    const ssePayload = `data: ${JSON.stringify(completedEvent)}\n\n`;
    const upstreamResponse = new Response(ssePayload, {
      headers: { "Content-Type": "text/event-stream" },
    });

    const transformer = new OpenAIResponsesTransformer();
    const result = await transformer.transformResponseOut(upstreamResponse);
    assert.ok(result.body);

    const emitted = parseSseData(await readStream(result.body));
    const finalChunk = emitted.find(
      (entry) => entry.choices?.[0]?.finish_reason === "stop"
    );

    assert.deepEqual(finalChunk?.usage, {
      prompt_tokens: 12345,
      completion_tokens: 67,
      total_tokens: 12412,
    });
  });
});
|
||||
|
|
@ -61,6 +61,11 @@ interface ResponsesStreamEvent {
|
|||
output?: Array<{
|
||||
type: string;
|
||||
}>;
|
||||
usage?: {
|
||||
input_tokens?: number;
|
||||
output_tokens?: number;
|
||||
total_tokens?: number;
|
||||
};
|
||||
};
|
||||
arguments?: string;
|
||||
reasoning_summary?: string; // 添加推理摘要支持
|
||||
|
|
@ -550,6 +555,16 @@ export class OpenAIResponsesTransformer implements Transformer {
|
|||
finish_reason: finishReason,
|
||||
},
|
||||
],
|
||||
usage: data.response?.usage
|
||||
? {
|
||||
prompt_tokens:
|
||||
data.response.usage.input_tokens || 0,
|
||||
completion_tokens:
|
||||
data.response.usage.output_tokens || 0,
|
||||
total_tokens:
|
||||
data.response.usage.total_tokens || 0,
|
||||
}
|
||||
: undefined,
|
||||
};
|
||||
|
||||
controller.enqueue(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue