pi-mono/packages/coding-agent/test/compaction-summary-reasoning.test.ts
Armin Ronacher 3d9e14d748
Some checks are pending
CI / build-check-test (push) Waiting to run
fix(compaction): clamp summary output tokens
Fixes #4390.
2026-05-11 16:36:27 +02:00

134 lines
3.6 KiB
TypeScript

import type { AgentMessage } from "@earendil-works/pi-agent-core";
import type { AssistantMessage, Model } from "@earendil-works/pi-ai";
import { beforeEach, describe, expect, it, vi } from "vitest";
import { type CompactionPreparation, compact, generateSummary } from "../src/core/compaction/index.js";
// Vitest hoists vi.mock() calls above the imports, so the mock function the
// factory closes over is created through vi.hoisted() to exist at that point.
const { completeSimpleMock } = vi.hoisted(() => {
	return { completeSimpleMock: vi.fn() };
});

// Keep every real export of the pi-ai package and swap only `completeSimple`
// for the mock above.
vi.mock("@earendil-works/pi-ai", async (importOriginal) => {
	const realModule = await importOriginal<typeof import("@earendil-works/pi-ai")>();
	return { ...realModule, completeSimple: completeSimpleMock };
});
/**
 * Builds a minimal Anthropic model fixture for the tests in this file.
 *
 * @param reasoning - Stored as the model's `reasoning` flag; also selects the id and display name.
 * @param maxTokens - Stored as the model's `maxTokens` field; 8192 when omitted.
 * @returns A model object with fixed API/provider fields, text-only input and zeroed costs.
 */
function createModel(reasoning: boolean, maxTokens = 8192): Model<"anthropic-messages"> {
	let id = "non-reasoning-model";
	let name = "Non-reasoning Model";
	if (reasoning) {
		id = "reasoning-model";
		name = "Reasoning Model";
	}

	return {
		id,
		name,
		api: "anthropic-messages",
		provider: "anthropic",
		baseUrl: "https://api.anthropic.com",
		reasoning,
		input: ["text"],
		cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
		contextWindow: 200000,
		maxTokens,
	};
}
/** Token accounting attached to the canned summary reply; every cost is zero. */
const mockSummaryUsage = {
	input: 10,
	output: 10,
	cacheRead: 0,
	cacheWrite: 0,
	totalTokens: 20,
	cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
};

/** Canned assistant reply that the mocked `completeSimple` resolves with in every test. */
const mockSummaryResponse: AssistantMessage = {
	role: "assistant",
	content: [{ type: "text", text: "## Goal\nTest summary" }],
	api: "anthropic-messages",
	provider: "anthropic",
	model: "claude-sonnet-4-5",
	usage: mockSummaryUsage,
	stopReason: "stop",
	timestamp: Date.now(),
};

/** The single user message that makes up the conversation under test. */
const summarizePrompt: AgentMessage = {
	role: "user",
	content: "Summarize this.",
	timestamp: Date.now(),
};

/** Conversation passed to `generateSummary` and reused inside the compaction preparation. */
const messages: AgentMessage[] = [summarizePrompt];
/**
 * Verifies the options the compaction code hands to the mocked `completeSimple`:
 * the `reasoning` level forwarded by `generateSummary`, and the `maxTokens`
 * values requested by `compact`.
 */
describe("generateSummary reasoning options", () => {
	/** Type of the thinking-level argument, i.e. the ninth parameter of `generateSummary`. */
	type ThinkingArg = Parameters<typeof generateSummary>[8];

	/**
	 * Runs `generateSummary` on the shared conversation with a 2000 budget and the
	 * "test-key" API key, leaving the four arguments before the thinking level unset.
	 */
	const summarize = (model: ReturnType<typeof createModel>, thinking: ThinkingArg) =>
		generateSummary(messages, model, 2000, "test-key", undefined, undefined, undefined, undefined, thinking);

	/** Third argument (the options object) of the first recorded `completeSimple` call. */
	const firstCallOptions = () => completeSimpleMock.mock.calls[0][2];

	// Start each test with a clean call history and the canned summary reply.
	beforeEach(() => {
		completeSimpleMock.mockReset();
		completeSimpleMock.mockResolvedValue(mockSummaryResponse);
	});

	it("uses the provided thinking level for reasoning-capable models", async () => {
		await summarize(createModel(true), "medium");

		expect(completeSimpleMock).toHaveBeenCalledTimes(1);
		expect(firstCallOptions()).toMatchObject({ reasoning: "medium", apiKey: "test-key" });
	});

	it("does not set reasoning when thinking is off", async () => {
		await summarize(createModel(true), "off");

		expect(completeSimpleMock).toHaveBeenCalledTimes(1);
		expect(firstCallOptions()).toMatchObject({ apiKey: "test-key" });
		// The key must be absent altogether, not merely undefined.
		expect(firstCallOptions()).not.toHaveProperty("reasoning");
	});

	it("does not set reasoning for non-reasoning models", async () => {
		await summarize(createModel(false), "medium");

		expect(completeSimpleMock).toHaveBeenCalledTimes(1);
		expect(firstCallOptions()).toMatchObject({ apiKey: "test-key" });
		expect(firstCallOptions()).not.toHaveProperty("reasoning");
	});

	it("clamps compaction summary maxTokens to the model output cap", async () => {
		const outputCap = 128000;
		// reserveTokens is set well above the model's output cap.
		// NOTE(review): presumably the summary budgets are derived from reserveTokens,
		// and the split turn with prefix messages is what produces the second call;
		// confirm against the compaction source.
		const preparation: CompactionPreparation = {
			firstKeptEntryId: "entry-keep",
			messagesToSummarize: messages,
			turnPrefixMessages: messages,
			isSplitTurn: true,
			tokensBefore: 600000,
			fileOps: { read: new Set(), written: new Set(), edited: new Set() },
			settings: { enabled: true, reserveTokens: 500000, keepRecentTokens: 20000 },
		};

		await compact(preparation, createModel(false, outputCap), "test-key");

		// Exactly two completion calls are expected, each asking for the model's cap.
		const requestedMaxTokens = completeSimpleMock.mock.calls.map((call) => call[2]?.maxTokens);
		expect(requestedMaxTokens).toEqual([outputCap, outputCap]);
	});
});