From 42771c1db377d190b670ec623a951e2ad7d51c3d Mon Sep 17 00:00:00 2001
From: Shoubhit Dash
Date: Thu, 16 Apr 2026 17:30:29 +0530
Subject: [PATCH] fix(compaction): budget retained tail with media

Stop passing `stripMedia: true` when converting messages for the
compaction token estimate, so media parts are included in the
serialized payload that gets measured.

Export the usable-token calculation from overflow.ts as `usable` and
drop the private copy in compaction.ts. The shared helper returns 0
when the model's context limit is 0 and clamps its result to be
non-negative; `isOverflow` now delegates to it.

Reword the config descriptions for the retained-turn options, remove
the leading instructions from the default compaction prompt so it
starts at the template section, and add a test in which an image
attachment larger than `tail_tokens` leaves the compaction part
without a `tail_start_id`.

---
 packages/opencode/src/config/config.ts        |  6 +-
 packages/opencode/src/session/compaction.ts   | 22 ++-----
 packages/opencode/src/session/overflow.ts     | 24 ++++---
 .../opencode/test/session/compaction.test.ts  | 62 +++++++++++++++++++
 4 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/packages/opencode/src/config/config.ts b/packages/opencode/src/config/config.ts
index b4e6268b14..97e96ccbf5 100644
--- a/packages/opencode/src/config/config.ts
+++ b/packages/opencode/src/config/config.ts
@@ -1009,13 +1009,15 @@ export const Info = z
             .int()
             .min(0)
             .optional()
-            .describe("Number of recent real user turns to keep verbatim during compaction (default: 2)"),
+            .describe(
+              "Number of recent user turns, including their following assistant/tool responses, to keep verbatim during compaction (default: 2)",
+            ),
           tail_tokens: z
             .number()
             .int()
             .min(0)
             .optional()
-            .describe("Token budget for retained recent turns during compaction"),
+            .describe("Token budget for retained recent turn spans during compaction"),
           reserved: z
             .number()
             .int()
diff --git a/packages/opencode/src/session/compaction.ts b/packages/opencode/src/session/compaction.ts
index 325eb5a3df..ff2b316c48 100644
--- a/packages/opencode/src/session/compaction.ts
+++ b/packages/opencode/src/session/compaction.ts
@@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event"
 import { Bus } from "@/bus"
 import * as Session from "./session"
 import { SessionID, MessageID, PartID } from "./schema"
-import { Provider, ProviderTransform } from "../provider"
+import { Provider } from "../provider"
 import { MessageV2 } from "./message-v2"
 import z from "zod"
 import { Token } from "../util"
@@ -17,7 +17,7 @@ import { Effect, Layer, Context } from "effect"
 import { InstanceState } from "@/effect"
 import { makeRuntime } from "@/effect/run-service"
 import { fn } from "@/util/fn"
-import { isOverflow as overflow } from "./overflow"
+import { isOverflow as overflow, usable } from "./overflow"
 
 export namespace SessionCompaction {
   const log = Log.create({ service: "session.compaction" })
@@ -43,13 +43,6 @@ export namespace SessionCompaction {
     id: MessageID
   }
 
-  function usable(input: { cfg: Config.Info; model: Provider.Model }) {
-    const reserved = input.cfg.compaction?.reserved ?? Math.min(20_000, ProviderTransform.maxOutputTokens(input.model))
-    return input.model.limit.input
-      ? Math.max(0, input.model.limit.input - reserved)
-      : Math.max(0, input.model.limit.context - ProviderTransform.maxOutputTokens(input.model))
-  }
-
   function tailBudget(input: { cfg: Config.Info; model: Provider.Model }) {
     return (
       input.cfg.compaction?.tail_tokens ??
@@ -131,7 +124,7 @@ export namespace SessionCompaction {
       messages: MessageV2.WithParts[]
       model: Provider.Model
     }) {
-      const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model, { stripMedia: true })
+      const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model)
       return Token.estimate(JSON.stringify(msgs))
     })
 
@@ -282,14 +275,7 @@ export namespace SessionCompaction {
           { sessionID: input.sessionID },
           { context: [], prompt: undefined },
         )
-        const defaultPrompt = `Summarize the older conversation history so another agent can continue the work with the retained recent turns.
-The most recent conversation turns will remain verbatim outside this summary, so focus on older context that is still needed to understand and continue the work.
-Include what we did, what we're doing, which files we're working on, and what we're going to do next.
-The summary that you construct will be used so that another agent can read it and continue the work.
-Do not call any tools. Respond only with the summary text.
-Respond in the same language as the user's messages in the conversation.
-
-When constructing the summary, try to stick to this template:
+        const defaultPrompt = `When constructing the summary, try to stick to this template:
 ---
 ## Goal
 
diff --git a/packages/opencode/src/session/overflow.ts b/packages/opencode/src/session/overflow.ts
index 6f48a760df..477b5815b2 100644
--- a/packages/opencode/src/session/overflow.ts
+++ b/packages/opencode/src/session/overflow.ts
@@ -5,18 +5,22 @@ import type { MessageV2 } from "./message-v2"
 
 const COMPACTION_BUFFER = 20_000
 
-export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
-  if (input.cfg.compaction?.auto === false) return false
+export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
   const context = input.model.limit.context
-  if (context === 0) return false
-
-  const count =
-    input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
+  if (context === 0) return 0
 
   const reserved =
     input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
-  const usable = input.model.limit.input
-    ? input.model.limit.input - reserved
-    : context - ProviderTransform.maxOutputTokens(input.model)
-  return count >= usable
+  return input.model.limit.input
+    ? Math.max(0, input.model.limit.input - reserved)
+    : Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
+}
+
+export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
+  if (input.cfg.compaction?.auto === false) return false
+  if (input.model.limit.context === 0) return false
+
+  const count =
+    input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
+  return count >= usable(input)
 }
diff --git a/packages/opencode/test/session/compaction.test.ts b/packages/opencode/test/session/compaction.test.ts
index 85bc6e54ad..015a1653a3 100644
--- a/packages/opencode/test/session/compaction.test.ts
+++ b/packages/opencode/test/session/compaction.test.ts
@@ -1044,6 +1044,68 @@ describe("session.compaction.process", () => {
     })
   })
 
+  test("falls back to full summary when retained tail media exceeds tail budget", async () => {
+    await using tmp = await tmpdir({ git: true })
+    const stub = llm()
+    let captured = ""
+    stub.push(
+      reply("summary", (input) => {
+        captured = JSON.stringify(input.messages)
+      }),
+    )
+    await Instance.provide({
+      directory: tmp.path,
+      fn: async () => {
+        const session = await svc.create({})
+        await user(session.id, "older")
+        const recent = await user(session.id, "recent image turn")
+        await svc.updatePart({
+          id: PartID.ascending(),
+          messageID: recent.id,
+          sessionID: session.id,
+          type: "file",
+          mime: "image/png",
+          filename: "big.png",
+          url: `data:image/png;base64,${"a".repeat(4_000)}`,
+        })
+        await SessionCompaction.create({
+          sessionID: session.id,
+          agent: "build",
+          model: ref,
+          auto: false,
+        })
+
+        const rt = liveRuntime(stub.layer, wide(), cfg({ tail_turns: 1, tail_tokens: 100 }))
+        try {
+          const msgs = await svc.messages({ sessionID: session.id })
+          const parent = msgs.at(-1)?.info.id
+          expect(parent).toBeTruthy()
+          await rt.runPromise(
+            SessionCompaction.Service.use((svc) =>
+              svc.process({
+                parentID: parent!,
+                messages: msgs,
+                sessionID: session.id,
+                auto: false,
+              }),
+            ),
+          )
+
+          const part = (await svc.messages({ sessionID: session.id }))
+            .at(-2)
+            ?.parts.find((item) => item.type === "compaction")
+
+          expect(part?.type).toBe("compaction")
+          if (part?.type === "compaction") expect(part.tail_start_id).toBeUndefined()
+          expect(captured).toContain("recent image turn")
+          expect(captured).toContain("Attached image/png: big.png")
+        } finally {
+          await rt.dispose()
+        }
+      },
+    })
+  })
+
   test("allows plugins to disable synthetic continue prompt", async () => {
     await using tmp = await tmpdir()
     await Instance.provide({