fix(compaction): budget retained tail with media

This commit is contained in:
Shoubhit Dash 2026-04-16 17:30:29 +05:30
parent 2e18a603f0
commit 42771c1db3
4 changed files with 84 additions and 30 deletions

View file

@ -1009,13 +1009,15 @@ export const Info = z
.int()
.min(0)
.optional()
.describe("Number of recent real user turns to keep verbatim during compaction (default: 2)"),
.describe(
"Number of recent user turns, including their following assistant/tool responses, to keep verbatim during compaction (default: 2)",
),
tail_tokens: z
.number()
.int()
.min(0)
.optional()
.describe("Token budget for retained recent turns during compaction"),
.describe("Token budget for retained recent turn spans during compaction"),
reserved: z
.number()
.int()

View file

@ -2,7 +2,7 @@ import { BusEvent } from "@/bus/bus-event"
import { Bus } from "@/bus"
import * as Session from "./session"
import { SessionID, MessageID, PartID } from "./schema"
import { Provider, ProviderTransform } from "../provider"
import { Provider } from "../provider"
import { MessageV2 } from "./message-v2"
import z from "zod"
import { Token } from "../util"
@ -17,7 +17,7 @@ import { Effect, Layer, Context } from "effect"
import { InstanceState } from "@/effect"
import { makeRuntime } from "@/effect/run-service"
import { fn } from "@/util/fn"
import { isOverflow as overflow } from "./overflow"
import { isOverflow as overflow, usable } from "./overflow"
export namespace SessionCompaction {
const log = Log.create({ service: "session.compaction" })
@ -43,13 +43,6 @@ export namespace SessionCompaction {
id: MessageID
}
// Computes the usable token budget before compaction must trigger:
// the model's input limit minus a reserved output allowance.
// NOTE(review): this commit deletes this local copy in favor of the shared
// `usable` export in ./overflow (imported at the top of the diff).
function usable(input: { cfg: Config.Info; model: Provider.Model }) {
// Reserved headroom: explicit config wins, else the smaller of 20k and the model's max output tokens.
const reserved = input.cfg.compaction?.reserved ?? Math.min(20_000, ProviderTransform.maxOutputTokens(input.model))
// Prefer the dedicated input limit when the model declares one; otherwise fall
// back to total context minus max output. Clamped to 0 so the budget never goes negative.
return input.model.limit.input
? Math.max(0, input.model.limit.input - reserved)
: Math.max(0, input.model.limit.context - ProviderTransform.maxOutputTokens(input.model))
}
function tailBudget(input: { cfg: Config.Info; model: Provider.Model }) {
return (
input.cfg.compaction?.tail_tokens ??
@ -131,7 +124,7 @@ export namespace SessionCompaction {
messages: MessageV2.WithParts[]
model: Provider.Model
}) {
const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model, { stripMedia: true })
const msgs = yield* MessageV2.toModelMessagesEffect(input.messages, input.model)
return Token.estimate(JSON.stringify(msgs))
})
@ -282,14 +275,7 @@ export namespace SessionCompaction {
{ sessionID: input.sessionID },
{ context: [], prompt: undefined },
)
const defaultPrompt = `Summarize the older conversation history so another agent can continue the work with the retained recent turns.
The most recent conversation turns will remain verbatim outside this summary, so focus on older context that is still needed to understand and continue the work.
Include what we did, what we're doing, which files we're working on, and what we're going to do next.
The summary that you construct will be used so that another agent can read it and continue the work.
Do not call any tools. Respond only with the summary text.
Respond in the same language as the user's messages in the conversation.
When constructing the summary, try to stick to this template:
const defaultPrompt = `When constructing the summary, try to stick to this template:
---
## Goal

View file

@ -5,18 +5,22 @@ import type { MessageV2 } from "./message-v2"
const COMPACTION_BUFFER = 20_000
export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
if (input.cfg.compaction?.auto === false) return false
export function usable(input: { cfg: Config.Info; model: Provider.Model }) {
const context = input.model.limit.context
if (context === 0) return false
const count =
input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
if (context === 0) return 0
const reserved =
input.cfg.compaction?.reserved ?? Math.min(COMPACTION_BUFFER, ProviderTransform.maxOutputTokens(input.model))
const usable = input.model.limit.input
? input.model.limit.input - reserved
: context - ProviderTransform.maxOutputTokens(input.model)
return count >= usable
return input.model.limit.input
? Math.max(0, input.model.limit.input - reserved)
: Math.max(0, context - ProviderTransform.maxOutputTokens(input.model))
}
export function isOverflow(input: { cfg: Config.Info; tokens: MessageV2.Assistant["tokens"]; model: Provider.Model }) {
if (input.cfg.compaction?.auto === false) return false
if (input.model.limit.context === 0) return false
const count =
input.tokens.total || input.tokens.input + input.tokens.output + input.tokens.cache.read + input.tokens.cache.write
return count >= usable(input)
}

View file

@ -1044,6 +1044,68 @@ describe("session.compaction.process", () => {
})
})
// Verifies compaction behavior when the turns selected for verbatim retention
// carry media too large for the configured tail token budget: instead of
// keeping a tail, compaction falls back to summarizing everything
// (tail_start_id is left unset) and the oversized turn is fed to the
// summarizer, with its image represented by an attachment placeholder.
test("falls back to full summary when retained tail media exceeds tail budget", async () => {
await using tmp = await tmpdir({ git: true })
const stub = llm()
// Capture the exact messages sent to the stubbed LLM so we can assert on
// what the summarization prompt contained.
let captured = ""
stub.push(
reply("summary", (input) => {
captured = JSON.stringify(input.messages)
}),
)
await Instance.provide({
directory: tmp.path,
fn: async () => {
const session = await svc.create({})
// One older turn (summarizable) and one recent turn that would normally
// be retained verbatim in the tail.
await user(session.id, "older")
const recent = await user(session.id, "recent image turn")
// Attach a large inline image (~4k base64 chars) to the recent turn so
// the tail span alone exceeds the 100-token budget configured below.
await svc.updatePart({
id: PartID.ascending(),
messageID: recent.id,
sessionID: session.id,
type: "file",
mime: "image/png",
filename: "big.png",
url: `data:image/png;base64,${"a".repeat(4_000)}`,
})
await SessionCompaction.create({
sessionID: session.id,
agent: "build",
model: ref,
auto: false,
})
// tail_turns: 1 asks to keep the last user turn; tail_tokens: 100 makes
// that impossible given the attached image.
const rt = liveRuntime(stub.layer, wide(), cfg({ tail_turns: 1, tail_tokens: 100 }))
try {
const msgs = await svc.messages({ sessionID: session.id })
const parent = msgs.at(-1)?.info.id
expect(parent).toBeTruthy()
await rt.runPromise(
SessionCompaction.Service.use((svc) =>
svc.process({
parentID: parent!,
messages: msgs,
sessionID: session.id,
auto: false,
}),
),
)
// The compaction part exists but has no tail_start_id: nothing was
// retained verbatim, i.e. full-summary fallback was taken.
const part = (await svc.messages({ sessionID: session.id }))
.at(-2)
?.parts.find((item) => item.type === "compaction")
expect(part?.type).toBe("compaction")
if (part?.type === "compaction") expect(part.tail_start_id).toBeUndefined()
// The recent turn was included in the summarizer input, with the image
// replaced by a textual attachment placeholder rather than raw bytes.
expect(captured).toContain("recent image turn")
expect(captured).toContain("Attached image/png: big.png")
} finally {
await rt.dispose()
}
},
})
})
test("allows plugins to disable synthetic continue prompt", async () => {
await using tmp = await tmpdir()
await Instance.provide({