fix(vcs): avoid unbounded diff memory usage (#25581)

This commit is contained in:
Shoubhit Dash 2026-05-03 17:49:46 +05:30 committed by GitHub
parent 8299fb3e2b
commit d1f597b5b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 338 additions and 71 deletions

View file

@ -24,6 +24,7 @@ const fail = (err: unknown) =>
text: () => "",
stdout: Buffer.alloc(0),
stderr: Buffer.from(err instanceof Error ? err.message : String(err)),
truncated: false,
}) satisfies Result
export type Kind = "added" | "deleted" | "modified"
@ -45,16 +46,28 @@ export type Stat = {
readonly deletions: number
}
export type Patch = {
readonly text: string
readonly truncated: boolean
}
export interface PatchOptions {
readonly context?: number
readonly maxOutputBytes?: number
}
export interface Result {
readonly exitCode: number
readonly text: () => string
readonly stdout: Buffer
readonly stderr: Buffer
readonly truncated: boolean
}
export interface Options {
readonly cwd: string
readonly env?: Record<string, string>
readonly maxOutputBytes?: number
}
export interface Interface {
@ -68,6 +81,10 @@ export interface Interface {
readonly status: (cwd: string) => Effect.Effect<Item[]>
readonly diff: (cwd: string, ref: string) => Effect.Effect<Item[]>
readonly stats: (cwd: string, ref: string) => Effect.Effect<Stat[]>
readonly patch: (cwd: string, ref: string, file: string, options?: PatchOptions) => Effect.Effect<Patch>
readonly patchAll: (cwd: string, ref: string, options?: PatchOptions) => Effect.Effect<Patch>
readonly patchUntracked: (cwd: string, file: string, options?: PatchOptions) => Effect.Effect<Patch>
readonly statUntracked: (cwd: string, file: string) => Effect.Effect<Stat | undefined>
}
const kind = (code: string): Kind => {
@ -96,15 +113,31 @@ export const layer = Layer.effect(
stderr: "pipe",
})
const handle = yield* spawner.spawn(proc)
const [stdout, stderr] = yield* Effect.all(
[Stream.mkString(Stream.decodeText(handle.stdout)), Stream.mkString(Stream.decodeText(handle.stderr))],
{ concurrency: 2 },
)
const collect = (stream: typeof handle.stdout) =>
Stream.runFold(
stream,
() => ({ chunks: [] as Uint8Array[], bytes: 0, truncated: false }),
(acc, chunk) => {
if (opts.maxOutputBytes === undefined) {
acc.chunks.push(chunk)
acc.bytes += chunk.length
return acc
}
const remaining = opts.maxOutputBytes - acc.bytes
if (remaining > 0) acc.chunks.push(remaining >= chunk.length ? chunk : chunk.slice(0, remaining))
acc.bytes += chunk.length
acc.truncated = acc.truncated || acc.bytes > opts.maxOutputBytes
return acc
},
).pipe(Effect.map((x) => ({ buffer: Buffer.concat(x.chunks), truncated: x.truncated })))
const [stdout, stderr] = yield* Effect.all([collect(handle.stdout), collect(handle.stderr)], { concurrency: 2 })
return {
exitCode: yield* handle.exitCode,
text: () => stdout,
stdout: Buffer.from(stdout),
stderr: Buffer.from(stderr),
text: () => stdout.buffer.toString("utf8"),
stdout: stdout.buffer,
stderr: stderr.buffer,
truncated: stdout.truncated || stderr.truncated,
} satisfies Result
},
Effect.scoped,
@ -240,6 +273,61 @@ export const layer = Layer.effect(
})
})
const patch = Effect.fn("Git.patch")(function* (cwd: string, ref: string, file: string, options?: PatchOptions) {
const result = yield* run(
["diff", "--patch", "--no-ext-diff", "--no-renames", `--unified=${options?.context ?? 3}`, ref, "--", file],
{ cwd, maxOutputBytes: options?.maxOutputBytes },
)
return { text: result.truncated ? "" : result.text(), truncated: result.truncated } satisfies Patch
})
const patchAll = Effect.fn("Git.patchAll")(function* (cwd: string, ref: string, options?: PatchOptions) {
const result = yield* run(
["diff", "--patch", "--no-ext-diff", "--no-renames", `--unified=${options?.context ?? 3}`, ref, "--", "."],
{ cwd, maxOutputBytes: options?.maxOutputBytes },
)
return { text: result.text(), truncated: result.truncated } satisfies Patch
})
const patchUntracked = Effect.fn("Git.patchUntracked")(function* (
cwd: string,
file: string,
options?: PatchOptions,
) {
const result = yield* run(
[
"diff",
"--no-index",
"--patch",
"--no-ext-diff",
"--no-renames",
`--unified=${options?.context ?? 3}`,
"--",
"/dev/null",
file,
],
{ cwd, maxOutputBytes: options?.maxOutputBytes },
)
return { text: result.truncated ? "" : result.text(), truncated: result.truncated } satisfies Patch
})
const statUntracked = Effect.fn("Git.statUntracked")(function* (cwd: string, file: string) {
const result = yield* run(["diff", "--no-index", "--numstat", "--", "/dev/null", file], {
cwd,
maxOutputBytes: 4096,
})
if (result.truncated) return
const parts = result.text().split("\t")
if (parts.length < 2) return
const additions = parts[0] === "-" ? 0 : Number.parseInt(parts[0] || "0", 10)
const deletions = parts[1] === "-" ? 0 : Number.parseInt(parts[1] || "0", 10)
return {
file,
additions: Number.isFinite(additions) ? additions : 0,
deletions: Number.isFinite(deletions) ? deletions : 0,
} satisfies Stat
})
return Service.of({
run,
branch,
@ -251,6 +339,10 @@ export const layer = Layer.effect(
status,
diff,
stats,
patch,
patchAll,
patchUntracked,
statUntracked,
})
}),
)

View file

@ -1,10 +1,8 @@
import { Effect, Layer, Context, Schema, Stream, Scope } from "effect"
import { formatPatch, structuredPatch } from "diff"
import path from "path"
import { Bus } from "@/bus"
import { BusEvent } from "@/bus/bus-event"
import { InstanceState } from "@/effect/instance-state"
import { AppFileSystem } from "@opencode-ai/core/filesystem"
import { FileWatcher } from "@/file/watcher"
import { Git } from "@/git"
import * as Log from "@opencode-ai/core/util/log"
@ -12,20 +10,11 @@ import { zod } from "@/util/effect-zod"
import { NonNegativeInt, withStatics } from "@/util/schema"
const log = Log.create({ service: "vcs" })
const PATCH_CONTEXT_LINES = 2_147_483_647
const MAX_PATCH_BYTES = 10_000_000
const MAX_TOTAL_PATCH_BYTES = 10_000_000
const count = (text: string) => {
if (!text) return 0
if (!text.endsWith("\n")) return text.split("\n").length
return text.slice(0, -1).split("\n").length
}
const work = Effect.fnUntraced(function* (fs: AppFileSystem.Interface, cwd: string, file: string) {
const full = path.join(cwd, file)
if (!(yield* fs.exists(full).pipe(Effect.orDie))) return ""
const buf = yield* fs.readFile(full).pipe(Effect.catch(() => Effect.succeed(new Uint8Array())))
if (Buffer.from(buf).includes(0)) return ""
return Buffer.from(buf).toString("utf8")
})
const emptyPatch = (file: string) => formatPatch(structuredPatch(file, file, "", "", "", "", { context: 0 }))
const nums = (list: Git.Stat[]) =>
new Map(list.map((item) => [item.file, { additions: item.additions, deletions: item.deletions }] as const))
@ -38,59 +27,168 @@ const merge = (...lists: Git.Item[][]) => {
return [...out.values()]
}
const emptyBatch = () => ({ patches: new Map<string, string>(), capped: false })
const parseQuotedPath = (value: string) => {
let out = ""
for (let idx = 1; idx < value.length; idx++) {
const char = value[idx]
if (char === '"') return { value: out, end: idx + 1 }
if (char !== "\\") {
out += char
continue
}
const next = value[++idx]
if (next === "t") out += "\t"
else if (next === "n") out += "\n"
else if (next === "r") out += "\r"
else if (next === '"' || next === "\\") out += next
else out += next ?? ""
}
}
const parsePathToken = (value: string) => {
if (!value.startsWith('"')) return value.split("\t")[0]
return parseQuotedPath(value)?.value ?? value
}
const fileFromDiffPath = (value: string | undefined) => {
if (!value || value === "/dev/null") return
const file = parsePathToken(value)
if (file.startsWith("a/") || file.startsWith("b/")) return file.slice(2)
return file
}
const fileFromGitHeader = (header: string) => {
if (header.startsWith('"')) {
const first = parseQuotedPath(header)
const second = first ? header.slice(first.end).trimStart() : undefined
if (!second) return
if (!second.startsWith('"')) return fileFromDiffPath(second)
return fileFromDiffPath(parseQuotedPath(second)?.value)
}
const separator = header.indexOf(" b/")
if (separator === -1) return
return fileFromDiffPath(header.slice(separator + 1))
}
const fileFromPatchChunk = (chunk: string) => {
const next = /^\+\+\+ (.+)$/m.exec(chunk)?.[1]
const before = /^--- (.+)$/m.exec(chunk)?.[1]
const file = fileFromDiffPath(next) ?? fileFromDiffPath(before)
if (file) return file
const header = /^diff --git (.+)$/m.exec(chunk)?.[1]
return fileFromGitHeader(header ?? "")
}
const splitGitPatch = (patch: Git.Patch) => {
const starts = [...patch.text.matchAll(/^diff --git /gm)].map((match) => match.index)
const chunks = starts.map((start, index) => patch.text.slice(start, starts[index + 1] ?? patch.text.length))
if (!patch.truncated) return chunks
return chunks.slice(0, -1)
}
const batchPatches = Effect.fnUntraced(function* (git: Git.Interface, cwd: string, ref: string, list: Git.Item[]) {
if (list.length === 0) return { patches: new Map<string, string>(), capped: false }
const result = yield* git.patchAll(cwd, ref, {
context: PATCH_CONTEXT_LINES,
maxOutputBytes: MAX_TOTAL_PATCH_BYTES,
})
if (result.truncated) log.warn("batched patch exceeded byte limit", { max: MAX_TOTAL_PATCH_BYTES })
return {
patches: splitGitPatch(result).reduce((acc, patch, index) => {
const file = fileFromPatchChunk(patch) ?? list[index]?.file
if (!file) return acc
acc.set(file, (acc.get(file) ?? "") + patch)
return acc
}, new Map<string, string>()),
capped: result.truncated,
}
})
const nativePatch = Effect.fnUntraced(function* (
git: Git.Interface,
cwd: string,
ref: string | undefined,
item: Git.Item,
) {
const result =
item.code === "??" || !ref
? yield* git.patchUntracked(cwd, item.file, { context: PATCH_CONTEXT_LINES, maxOutputBytes: MAX_PATCH_BYTES })
: yield* git.patch(cwd, ref, item.file, { context: PATCH_CONTEXT_LINES, maxOutputBytes: MAX_PATCH_BYTES })
if (!result.truncated && result.text) return result.text
if (result.truncated) log.warn("patch exceeded byte limit", { file: item.file, max: MAX_PATCH_BYTES })
return emptyPatch(item.file)
})
const totalPatch = (file: string, patch: string, total: number) => {
if (total + Buffer.byteLength(patch) <= MAX_TOTAL_PATCH_BYTES) return { patch, capped: false }
log.warn("total patch budget exceeded", { file, max: MAX_TOTAL_PATCH_BYTES })
return { patch: emptyPatch(file), capped: true }
}
const patchForItem = Effect.fnUntraced(function* (
git: Git.Interface,
cwd: string,
ref: string | undefined,
item: Git.Item,
batch: { patches: Map<string, string>; capped: boolean },
capped: boolean,
) {
if (capped) return emptyPatch(item.file)
const batched = batch.patches.get(item.file)
if (batched !== undefined) return batched
if (item.code !== "??" && batch.capped) return emptyPatch(item.file)
return yield* nativePatch(git, cwd, ref, item)
})
const files = Effect.fnUntraced(function* (
fs: AppFileSystem.Interface,
git: Git.Interface,
cwd: string,
ref: string | undefined,
list: Git.Item[],
map: Map<string, { additions: number; deletions: number }>,
batch: { patches: Map<string, string>; capped: boolean },
) {
const base = ref ? yield* git.prefix(cwd) : ""
const patch = (file: string, before: string, after: string) =>
formatPatch(structuredPatch(file, file, before, after, "", "", { context: Number.MAX_SAFE_INTEGER }))
const next = yield* Effect.forEach(
list,
(item) =>
Effect.gen(function* () {
const before = item.status === "added" || !ref ? "" : yield* git.show(cwd, ref, item.file, base)
const after = item.status === "deleted" ? "" : yield* work(fs, cwd, item.file)
const stat = map.get(item.file)
return {
file: item.file,
patch: patch(item.file, before, after),
additions: stat?.additions ?? (item.status === "added" ? count(after) : 0),
deletions: stat?.deletions ?? (item.status === "deleted" ? count(before) : 0),
status: item.status,
} satisfies FileDiff
}),
{ concurrency: 8 },
)
return next.toSorted((a, b) => a.file.localeCompare(b.file))
const next: FileDiff[] = []
let total = 0
let capped = false
for (const item of list.toSorted((a, b) => a.file.localeCompare(b.file))) {
const stat = map.get(item.file) ?? (item.status === "added" ? yield* git.statUntracked(cwd, item.file) : undefined)
const patch = yield* patchForItem(git, cwd, ref, item, batch, capped)
const result: { patch: string; capped: boolean } = capped
? { patch, capped: true }
: totalPatch(item.file, patch, total)
capped = capped || result.capped
if (!capped) {
total += Buffer.byteLength(result.patch)
capped = total >= MAX_TOTAL_PATCH_BYTES
}
next.push({
file: item.file,
patch: result.patch,
additions: stat?.additions ?? 0,
deletions: stat?.deletions ?? 0,
status: item.status,
})
}
return next
})
const track = Effect.fnUntraced(function* (
fs: AppFileSystem.Interface,
git: Git.Interface,
cwd: string,
ref: string | undefined,
) {
if (!ref) return yield* files(fs, git, cwd, ref, yield* git.status(cwd), new Map())
const [list, stats] = yield* Effect.all([git.status(cwd), git.stats(cwd, ref)], { concurrency: 2 })
return yield* files(fs, git, cwd, ref, list, nums(stats))
})
const compare = Effect.fnUntraced(function* (
fs: AppFileSystem.Interface,
git: Git.Interface,
cwd: string,
ref: string,
) {
const diffAgainstRef = Effect.fnUntraced(function* (git: Git.Interface, cwd: string, ref: string) {
const [list, stats, extra] = yield* Effect.all([git.diff(cwd, ref), git.stats(cwd, ref), git.status(cwd)], {
concurrency: 3,
})
return yield* files(
fs,
git,
cwd,
ref,
@ -99,9 +197,15 @@ const compare = Effect.fnUntraced(function* (
extra.filter((item) => item.code === "??"),
),
nums(stats),
yield* batchPatches(git, cwd, ref, list),
)
})
const track = Effect.fnUntraced(function* (git: Git.Interface, cwd: string, ref: string | undefined) {
if (!ref) return yield* files(git, cwd, ref, yield* git.status(cwd), new Map(), emptyBatch())
return yield* diffAgainstRef(git, cwd, ref)
})
export const Mode = Schema.Literals(["git", "branch"]).pipe(withStatics((s) => ({ zod: zod(s) })))
export type Mode = Schema.Schema.Type<typeof Mode>
@ -147,10 +251,9 @@ interface State {
export class Service extends Context.Service<Service, Interface>()("@opencode/Vcs") {}
export const layer: Layer.Layer<Service, never, AppFileSystem.Service | Git.Service | Bus.Service> = Layer.effect(
export const layer: Layer.Layer<Service, never, Git.Service | Bus.Service> = Layer.effect(
Service,
Effect.gen(function* () {
const fs = yield* AppFileSystem.Service
const git = yield* Git.Service
const bus = yield* Bus.Service
const scope = yield* Scope.Scope
@ -204,23 +307,19 @@ export const layer: Layer.Layer<Service, never, AppFileSystem.Service | Git.Serv
const ctx = yield* InstanceState.context
if (ctx.project.vcs !== "git") return []
if (mode === "git") {
return yield* track(fs, git, ctx.directory, (yield* git.hasHead(ctx.directory)) ? "HEAD" : undefined)
return yield* track(git, ctx.directory, (yield* git.hasHead(ctx.directory)) ? "HEAD" : undefined)
}
if (!value.root) return []
if (value.current && value.current === value.root.name) return []
const ref = yield* git.mergeBase(ctx.directory, value.root.ref)
if (!ref) return []
return yield* compare(fs, git, ctx.directory, ref)
return yield* diffAgainstRef(git, ctx.directory, ref)
}),
})
}),
)
export const defaultLayer = layer.pipe(
Layer.provide(Git.defaultLayer),
Layer.provide(AppFileSystem.defaultLayer),
Layer.provide(Bus.layer),
)
export const defaultLayer = layer.pipe(Layer.provide(Git.defaultLayer), Layer.provide(Bus.layer))
export * as Vcs from "./vcs"

View file

@ -114,6 +114,53 @@ describe("Git", () => {
})
})
test("patch() returns capped native patch output", async () => {
await using tmp = await tmpdir({ git: true })
await fs.writeFile(path.join(tmp.path, weird), "before\n", "utf-8")
await fs.writeFile(path.join(tmp.path, "other.txt"), "old\n", "utf-8")
await $`git add .`.cwd(tmp.path).quiet()
await $`git commit --no-gpg-sign -m "add file"`.cwd(tmp.path).quiet()
await fs.writeFile(path.join(tmp.path, weird), "after\n", "utf-8")
await fs.writeFile(path.join(tmp.path, "other.txt"), "new\n", "utf-8")
await withGit(async (rt) => {
const [patch, all, capped] = await Promise.all([
rt.runPromise(Git.Service.use((git) => git.patch(tmp.path, "HEAD", weird, { context: 2_147_483_647 }))),
rt.runPromise(Git.Service.use((git) => git.patchAll(tmp.path, "HEAD", { context: 2_147_483_647 }))),
rt.runPromise(Git.Service.use((git) => git.patch(tmp.path, "HEAD", weird, { maxOutputBytes: 1 }))),
])
expect(patch.truncated).toBe(false)
expect(patch.text).toContain("diff --git")
expect(patch.text).toContain("-before")
expect(patch.text).toContain("+after")
expect(all.truncated).toBe(false)
expect(all.text).toContain("diff --git")
expect(all.text).toContain("other.txt")
expect(all.text).toContain("+new")
expect(capped.truncated).toBe(true)
expect(capped.text).toBe("")
})
})
test("patchUntracked() and statUntracked() handle added files", async () => {
await using tmp = await tmpdir({ git: true })
await fs.writeFile(path.join(tmp.path, weird), "one\ntwo\n", "utf-8")
await withGit(async (rt) => {
const [patch, stat] = await Promise.all([
rt.runPromise(Git.Service.use((git) => git.patchUntracked(tmp.path, weird, { context: 2_147_483_647 }))),
rt.runPromise(Git.Service.use((git) => git.statUntracked(tmp.path, weird))),
])
expect(patch.truncated).toBe(false)
expect(patch.text).toContain("diff --git")
expect(patch.text).toContain("+one")
expect(patch.text).toContain("+two")
expect(stat).toEqual(expect.objectContaining({ file: weird, additions: 2, deletions: 0 }))
})
})
test("show() returns empty text for binary blobs", async () => {
await using tmp = await tmpdir({ git: true })
await fs.writeFile(path.join(tmp.path, "bin.dat"), new Uint8Array([0, 1, 2, 3]))

View file

@ -234,6 +234,7 @@ describe("Vcs diff", () => {
}),
]),
)
expect(diff.find((item) => item.file === "file.txt")?.patch).toContain("diff --git")
})
})
@ -259,6 +260,34 @@ describe("Vcs diff", () => {
})
})
test("diff('git') keeps batched patches aligned for type changes", async () => {
if (process.platform === "win32") return
await using tmp = await tmpdir({ git: true })
await fs.writeFile(path.join(tmp.path, "a.txt"), "old\n", "utf-8")
await fs.writeFile(path.join(tmp.path, "b.txt"), "old\n", "utf-8")
await $`git add .`.cwd(tmp.path).quiet()
await $`git commit --no-gpg-sign -m "add files"`.cwd(tmp.path).quiet()
await fs.unlink(path.join(tmp.path, "a.txt"))
await fs.symlink("target", path.join(tmp.path, "a.txt"))
await fs.writeFile(path.join(tmp.path, "b.txt"), "new\n", "utf-8")
await withVcsOnly(tmp.path, async () => {
const diff = await AppRuntime.runPromise(
Effect.gen(function* () {
const vcs = yield* Vcs.Service
return yield* vcs.diff("git")
}),
)
const a = diff.find((item) => item.file === "a.txt")
const b = diff.find((item) => item.file === "b.txt")
expect(a?.patch).toContain("deleted file mode")
expect(a?.patch).toContain("new file mode")
expect(b?.patch).toContain("+new")
})
})
test("diff('branch') returns changes against default branch", async () => {
await using tmp = await tmpdir({ git: true })
await $`git branch -M main`.cwd(tmp.path).quiet()