From d1f597b5b5abfe330aa30ca3c33ca043bf9b9a83 Mon Sep 17 00:00:00 2001 From: Shoubhit Dash Date: Sun, 3 May 2026 17:49:46 +0530 Subject: [PATCH] fix(vcs): avoid unbounded diff memory usage (#25581) --- packages/opencode/src/git/index.ts | 106 +++++++++- packages/opencode/src/project/vcs.ts | 227 +++++++++++++++------ packages/opencode/test/git/git.test.ts | 47 +++++ packages/opencode/test/project/vcs.test.ts | 29 +++ 4 files changed, 338 insertions(+), 71 deletions(-) diff --git a/packages/opencode/src/git/index.ts b/packages/opencode/src/git/index.ts index 16a8624474..fff1d70b2a 100644 --- a/packages/opencode/src/git/index.ts +++ b/packages/opencode/src/git/index.ts @@ -24,6 +24,7 @@ const fail = (err: unknown) => text: () => "", stdout: Buffer.alloc(0), stderr: Buffer.from(err instanceof Error ? err.message : String(err)), + truncated: false, }) satisfies Result export type Kind = "added" | "deleted" | "modified" @@ -45,16 +46,28 @@ export type Stat = { readonly deletions: number } +export type Patch = { + readonly text: string + readonly truncated: boolean +} + +export interface PatchOptions { + readonly context?: number + readonly maxOutputBytes?: number +} + export interface Result { readonly exitCode: number readonly text: () => string readonly stdout: Buffer readonly stderr: Buffer + readonly truncated: boolean } export interface Options { readonly cwd: string readonly env?: Record + readonly maxOutputBytes?: number } export interface Interface { @@ -68,6 +81,10 @@ export interface Interface { readonly status: (cwd: string) => Effect.Effect readonly diff: (cwd: string, ref: string) => Effect.Effect readonly stats: (cwd: string, ref: string) => Effect.Effect + readonly patch: (cwd: string, ref: string, file: string, options?: PatchOptions) => Effect.Effect + readonly patchAll: (cwd: string, ref: string, options?: PatchOptions) => Effect.Effect + readonly patchUntracked: (cwd: string, file: string, options?: PatchOptions) => Effect.Effect + readonly statUntracked: (cwd: string, file: string) => Effect.Effect } const kind = (code: string): Kind => { @@ -96,15 +113,31 @@ export const layer = Layer.effect( stderr: "pipe", }) const handle = yield* spawner.spawn(proc) - const [stdout, stderr] = yield* Effect.all( - [Stream.mkString(Stream.decodeText(handle.stdout)), Stream.mkString(Stream.decodeText(handle.stderr))], - { concurrency: 2 }, - ) + const collect = (stream: typeof handle.stdout) => + Stream.runFold( + stream, + () => ({ chunks: [] as Uint8Array[], bytes: 0, truncated: false }), + (acc, chunk) => { + if (opts.maxOutputBytes === undefined) { + acc.chunks.push(chunk) + acc.bytes += chunk.length + return acc + } + + const remaining = opts.maxOutputBytes - acc.bytes + if (remaining > 0) acc.chunks.push(remaining >= chunk.length ? chunk : chunk.slice(0, remaining)) + acc.bytes += chunk.length + acc.truncated = acc.truncated || acc.bytes > opts.maxOutputBytes + return acc + }, + ).pipe(Effect.map((x) => ({ buffer: Buffer.concat(x.chunks), truncated: x.truncated }))) + const [stdout, stderr] = yield* Effect.all([collect(handle.stdout), collect(handle.stderr)], { concurrency: 2 }) return { exitCode: yield* handle.exitCode, - text: () => stdout, - stdout: Buffer.from(stdout), - stderr: Buffer.from(stderr), + text: () => stdout.buffer.toString("utf8"), + stdout: stdout.buffer, + stderr: stderr.buffer, + truncated: stdout.truncated || stderr.truncated, } satisfies Result }, Effect.scoped, @@ -240,6 +273,61 @@ export const layer = Layer.effect( }) }) + const patch = Effect.fn("Git.patch")(function* (cwd: string, ref: string, file: string, options?: PatchOptions) { + const result = yield* run( + ["diff", "--patch", "--no-ext-diff", "--no-renames", `--unified=${options?.context ?? 3}`, ref, "--", file], + { cwd, maxOutputBytes: options?.maxOutputBytes }, + ) + return { text: result.truncated ? "" : result.text(), truncated: result.truncated } satisfies Patch + }) + + const patchAll = Effect.fn("Git.patchAll")(function* (cwd: string, ref: string, options?: PatchOptions) { + const result = yield* run( + ["diff", "--patch", "--no-ext-diff", "--no-renames", `--unified=${options?.context ?? 3}`, ref, "--", "."], + { cwd, maxOutputBytes: options?.maxOutputBytes }, + ) + return { text: result.text(), truncated: result.truncated } satisfies Patch + }) + + const patchUntracked = Effect.fn("Git.patchUntracked")(function* ( + cwd: string, + file: string, + options?: PatchOptions, + ) { + const result = yield* run( + [ + "diff", + "--no-index", + "--patch", + "--no-ext-diff", + "--no-renames", + `--unified=${options?.context ?? 3}`, + "--", + "/dev/null", + file, + ], + { cwd, maxOutputBytes: options?.maxOutputBytes }, + ) + return { text: result.truncated ? "" : result.text(), truncated: result.truncated } satisfies Patch + }) + + const statUntracked = Effect.fn("Git.statUntracked")(function* (cwd: string, file: string) { + const result = yield* run(["diff", "--no-index", "--numstat", "--", "/dev/null", file], { + cwd, + maxOutputBytes: 4096, + }) + if (result.truncated) return + const parts = result.text().split("\t") + if (parts.length < 2) return + const additions = parts[0] === "-" ? 0 : Number.parseInt(parts[0] || "0", 10) + const deletions = parts[1] === "-" ? 0 : Number.parseInt(parts[1] || "0", 10) + return { + file, + additions: Number.isFinite(additions) ? additions : 0, + deletions: Number.isFinite(deletions) ? deletions : 0, + } satisfies Stat + }) + return Service.of({ run, branch, @@ -251,6 +339,10 @@ export const layer = Layer.effect( status, diff, stats, + patch, + patchAll, + patchUntracked, + statUntracked, }) }), ) diff --git a/packages/opencode/src/project/vcs.ts b/packages/opencode/src/project/vcs.ts index 24112cf442..28ac143eec 100644 --- a/packages/opencode/src/project/vcs.ts +++ b/packages/opencode/src/project/vcs.ts @@ -1,10 +1,8 @@ import { Effect, Layer, Context, Schema, Stream, Scope } from "effect" import { formatPatch, structuredPatch } from "diff" -import path from "path" import { Bus } from "@/bus" import { BusEvent } from "@/bus/bus-event" import { InstanceState } from "@/effect/instance-state" -import { AppFileSystem } from "@opencode-ai/core/filesystem" import { FileWatcher } from "@/file/watcher" import { Git } from "@/git" import * as Log from "@opencode-ai/core/util/log" @@ -12,20 +10,11 @@ import { zod } from "@/util/effect-zod" import { NonNegativeInt, withStatics } from "@/util/schema" const log = Log.create({ service: "vcs" }) +const PATCH_CONTEXT_LINES = 2_147_483_647 +const MAX_PATCH_BYTES = 10_000_000 +const MAX_TOTAL_PATCH_BYTES = 10_000_000 -const count = (text: string) => { - if (!text) return 0 - if (!text.endsWith("\n")) return text.split("\n").length - return text.slice(0, -1).split("\n").length -} - -const work = Effect.fnUntraced(function* (fs: AppFileSystem.Interface, cwd: string, file: string) { - const full = path.join(cwd, file) - if (!(yield* fs.exists(full).pipe(Effect.orDie))) return "" - const buf = yield* fs.readFile(full).pipe(Effect.catch(() => Effect.succeed(new Uint8Array()))) - if (Buffer.from(buf).includes(0)) return "" - return Buffer.from(buf).toString("utf8") -}) +const emptyPatch = (file: string) => formatPatch(structuredPatch(file, file, "", "", "", "", { context: 0 })) const nums = (list: Git.Stat[]) => new Map(list.map((item) => [item.file, { additions: item.additions, deletions: item.deletions }] as const)) @@ -38,59 +27,168 @@ const merge = (...lists: Git.Item[][]) => { return [...out.values()] } +const emptyBatch = () => ({ patches: new Map(), capped: false }) + +const parseQuotedPath = (value: string) => { + let out = "" + for (let idx = 1; idx < value.length; idx++) { + const char = value[idx] + if (char === '"') return { value: out, end: idx + 1 } + if (char !== "\\") { + out += char + continue + } + + const next = value[++idx] + if (next === "t") out += "\t" + else if (next === "n") out += "\n" + else if (next === "r") out += "\r" + else if (next === '"' || next === "\\") out += next + else out += next ?? "" + } +} + +const parsePathToken = (value: string) => { + if (!value.startsWith('"')) return value.split("\t")[0] + return parseQuotedPath(value)?.value ?? value +} + +const fileFromDiffPath = (value: string | undefined) => { + if (!value || value === "/dev/null") return + const file = parsePathToken(value) + if (file.startsWith("a/") || file.startsWith("b/")) return file.slice(2) + return file +} + +const fileFromGitHeader = (header: string) => { + if (header.startsWith('"')) { + const first = parseQuotedPath(header) + const second = first ? header.slice(first.end).trimStart() : undefined + if (!second) return + if (!second.startsWith('"')) return fileFromDiffPath(second) + return fileFromDiffPath(parseQuotedPath(second)?.value) + } + + const separator = header.indexOf(" b/") + if (separator === -1) return + return fileFromDiffPath(header.slice(separator + 1)) +} + +const fileFromPatchChunk = (chunk: string) => { + const next = /^\+\+\+ (.+)$/m.exec(chunk)?.[1] + const before = /^--- (.+)$/m.exec(chunk)?.[1] + const file = fileFromDiffPath(next) ?? fileFromDiffPath(before) + if (file) return file + + const header = /^diff --git (.+)$/m.exec(chunk)?.[1] + return fileFromGitHeader(header ?? "") +} + +const splitGitPatch = (patch: Git.Patch) => { + const starts = [...patch.text.matchAll(/^diff --git /gm)].map((match) => match.index) + const chunks = starts.map((start, index) => patch.text.slice(start, starts[index + 1] ?? patch.text.length)) + if (!patch.truncated) return chunks + return chunks.slice(0, -1) +} + +const batchPatches = Effect.fnUntraced(function* (git: Git.Interface, cwd: string, ref: string, list: Git.Item[]) { + if (list.length === 0) return { patches: new Map(), capped: false } + + const result = yield* git.patchAll(cwd, ref, { + context: PATCH_CONTEXT_LINES, + maxOutputBytes: MAX_TOTAL_PATCH_BYTES, + }) + if (result.truncated) log.warn("batched patch exceeded byte limit", { max: MAX_TOTAL_PATCH_BYTES }) + + return { + patches: splitGitPatch(result).reduce((acc, patch, index) => { + const file = fileFromPatchChunk(patch) ?? list[index]?.file + if (!file) return acc + acc.set(file, (acc.get(file) ?? "") + patch) + return acc + }, new Map()), + capped: result.truncated, + } +}) + +const nativePatch = Effect.fnUntraced(function* ( + git: Git.Interface, + cwd: string, + ref: string | undefined, + item: Git.Item, +) { + const result = + item.code === "??" || !ref + ? yield* git.patchUntracked(cwd, item.file, { context: PATCH_CONTEXT_LINES, maxOutputBytes: MAX_PATCH_BYTES }) + : yield* git.patch(cwd, ref, item.file, { context: PATCH_CONTEXT_LINES, maxOutputBytes: MAX_PATCH_BYTES }) + if (!result.truncated && result.text) return result.text + + if (result.truncated) log.warn("patch exceeded byte limit", { file: item.file, max: MAX_PATCH_BYTES }) + return emptyPatch(item.file) +}) + +const totalPatch = (file: string, patch: string, total: number) => { + if (total + Buffer.byteLength(patch) <= MAX_TOTAL_PATCH_BYTES) return { patch, capped: false } + log.warn("total patch budget exceeded", { file, max: MAX_TOTAL_PATCH_BYTES }) + return { patch: emptyPatch(file), capped: true } +} + +const patchForItem = Effect.fnUntraced(function* ( + git: Git.Interface, + cwd: string, + ref: string | undefined, + item: Git.Item, + batch: { patches: Map; capped: boolean }, + capped: boolean, +) { + if (capped) return emptyPatch(item.file) + + const batched = batch.patches.get(item.file) + if (batched !== undefined) return batched + if (item.code !== "??" && batch.capped) return emptyPatch(item.file) + return yield* nativePatch(git, cwd, ref, item) +}) + const files = Effect.fnUntraced(function* ( - fs: AppFileSystem.Interface, git: Git.Interface, cwd: string, ref: string | undefined, list: Git.Item[], map: Map, + batch: { patches: Map; capped: boolean }, ) { - const base = ref ? yield* git.prefix(cwd) : "" - const patch = (file: string, before: string, after: string) => - formatPatch(structuredPatch(file, file, before, after, "", "", { context: Number.MAX_SAFE_INTEGER })) - const next = yield* Effect.forEach( - list, - (item) => - Effect.gen(function* () { - const before = item.status === "added" || !ref ? "" : yield* git.show(cwd, ref, item.file, base) - const after = item.status === "deleted" ? "" : yield* work(fs, cwd, item.file) - const stat = map.get(item.file) - return { - file: item.file, - patch: patch(item.file, before, after), - additions: stat?.additions ?? (item.status === "added" ? count(after) : 0), - deletions: stat?.deletions ?? (item.status === "deleted" ? count(before) : 0), - status: item.status, - } satisfies FileDiff - }), - { concurrency: 8 }, - ) - return next.toSorted((a, b) => a.file.localeCompare(b.file)) + const next: FileDiff[] = [] + let total = 0 + let capped = false + + for (const item of list.toSorted((a, b) => a.file.localeCompare(b.file))) { + const stat = map.get(item.file) ?? (item.status === "added" ? yield* git.statUntracked(cwd, item.file) : undefined) + const patch = yield* patchForItem(git, cwd, ref, item, batch, capped) + const result: { patch: string; capped: boolean } = capped + ? { patch, capped: true } + : totalPatch(item.file, patch, total) + capped = capped || result.capped + if (!capped) { + total += Buffer.byteLength(result.patch) + capped = total >= MAX_TOTAL_PATCH_BYTES + } + next.push({ + file: item.file, + patch: result.patch, + additions: stat?.additions ?? 0, + deletions: stat?.deletions ?? 0, + status: item.status, + }) + } + + return next }) -const track = Effect.fnUntraced(function* ( - fs: AppFileSystem.Interface, - git: Git.Interface, - cwd: string, - ref: string | undefined, -) { - if (!ref) return yield* files(fs, git, cwd, ref, yield* git.status(cwd), new Map()) - const [list, stats] = yield* Effect.all([git.status(cwd), git.stats(cwd, ref)], { concurrency: 2 }) - return yield* files(fs, git, cwd, ref, list, nums(stats)) -}) - -const compare = Effect.fnUntraced(function* ( - fs: AppFileSystem.Interface, - git: Git.Interface, - cwd: string, - ref: string, -) { +const diffAgainstRef = Effect.fnUntraced(function* (git: Git.Interface, cwd: string, ref: string) { const [list, stats, extra] = yield* Effect.all([git.diff(cwd, ref), git.stats(cwd, ref), git.status(cwd)], { concurrency: 3, }) return yield* files( - fs, git, cwd, ref, @@ -99,9 +197,15 @@ const compare = Effect.fnUntraced(function* ( extra.filter((item) => item.code === "??"), ), nums(stats), + yield* batchPatches(git, cwd, ref, list), ) }) +const track = Effect.fnUntraced(function* (git: Git.Interface, cwd: string, ref: string | undefined) { + if (!ref) return yield* files(git, cwd, ref, yield* git.status(cwd), new Map(), emptyBatch()) + return yield* diffAgainstRef(git, cwd, ref) +}) + export const Mode = Schema.Literals(["git", "branch"]).pipe(withStatics((s) => ({ zod: zod(s) }))) export type Mode = Schema.Schema.Type @@ -147,10 +251,9 @@ interface State { export class Service extends Context.Service()("@opencode/Vcs") {} -export const layer: Layer.Layer = Layer.effect( +export const layer: Layer.Layer = Layer.effect( Service, Effect.gen(function* () { - const fs = yield* AppFileSystem.Service const git = yield* Git.Service const bus = yield* Bus.Service const scope = yield* Scope.Scope @@ -204,23 +307,19 @@ export const layer: Layer.Layer { }) }) + test("patch() returns capped native patch output", async () => { + await using tmp = await tmpdir({ git: true }) + await fs.writeFile(path.join(tmp.path, weird), "before\n", "utf-8") + await fs.writeFile(path.join(tmp.path, "other.txt"), "old\n", "utf-8") + await $`git add .`.cwd(tmp.path).quiet() + await $`git commit --no-gpg-sign -m "add file"`.cwd(tmp.path).quiet() + await fs.writeFile(path.join(tmp.path, weird), "after\n", "utf-8") + await fs.writeFile(path.join(tmp.path, "other.txt"), "new\n", "utf-8") + + await withGit(async (rt) => { + const [patch, all, capped] = await Promise.all([ + rt.runPromise(Git.Service.use((git) => git.patch(tmp.path, "HEAD", weird, { context: 2_147_483_647 }))), + rt.runPromise(Git.Service.use((git) => git.patchAll(tmp.path, "HEAD", { context: 2_147_483_647 }))), + rt.runPromise(Git.Service.use((git) => git.patch(tmp.path, "HEAD", weird, { maxOutputBytes: 1 }))), + ]) + + expect(patch.truncated).toBe(false) + expect(patch.text).toContain("diff --git") + expect(patch.text).toContain("-before") + expect(patch.text).toContain("+after") + expect(all.truncated).toBe(false) + expect(all.text).toContain("diff --git") + expect(all.text).toContain("other.txt") + expect(all.text).toContain("+new") + expect(capped.truncated).toBe(true) + expect(capped.text).toBe("") + }) + }) + + test("patchUntracked() and statUntracked() handle added files", async () => { + await using tmp = await tmpdir({ git: true }) + await fs.writeFile(path.join(tmp.path, weird), "one\ntwo\n", "utf-8") + + await withGit(async (rt) => { + const [patch, stat] = await Promise.all([ + rt.runPromise(Git.Service.use((git) => git.patchUntracked(tmp.path, weird, { context: 2_147_483_647 }))), + rt.runPromise(Git.Service.use((git) => git.statUntracked(tmp.path, weird))), + ]) + + expect(patch.truncated).toBe(false) + expect(patch.text).toContain("diff --git") + expect(patch.text).toContain("+one") + expect(patch.text).toContain("+two") + expect(stat).toEqual(expect.objectContaining({ file: weird, additions: 2, deletions: 0 })) + }) + }) + test("show() returns empty text for binary blobs", async () => { await using tmp = await tmpdir({ git: true }) await fs.writeFile(path.join(tmp.path, "bin.dat"), new Uint8Array([0, 1, 2, 3])) diff --git a/packages/opencode/test/project/vcs.test.ts b/packages/opencode/test/project/vcs.test.ts index 6fb0e251d3..53ff547ac1 100644 --- a/packages/opencode/test/project/vcs.test.ts +++ b/packages/opencode/test/project/vcs.test.ts @@ -234,6 +234,7 @@ describe("Vcs diff", () => { }), ]), ) + expect(diff.find((item) => item.file === "file.txt")?.patch).toContain("diff --git") }) }) @@ -259,6 +260,34 @@ describe("Vcs diff", () => { }) }) + test("diff('git') keeps batched patches aligned for type changes", async () => { + if (process.platform === "win32") return + + await using tmp = await tmpdir({ git: true }) + await fs.writeFile(path.join(tmp.path, "a.txt"), "old\n", "utf-8") + await fs.writeFile(path.join(tmp.path, "b.txt"), "old\n", "utf-8") + await $`git add .`.cwd(tmp.path).quiet() + await $`git commit --no-gpg-sign -m "add files"`.cwd(tmp.path).quiet() + await fs.unlink(path.join(tmp.path, "a.txt")) + await fs.symlink("target", path.join(tmp.path, "a.txt")) + await fs.writeFile(path.join(tmp.path, "b.txt"), "new\n", "utf-8") + + await withVcsOnly(tmp.path, async () => { + const diff = await AppRuntime.runPromise( + Effect.gen(function* () { + const vcs = yield* Vcs.Service + return yield* vcs.diff("git") + }), + ) + const a = diff.find((item) => item.file === "a.txt") + const b = diff.find((item) => item.file === "b.txt") + + expect(a?.patch).toContain("deleted file mode") + expect(a?.patch).toContain("new file mode") + expect(b?.patch).toContain("+new") + }) + }) + test("diff('branch') returns changes against default branch", async () => { await using tmp = await tmpdir({ git: true }) await $`git branch -M main`.cwd(tmp.path).quiet()