diff --git a/packages/cli/src/serve/fs/audit.ts b/packages/cli/src/serve/fs/audit.ts index 3e7f48bb3..5a20a9cf5 100644 --- a/packages/cli/src/serve/fs/audit.ts +++ b/packages/cli/src/serve/fs/audit.ts @@ -214,7 +214,18 @@ export function createAuditPublisher( errorKind: record.errorKind, }; if (record.hint) payload.hint = record.hint; - if (record.message) payload.message = record.message; + // `message` carries the underlying `FsError.message`, which + // many throw-sites embed `${p}` (absolute workspace path) or + // user-supplied `oldText` snippets into. Privacy-mode + // deployments that intentionally disabled raw-path logging + // would otherwise see those paths leak through the message. + // Gate the field on `includeRawPaths` so privacy mode means + // privacy mode for ALL path-bearing audit content (relPath + // AND message). Operators who want full forensic context + // opt in via `QWEN_AUDIT_RAW_PATHS=1`. + if (record.message && includeRawPaths) { + payload.message = record.message; + } if (includeRawPaths) { payload.relPath = relForAudit(record.input, boundWorkspace); } diff --git a/packages/cli/src/serve/fs/paths.test.ts b/packages/cli/src/serve/fs/paths.test.ts index bdb60d244..f4b4cd5ae 100644 --- a/packages/cli/src/serve/fs/paths.test.ts +++ b/packages/cli/src/serve/fs/paths.test.ts @@ -226,6 +226,22 @@ describe('resolveWithinWorkspace', () => { ); }); + it('tolerates ENOENT for stat intent and resolves via existing ancestor', async () => { + // Pinned per the docstring on `Intent` / `ENOENT_TOLERATING_INTENTS`: + // `'stat'` joins `'write'` in the tolerant set so a route asking + // "does this path exist?" gets back a synthetic canonical the + // caller can pass straight to `fsp.lstat`. The natural ENOENT + // surfaces from the lstat itself rather than from the resolver. + const out = await resolveWithinWorkspace( + 'newdir/leaf.txt', + workspace, + 'stat', + ); + expect(out).toBe( + path.join(realpathSync.native(workspace), 'newdir', 'leaf.txt'), + ); + }); + it('rejects ENOENT under read intent with path_not_found', async () => { const err = await resolveWithinWorkspace( 'does-not-exist', diff --git a/packages/cli/src/serve/fs/paths.ts b/packages/cli/src/serve/fs/paths.ts index 8c9c718f3..cdb63b0a0 100644 --- a/packages/cli/src/serve/fs/paths.ts +++ b/packages/cli/src/serve/fs/paths.ts @@ -97,19 +97,35 @@ export type ResolvedPath = string & { readonly __brand: 'ResolvedPath' }; /** * Intent declared at boundary entry. Used by callers (and the upcoming - * `policy.ts` module) to decide ignore/trust handling. `resolveWithinWorkspace` - * itself uses the intent only to differentiate ENOENT semantics: write - * intents tolerate a non-existent leaf (the file is about to be created), - * read intents do not. + * `policy.ts` module) to decide ignore/trust handling. + * `resolveWithinWorkspace` itself uses the intent to differentiate + * ENOENT semantics: `'write'` and `'stat'` tolerate a non-existent + * leaf (the file is about to be created, or the caller is asking + * "does this exist?"), other intents do not. * - * `'edit'` is a distinct intent from `'write'` so the trust gate, audit - * payload, and exhaustiveness checks can reason about partial-replace - * semantics separately from full-overwrite. Both gate identically in - * `assertTrustedForIntent`; the split exists to keep audit events - * faithful to the operation actually performed. + * `'edit'` is a distinct intent from `'write'` so the trust gate, + * audit payload, and exhaustiveness checks can reason about + * partial-replace semantics separately from full-overwrite. Both + * gate identically in `assertTrustedForIntent`; the split exists + * to keep audit events faithful to the operation actually + * performed. + * + * Why `'stat'` tolerates ENOENT: stat-ing a path that doesn't + * exist is a legitimate existence check (a route handler asking + * "should I 404?" before letting a downstream call fail with a + * cryptic message). `WorkspaceFileSystem.stat()` re-`lstat`s the + * resolved path itself and surfaces the natural ENOENT to the + * caller, so the resolver doesn't need to pre-emptively reject. */ export type Intent = 'read' | 'write' | 'edit' | 'list' | 'glob' | 'stat'; +/** + * Intents that tolerate a non-existent leaf — see `Intent`'s + * docstring for why each is in the set. Adding a new intent here + * is a deliberate decision: the resolver's ancestor walk returns + * a synthetic canonical path that the caller MUST be prepared to + * see succeed for nonexistent leaves. + */ const ENOENT_TOLERATING_INTENTS: ReadonlySet = new Set([ 'write', 'stat', diff --git a/packages/cli/src/serve/fs/policy.ts b/packages/cli/src/serve/fs/policy.ts index 447640e38..bfbad4ab8 100644 --- a/packages/cli/src/serve/fs/policy.ts +++ b/packages/cli/src/serve/fs/policy.ts @@ -208,15 +208,23 @@ export function enforceWriteSize( * Throw `file_too_large` if a read would exceed the cap and the * caller cannot accept truncation (used by `readBytes`, where * partial content is unsafe to return). + * + * Critical: `MAX_READ_BYTES` is the **hard** ceiling — caller- + * supplied `maxBytes` clamps it DOWN, never up. Without this + * `Math.min` clamp, a future PR 19/20 route that blindly forwards + * `req.query.maxBytes` would let a request widen past the daemon's + * 256 KiB safety cap and trigger the OOM scenario the cap exists + * to prevent. */ export function enforceReadBytesSize( fileBytes: number, maxBytes: number = MAX_READ_BYTES, ): void { - if (fileBytes > maxBytes) { + const effectiveMax = Math.min(maxBytes, MAX_READ_BYTES); + if (fileBytes > effectiveMax) { throw new FsError( 'file_too_large', - `file of ${fileBytes} bytes exceeds read limit of ${maxBytes} bytes`, + `file of ${fileBytes} bytes exceeds read limit of ${effectiveMax} bytes`, { hint: 'use readText for capped truncation, or raise the daemon limit', }, diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts index ac1c3b7b6..1fe38c473 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts @@ -518,6 +518,77 @@ describe('WorkspaceFileSystem - TOCTOU + UTF-8 + cwd hardening', () => { expect(isFsError(err)).toBe(true); expect((err as { kind: string }).kind).toBe('path_outside_workspace'); }); + + it('glob rejects opts.cwd that is a symlink resolving outside the workspace', async () => { + // The textual `path.resolve` + `isWithinRoot` check admits + // `/link` even when `/link → /scratch` is a symlink + // to outside. `realpath` on cwd follows the chain so the + // containment check sees the actual walk root and rejects. + const link = path.join(h.workspace, 'link-to-scratch'); + await fsp.symlink(h.scratch, link, 'dir'); + const err = await h.fs + .glob('**/*', { cwd: link as unknown as ResolvedPath }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('path_outside_workspace'); + }); + + it('writeText rejects when path was swapped to a symlink between resolve and write', async () => { + // resolve a path → swap target with a symlink to outside → + // writeText should reject with `symlink_escape` rather than + // letting `atomicWriteFile`'s symlink-following code write + // outside the workspace. + const target = path.join(h.workspace, 'about-to-write.txt'); + const r = await h.fs.resolve('about-to-write.txt', 'write'); + const outside = path.join(h.scratch, 'outside-target.txt'); + await fsp.writeFile(outside, ''); // pre-create so symlink isn't dangling + await fsp.symlink(outside, target, 'file'); + const err = await h.fs.writeText(r, 'hello').catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('symlink_escape'); + // The outside file MUST remain empty (no escape). + const outsideContent = await fsp.readFile(outside, 'utf-8'); + expect(outsideContent).toBe(''); + }); + + it('edit rejects when path was swapped to a symlink between read and write', async () => { + // edit() reads the file, then writes back. After the post-read + // inode check, an attacker could in theory swap to a symlink + // before the writeTextFile call. The pre-write guard catches + // that. We approximate: write a file, resolve, edit-pattern + // setup, then mid-operation we can't easily inject — instead + // we test the boundary directly by setting up a symlink that + // matches a pre-existing inode but points outside, similar + // shape to the writeText test above. + const target = path.join(h.workspace, 'edit-target.txt'); + await fsp.writeFile(target, 'foo=1\n'); + const r = await h.fs.resolve('edit-target.txt', 'edit'); + // Replace with symlink-to-outside AFTER resolve. + const outside = path.join(h.scratch, 'edit-outside.txt'); + await fsp.writeFile(outside, 'foo=1\n'); + await fsp.unlink(target); + await fsp.symlink(outside, target, 'file'); + const err = await h.fs.edit(r, 'foo=1', 'foo=2').catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + // post-read inode check fires first since inode changed; either + // catch is acceptable as a security signal. + expect(['symlink_escape']).toContain((err as { kind: string }).kind); + const outsideContent = await fsp.readFile(outside, 'utf-8'); + expect(outsideContent).toBe('foo=1\n'); + }); + + it('readBytes opts.maxBytes is clamped to MAX_READ_BYTES (cannot widen past hard cap)', async () => { + const policy = await import('./policy.js'); + const big = path.join(h.workspace, 'overrun.bin'); + await fsp.writeFile(big, Buffer.alloc(policy.MAX_READ_BYTES + 1)); + const r = await h.fs.resolve('overrun.bin', 'read'); + // Caller tries to widen past the hard cap. + const err = await h.fs + .readBytes(r, { maxBytes: policy.MAX_READ_BYTES * 10 }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_too_large'); + }); }); describe('WorkspaceFileSystem - audit always emits on body errors', () => { @@ -554,18 +625,41 @@ describe('WorkspaceFileSystem - audit always emits on body errors', () => { expect(denied).toBeDefined(); }); - it('fs.denied audit payload carries the FsError message for forensic context', async () => { - // The earlier audit only recorded `errorKind` + `hint`; the - // underlying OS / `FsError` message (path, errno detail, byte - // count) was lost from the audit trail. `recordAndWrap` now - // forwards the message so audit consumers debugging a - // production incident can see the actual cause. + it('fs.denied audit message field is gated behind QWEN_AUDIT_RAW_PATHS (privacy default omits)', async () => { + // Default (privacy) mode: `message` MUST be absent because the + // underlying `FsError.message` embeds `${p}` absolute paths + // that would otherwise leak workspace structure to audit + // consumers — even when operators explicitly disabled + // raw-path logging via not-setting `QWEN_AUDIT_RAW_PATHS`. + // See `audit.ts:recordDenied` — message gates on + // `includeRawPaths`. const err = (await h.fs .resolve('../escape', 'read') .catch((e: unknown) => e)) as { message: string }; expect(err.message).toMatch(/escapes workspace/); const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE); expect(denied).toBeDefined(); + // Privacy-default mode: message is OMITTED. + expect((denied!.data as { message?: string }).message).toBeUndefined(); + }); + + it('fs.denied carries message when includeRawPaths is enabled (forensic mode)', async () => { + // Build a fresh harness with includeRawPaths: true to verify + // the audit message round-trip works in raw-path mode. + const events: BridgeEvent[] = []; + const factory = createWorkspaceFileSystemFactory({ + boundWorkspace: h.workspace, + trusted: true, + emit: (e) => events.push(e), + includeRawPaths: true, + }); + const fs = factory.forRequest({ route: 'TEST /op' }); + const err = (await fs + .resolve('../escape', 'read') + .catch((e: unknown) => e)) as { message: string }; + expect(err.message).toMatch(/escapes workspace/); + const denied = events.find((e) => e.type === FS_DENIED_EVENT_TYPE); + expect(denied).toBeDefined(); expect((denied!.data as { message?: string }).message).toBe(err.message); }); }); diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.ts b/packages/cli/src/serve/fs/workspaceFileSystem.ts index 0a039f946..c5ba45c2c 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts @@ -314,6 +314,27 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { limit: opts.limit ?? Number.POSITIVE_INFINITY, line: startLineIndex, }); + // Post-read size sanity check. The pre-stat `MAX_READ_BYTES` + // gate above sees the file's size at stat time; a concurrent + // writer can grow the file from sub-cap to multi-GB between + // `fsp.stat` and `lowFs.readTextFile`'s underlying + // `fs.promises.readFile`. `readFileWithLineAndLimit` slurps + // the whole file into memory before slicing, so the stat + // gate alone is bypassable. Reject post-read if the + // returned content exceeds the cap. The proper fix is + // fd-based reading (open + stat + read tied to the same fd) + // — tracked as a hardening follow-up; this byte-length + // check is the defense-in-depth layer. + const decodedBytes = Buffer.byteLength(result.content, 'utf-8'); + if (decodedBytes > MAX_READ_BYTES) { + throw new FsError( + 'file_too_large', + `file grew during read to ${decodedBytes} bytes (cap ${MAX_READ_BYTES})`, + { + hint: 'concurrent writer detected via post-read size; retry or readBytes with explicit window', + }, + ); + } const ignoreVerdict = shouldIgnore( p, this.deps.boundWorkspace, @@ -474,12 +495,32 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { // against `boundWorkspace` (or was verified at a stale // moment). Re-validate at the entry point so a glob with // `cwd: '/etc'` cannot enumerate files outside the workspace - // even when the *pattern* is harmlessly relative. The - // pattern-side checks above only constrain the pattern - // shape; without this check `cwd` is the actual hazard. + // even when the *pattern* is harmlessly relative. + // + // **Important**: use `realpath` rather than `path.resolve` — + // a textual containment check on `path.resolve(cwd)` admits + // `/link` even when `/link → /etc` is a symlink to + // outside the workspace; `globAsync` would then walk + // `/etc` before the per-hit filter drops the results. + // `realpath` follows the chain (or throws ENOENT for missing + // ancestors), so the containment check sees the actual + // walk root. const cwd = (opts.cwd as string | undefined) ?? this.deps.boundWorkspace; - const cwdAbs = path.resolve(cwd); - if (!isWithinRoot(cwdAbs, this.deps.boundWorkspace)) { + let cwdReal: string; + try { + cwdReal = await fsp.realpath(path.resolve(cwd)); + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'ENOENT') { + throw new FsError( + 'path_not_found', + `glob cwd does not exist: ${cwd}`, + { cause: err }, + ); + } + throw err; + } + if (!isWithinRoot(cwdReal, this.deps.boundWorkspace)) { throw new FsError( 'path_outside_workspace', `glob cwd is outside workspace: ${cwd}`, @@ -487,7 +528,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { ); } const matches = await globAsync(pattern, { - cwd: cwdAbs, + cwd: cwdReal, nodir: false, absolute: true, dot: true, @@ -623,6 +664,13 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { // wasting heap on every write. const sizeBytes = Buffer.byteLength(content, 'utf-8'); enforceWriteSize(sizeBytes); + // Pre-write TOCTOU guard — `atomicWriteFile`'s + // `resolveSymlinkChain` follows symlinks at write time, so + // a swap between the boundary's `resolve()` and this call + // would land the write outside the workspace. ENOENT is + // fine (ahead-of-create flow); an actual symlink is + // rejected. + await assertNotSymlinkBeforeWrite(p as string); await this.deps.lowFs.writeTextFile({ path: p as string, content, @@ -723,6 +771,12 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { current.slice(0, idx) + newText + current.slice(idx + oldText.length); const writtenBytes = Buffer.byteLength(next, 'utf-8'); enforceWriteSize(writtenBytes); + // Pre-write TOCTOU guard — same shape as writeText. The + // read-modify-write race window between the post-read + // inode check above and this call is the deferred PR 20 + // atomic-via-temp follow-up; this guard is the + // defense-in-depth layer. + await assertNotSymlinkBeforeWrite(p as string); await this.deps.lowFs.writeTextFile({ path: p as string, content: next, @@ -869,6 +923,54 @@ async function assertInodeStableAfterRead( * `kindFromStats` + `kindFromDirent` pair as a maintenance hazard * (drift risk if the union grows). */ +/** + * Pre-write TOCTOU guard. Mirrors the post-read inode check but + * runs BEFORE the actual write. The earlier `resolve()` → + * `writeTextFile()` window let an attacker swap `p` with a + * symlink to outside the workspace; `atomicWriteFile`'s + * underlying `resolveSymlinkChain` follows the symlink and the + * write lands outside. + * + * Catches: + * - the path is now a symlink (`isSymbolicLink()`) — reject + * with `symlink_escape` regardless of where it points; PR 19/20 + * should re-`resolve` after a swap rather than blindly writing + * through the rename. + * + * Does NOT catch: + * - swap-back AFTER this guard but BEFORE `lowFs.writeTextFile` + * completes — the residual race window. The proper fix is + * fd-based atomic write (`fsp.open(O_NOFOLLOW)` + temp + rename + * tied to the parent dir), which is the deferred PR 20 + * atomic-via-temp follow-up. This guard is the defense-in-depth + * layer that closes the wide window. + * + * Used by `writeText` and `edit()` immediately before + * `lowFs.writeTextFile`. ENOENT (the file doesn't exist yet) is + * fine — that's the legitimate ahead-of-mkdir flow already + * sanctioned by `resolveWithinWorkspace`'s ENOENT-tolerant + * branch for write intents; only an actual symlink is rejected. + */ +async function assertNotSymlinkBeforeWrite(p: string): Promise { + let pre: Awaited>; + try { + pre = await fsp.lstat(p); + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'ENOENT') return; // ahead-of-create flow + throw err; + } + if (pre.isSymbolicLink()) { + throw new FsError( + 'symlink_escape', + `path was replaced with a symlink before write: ${p}`, + { + hint: 'TOCTOU swap detected via pre-write lstat — re-resolve before retrying', + }, + ); + } +} + function kindFromStatLike(s: { isFile(): boolean; isDirectory(): boolean;