diff --git a/packages/cli/src/serve/fs/errors.test.ts b/packages/cli/src/serve/fs/errors.test.ts index 44909901c..482769656 100644 --- a/packages/cli/src/serve/fs/errors.test.ts +++ b/packages/cli/src/serve/fs/errors.test.ts @@ -115,12 +115,26 @@ describe('wrapAsFsError', () => { expect(wrapAsFsError(errno('EISDIR')).kind).toBe('parse_error'); }); - it('maps EMFILE / ENFILE to permission_denied with a hint', () => { + it('maps EMFILE / ENFILE to io_error with a hint', () => { const out = wrapAsFsError(errno('EMFILE')); - expect(out.kind).toBe('permission_denied'); + expect(out.kind).toBe('io_error'); expect(out.hint).toMatch(/file-descriptor/); }); + it('maps ENOSPC / EIO / EBUSY / ETXTBSY / ENAMETOOLONG to io_error', () => { + const enospc = wrapAsFsError(errno('ENOSPC')); + expect(enospc.kind).toBe('io_error'); + expect(enospc.hint).toMatch(/full/); + expect(wrapAsFsError(errno('EIO')).kind).toBe('io_error'); + expect(wrapAsFsError(errno('EBUSY')).kind).toBe('io_error'); + expect(wrapAsFsError(errno('ETXTBSY')).kind).toBe('io_error'); + expect(wrapAsFsError(errno('ENAMETOOLONG')).kind).toBe('io_error'); + }); + + it('io_error has HTTP status 503', () => { + expect(new FsError('io_error', 'disk full').status).toBe(503); + }); + it('falls back to the configured default kind for unknown errnos', () => { expect(wrapAsFsError(errno('EWHATEVER')).kind).toBe('permission_denied'); expect(wrapAsFsError(errno('EWHATEVER'), 'parse_error').kind).toBe( diff --git a/packages/cli/src/serve/fs/errors.ts b/packages/cli/src/serve/fs/errors.ts index b6236b16d..df88dd2ac 100644 --- a/packages/cli/src/serve/fs/errors.ts +++ b/packages/cli/src/serve/fs/errors.ts @@ -22,6 +22,19 @@ export type FsErrorKind = | 'file_too_large' | 'untrusted_workspace' | 'permission_denied' + /** + * Environmental I/O failure that is *not* a permission decision — + * disk full (`ENOSPC`), generic I/O error (`EIO`), filesystem busy + * (`EBUSY`/`ETXTBSY`), path-too-long (`ENAMETOOLONG`), or + * file-descriptor exhaustion (`EMFILE`/`ENFILE`). + * + * Separated from `permission_denied` because monitoring pipelines + * key on `errorKind` for alerting — conflating "ACL denied" with + * "disk full" pages the security oncall when the real action is + * `df -h`. The 503 status communicates "service-level transient + * failure" to PR 19/20 route handlers and SDK consumers. + */ + | 'io_error' | 'parse_error'; /** @@ -32,7 +45,7 @@ export type FsErrorKind = * boundary is being asked to model a transport-level concern that * doesn't belong here (5xx, 401/403 from auth, etc.). */ -export type FsErrorStatus = 400 | 403 | 404 | 413 | 422; +export type FsErrorStatus = 400 | 403 | 404 | 413 | 422 | 503; /** * Default HTTP status mapping. Centralized here so callers can throw @@ -50,6 +63,7 @@ const DEFAULT_STATUS_BY_KIND: Record = { file_too_large: 413, untrusted_workspace: 403, permission_denied: 403, + io_error: 503, parse_error: 400, }; @@ -129,9 +143,30 @@ export function wrapAsFsError( cause: err, hint: 'a path component is not a directory where one was expected', }); + case 'ENOSPC': + return new FsError('io_error', message, { + cause: err, + hint: 'filesystem is full (df -h reporting 100%)', + }); + case 'EIO': + return new FsError('io_error', message, { + cause: err, + hint: 'underlying I/O error (failing disk or kernel-level fault)', + }); + case 'EBUSY': + case 'ETXTBSY': + return new FsError('io_error', message, { + cause: err, + hint: 'file is busy; another process holds an exclusive handle', + }); + case 'ENAMETOOLONG': + return new FsError('io_error', message, { + cause: err, + hint: 'path exceeds the OS PATH_MAX', + }); case 'EMFILE': case 'ENFILE': - return new FsError('permission_denied', message, { + return new FsError('io_error', message, { cause: err, hint: 'too many open files; daemon is at file-descriptor limit', }); diff --git a/packages/cli/src/serve/fs/paths.test.ts b/packages/cli/src/serve/fs/paths.test.ts index 7e5bf9f92..dc5ad6d3b 100644 --- a/packages/cli/src/serve/fs/paths.test.ts +++ b/packages/cli/src/serve/fs/paths.test.ts @@ -266,6 +266,43 @@ describe('resolveWithinWorkspace', () => { expect(typeof out).toBe('string'); }); + it('rejects a multi-hop dangling symlink chain that escapes the workspace', async () => { + // The exploit class: `/leak -> /middle -> /scratch/evil` + // where every link is a symlink and the final target doesn't + // exist. A single-hop guard (read T19's earlier fix) only + // checks the first readlink target — `/middle` — sees it's + // inside the workspace, and lets the chain through. The OS + // write at `/leak` then follows BOTH hops and creates + // `/scratch/evil`. The multi-hop loop fix dereferences every + // link before the containment check. + const evil = path.join(scratch, 'multi-hop-target.txt'); + const middle = path.join(workspace, 'middle'); + const leak = path.join(workspace, 'leak'); + await fsp.symlink(evil, middle, 'file'); + await fsp.symlink(middle, leak, 'file'); + const err = await resolveWithinWorkspace('leak', workspace, 'write').catch( + (e: unknown) => e, + ); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('symlink_escape'); + }); + + it('detects a symlink cycle and rejects with symlink_escape', async () => { + // a -> b -> a — symmetric self-referential pair. realpath + // returns ELOOP on most platforms; the multi-hop loop's inode + // tracking catches this even on filesystems that don't + // surface ELOOP. + const a = path.join(workspace, 'a'); + const b = path.join(workspace, 'b'); + await fsp.symlink(b, a, 'file'); + await fsp.symlink(a, b, 'file'); + const err = await resolveWithinWorkspace('a', workspace, 'write').catch( + (e: unknown) => e, + ); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('symlink_escape'); + }); + it('returns a value usable as ResolvedPath brand', async () => { const target = path.join(workspace, 'b.txt'); await fsp.writeFile(target, 'b'); diff --git a/packages/cli/src/serve/fs/paths.ts b/packages/cli/src/serve/fs/paths.ts index 19e966bd7..4b2599a04 100644 --- a/packages/cli/src/serve/fs/paths.ts +++ b/packages/cli/src/serve/fs/paths.ts @@ -335,19 +335,81 @@ export async function resolveWithinWorkspace( // detects the symlink without following it; `readlink` + // resolved-target containment closes the loop. try { - const linkStat = await fsp.lstat(absolute); - if (linkStat.isSymbolicLink()) { - const target = await fsp.readlink(absolute); + // Walk the FULL symlink chain via `lstat` + `readlink`, + // not just one hop. The earlier single-readlink fix was + // bypassed by `/leak -> /middle -> /etc/passwd` + // (where `middle` is itself a symlink): `findExistingAncestor` + // uses `fsp.stat` which follows the rest of the chain, + // landed on the target's parent, and reported the + // intermediate hop as canonical — letting the OS write + // follow the full chain to the escape target. The loop + // below dereferences every layer up to a max-depth bound + // and tracks visited inodes to surface cycles as + // `symlink_escape`. Once we reach a non-symlink leaf, we + // canonicalize that leaf via the deepest-existing-ancestor + // path (so macOS `/var` vs `/private/var` and Win32 case + // collapse consistently with `boundCanonical`). + let cursor: string = absolute; + const visitedInodes = new Set(); + let firstHopTarget: string | null = null; + let resolvedFully = false; + for (let hop = 0; hop < MAX_ANCESTOR_HOPS; hop++) { + let linkStat: Awaited>; + try { + linkStat = await fsp.lstat(cursor); + } catch (lstatErr) { + const lcode = (lstatErr as NodeJS.ErrnoException)?.code; + if (lcode === 'ENOENT' || lcode === 'ENOTDIR') { + // Reached a non-existent leaf — no symlink to chase + // here. Run the deepest-existing-ancestor check on + // `cursor` so the eventual write target is bounded. + resolvedFully = true; + break; + } + throw lstatErr; + } + if (!linkStat.isSymbolicLink()) { + // Reached a real file/dir — chain terminates here. + resolvedFully = true; + break; + } + // Detect cycles via inode identity (ino is a bigint on + // recent Node versions). Some filesystems return 0 for + // ino (e.g. virtual mounts); the depth bound covers + // those cases as a fallback. + const ino = + typeof linkStat.ino === 'bigint' + ? linkStat.ino + : BigInt(linkStat.ino as unknown as number); + if (ino !== 0n && visitedInodes.has(ino)) { + throw new FsError( + 'symlink_escape', + `symlink cycle detected at ${cursor}`, + { hint: 'symlink loops back onto a previously visited inode' }, + ); + } + if (ino !== 0n) visitedInodes.add(ino); + const target = await fsp.readlink(cursor); const absTarget = path.isAbsolute(target) ? target - : path.resolve(path.dirname(absolute), target); - // The symlink target itself doesn't exist (that's why - // we're in the ENOENT branch), so resolve via the - // deepest existing ancestor so case-insensitive - // filesystems and macOS `/var` vs `/private/var` - // canonicalize consistently with `boundCanonical`. + : path.resolve(path.dirname(cursor), target); + if (firstHopTarget === null) firstHopTarget = target; + cursor = absTarget; + } + if (!resolvedFully) { + throw new FsError( + 'symlink_escape', + `symlink chain exceeded ${MAX_ANCESTOR_HOPS} hops for ${input}`, + { hint: 'symlink chain is too long or contains a cycle' }, + ); + } + // Only run the containment check when we actually traversed + // at least one symlink — `firstHopTarget !== null` means the + // input was a symlink (vs a path through a non-existent + // ancestor that wasn't itself a symlink). + if (firstHopTarget !== null) { const { ancestor: targetAncestor, tail: targetTail } = - await findExistingAncestor(absTarget); + await findExistingAncestor(cursor); const targetAncestorReal = await fsp.realpath(targetAncestor); const canonicalTarget = targetTail ? path.join(targetAncestorReal, targetTail) @@ -356,16 +418,16 @@ export async function resolveWithinWorkspace( throw new FsError( 'symlink_escape', `dangling symlink target escapes workspace: ${input}`, - { hint: `symlink points to ${target}` }, + { hint: `symlink points to ${firstHopTarget}` }, ); } } } catch (err2) { if (err2 instanceof FsError) throw err2; - // `lstat` ENOENT means the input path itself doesn't - // exist (input is a path through a non-existent - // ancestor) — no symlink to worry about; fall through - // to the ancestor walk. + // `lstat` ENOENT on the very first hop means the input + // path itself doesn't exist (input is a path through a + // non-existent ancestor) — no symlink to worry about; + // fall through to the ancestor walk. const code2 = (err2 as NodeJS.ErrnoException)?.code; if (code2 !== 'ENOENT') throw err2; } diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts index eaec8f5ad..5e8fdc56c 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts @@ -374,6 +374,37 @@ describe('WorkspaceFileSystem - write/edit', () => { const out = await h.fs.readText(r, { line: 2, limit: 1 }); expect(out.content.split('\n')[0]).toBe('two'); }); + + it('rejects non-positive-integer opts.line with parse_error', async () => { + const target = path.join(h.workspace, 'v.txt'); + await fsp.writeFile(target, 'a\nb\nc\n'); + const r = await h.fs.resolve('v.txt', 'read'); + for (const bad of [Infinity, -Infinity, 0, -1, 1.5, NaN]) { + const err = await h.fs + .readText(r, { line: bad }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('parse_error'); + } + }); + + it('records matchedIgnore on edit() audit (parity with readText/writeText)', async () => { + const ignore = new Ignore().add(['*.log']); + h = await makeHarness({ ignore }); + const target = path.join(h.workspace, 'app.log'); + await fsp.writeFile(target, 'foo=1\nbar=2\n'); + const r = await h.fs.resolve('app.log', 'edit'); + await h.fs.edit(r, 'foo=1', 'foo=2'); + const access = h.events.find( + (e) => + e.type === FS_ACCESS_EVENT_TYPE && + (e.data as { intent: string }).intent === 'edit', + ); + expect(access).toBeDefined(); + expect((access!.data as { matchedIgnore?: string }).matchedIgnore).toBe( + 'file', + ); + }); }); describe('WorkspaceFileSystem - trust gate', () => { diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.ts b/packages/cli/src/serve/fs/workspaceFileSystem.ts index ee849e475..202d423ea 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts @@ -282,6 +282,23 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { }); } const sizeOutcome = enforceReadSize(st.size, opts.maxBytes); + // Reject `opts.line` values that the docstring forbids + // (positive integer required). Without this guard `Infinity` + // (`Infinity > 1` is true; `Infinity - 1` is still + // `Infinity`) and floats (`2.5 - 1 = 1.5`) flow through to + // `readFileWithLineAndLimit` and degrade silently to weird + // truncation behavior. `NaN` and `0` happen to work via the + // falsy fallback but that's accidental — prefer an explicit + // error. + if ( + opts.line !== undefined && + (!Number.isSafeInteger(opts.line) || opts.line < 1) + ) { + throw new FsError( + 'parse_error', + `line must be a positive integer, got ${opts.line}`, + ); + } // Delegate encoding-aware read to the existing core service so // BOM, CRLF, and iconv-supported codepages remain consistent // with what the tools layer already does. The core service's @@ -290,7 +307,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { // convention SDK consumers expect from line-numbered errors, // editor jump-to-line, etc.). Convert here so the public // contract isn't tied to the internal helper's indexing. - const startLineIndex = opts.line && opts.line > 1 ? opts.line - 1 : 0; + const startLineIndex = opts.line !== undefined ? opts.line - 1 : 0; const result = await this.deps.lowFs.readTextFile({ path: p as string, limit: opts.limit ?? Number.POSITIVE_INFINITY, @@ -447,6 +464,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { const out: ResolvedPath[] = []; const max = opts.maxResults ?? Number.POSITIVE_INFINITY; let escapedCount = 0; + let permissionErrorCount = 0; let transientErrorCount = 0; for (const hit of matches) { if (out.length >= max) break; @@ -457,29 +475,28 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { // sits there), but the realpath isn't — so we resolve // each hit's symlink chain and compare the canonical to // the canonical workspace root. Filtered hits are counted - // and reported via a single aggregated `fs.denied` after + // and reported via aggregated `fs.denied` events after // the loop so per-hit emit doesn't flood the bus when a // misconfigured tree contains many escape symlinks. let canonical: string; try { canonical = await fsp.realpath(absolute); } catch (err) { - // Distinguish between the security-relevant escape kinds - // and transient I/O errors. `ENOENT` (dangling symlink - // or unlinked-mid-glob hit) and `ELOOP` (symlink cycle) - // are the cases that legitimately signal "this hit - // resolved outside of what the boundary trusts" — they - // count as `symlink_escape`. Other errnos (`EIO`, - // `EACCES`, `ENAMETOOLONG`, `EBUSY`) are environmental - // failures; treating them as escapes mislabels the audit - // and gives operators false security signal. Drop those - // hits silently from the result set but track them - // separately for the audit emission below so a - // monitoring pipeline can distinguish "filtered for - // escape" from "filtered for transient FS error". + // Three-way classification so monitoring pipelines can + // tell escapes from access denials from transient I/O: + // - `ENOENT` / `ELOOP` → real `symlink_escape` + // (dangling symlink, symlink cycle) + // - `EACCES` / `EPERM` → `permission_denied` + // (the literal access-denied case the kind names) + // - everything else → `io_error` (EIO, EBUSY, + // ENAMETOOLONG, EMFILE, …) — environmental, NOT a + // security signal. Conflating these poisons audit: + // a failing disk would page security oncall. const code = (err as NodeJS.ErrnoException)?.code; if (code === 'ENOENT' || code === 'ELOOP') { escapedCount += 1; + } else if (code === 'EACCES' || code === 'EPERM') { + permissionErrorCount += 1; } else { transientErrorCount += 1; } @@ -525,12 +542,27 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { hint: `glob filtered ${escapedCount} hit(s) that resolved outside workspace`, }); } - if (transientErrorCount > 0) { + if (permissionErrorCount > 0) { this.deps.audit.recordDenied(this.deps.ctx, { intent: 'glob', input: pattern, errorKind: 'permission_denied', - hint: `glob skipped ${transientErrorCount} hit(s) due to transient I/O errors (EIO/EACCES/ENAMETOOLONG/EBUSY)`, + hint: `glob skipped ${permissionErrorCount} hit(s) due to EACCES/EPERM`, + }); + } + if (transientErrorCount > 0) { + this.deps.audit.recordDenied(this.deps.ctx, { + intent: 'glob', + input: pattern, + // `io_error` (not `permission_denied`) so monitoring + // pipelines that page security oncall on + // `permission_denied` aren't woken up by a failing disk + // or busy file. The kind was added to `FsErrorKind` for + // exactly this case (and for `wrapAsFsError`'s ENOSPC / + // EIO / EBUSY / ETXTBSY / ENAMETOOLONG / EMFILE / ENFILE + // mappings). + errorKind: 'io_error', + hint: `glob skipped ${transientErrorCount} hit(s) due to transient I/O errors (EIO/EBUSY/ENAMETOOLONG/EMFILE)`, }); } this.deps.audit.recordAccess(this.deps.ctx, { @@ -628,11 +660,23 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { path: p as string, content: next, }); + // Symmetric with `readText` / `writeText` — operators + // monitoring `fs.access` need to see when an edit landed on + // a `.gitignore`d / `.qwenignore`d file (build artifacts, + // logs, etc.) rather than only learning about + // matchedIgnore for reads and writes. + const editVerdict = shouldIgnore( + p, + this.deps.boundWorkspace, + this.deps.ignore, + 'file', + ); this.deps.audit.recordAccess(this.deps.ctx, { intent: 'edit', absolute: p, durationMs: performance.now() - start, sizeBytes: buf.length, + matchedIgnore: editVerdict.ignored ? editVerdict.category : undefined, }); return { writtenBytes: buf.length }; } catch (err) {