fix(serve/fs): close 3 review-round-2 findings (#4250)

The wenshao + DeepSeek reviewer pass on a81ada43f surfaced 3 more issues; this commit fixes them. 1. Dangling-symlink write escape (paths.ts) — Critical security bug. A request like `write /ws/escape` where `<ws>/escape` is a symlink whose target doesn't exist YET would pass the ENOENT-tolerant ancestor walk: realpath fails ENOENT, the walk-up returns `<ws>` as ancestor, the canonical becomes `<ws>/escape`, containment passes — but the eventual write follows the symlink and creates the file at the symlink target outside the workspace. lstat-then-readlink before the ancestor walk catches this; the symlink target is itself resolved via the deepest existing ancestor so macOS /private/var canonicalization stays consistent with boundCanonical (an absolute target inside the workspace tmpdir would otherwise have been false-flagged on macOS). 2. glob realpath catch over-reported symlink_escape (workspaceFileSystem.ts) — every realpath failure inside the per-hit boundary loop was counted as `symlink_escape`. EIO, EACCES, ENAMETOOLONG, EBUSY are environmental failures, not security events; mislabeling them poisoned the audit signal for operators trying to investigate genuine escape attempts. Now distinguished: ENOENT/ELOOP count as escapes; other errnos count as transient errors and emit a separate aggregated `fs.denied` with errorKind: 'permission_denied'. 3. policy.ts:enforceReadSize JSDoc said the boundary "intentionally does NOT throw" — stale after a81ada43f's hard-cap refactor. Rewrote to clarify the helper is the soft truncation gate that only fires under the hard cap; readText itself enforces the hard cap with file_too_large via its pre-stat check. The readme/contract is now consistent with workspaceFileSystem.ts. 2 new tests: - dangling symlink targeting outside-workspace path → symlink_escape - dangling symlink targeting future-inside-workspace path → succeeds (ahead-of-mkdir flow for atomic-write-via-rename) 420/420 serve tests pass; typecheck clean. Remaining tracked follow-ups (per PR review reply): - list/glob brand cast (P2 deferred per PR description) - glob audit pathHash hashes pattern not paths - edit() TOCTOU read-modify-write race (atomic-via-temp + rename) - wrapAsFsError ENOSPC/EIO mapping to a distinct kind - runQwenServe → fsFactory injection integration test - glob maxResults streaming (glob.iterate) 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code)
2026-05-25 06:26:53 +00:00 · 2026-05-18 00:02:50 +08:00 · 2026-05-18 00:02:50 +08:00 · efd7a46115
commit efd7a46115
parent a81ada43f5
4 changed files with 124 additions and 11 deletions
--- a/packages/cli/src/serve/fs/paths.test.ts
+++ b/packages/cli/src/serve/fs/paths.test.ts
@ -230,6 +230,42 @@ describe('resolveWithinWorkspace', () => {
    ).rejects.toMatchObject({ kind: 'parse_error' });
  });

+  it('rejects a dangling symlink whose target escapes the workspace (write intent)', async () => {
+    // Reproduces the exploit class flagged at PR #4250: an attacker
+    // creates `<ws>/escape -> /etc/cron.d/evil` BEFORE the target
+    // exists, then issues a write request. Without the lstat-then-
+    // readlink check the ENOENT-tolerant ancestor walk would happily
+    // return `<ws>/escape` as the canonical write target and the
+    // OS-level write would create the file at the symlink target
+    // outside the workspace.
+    const outsideTarget = path.join(scratch, 'outside-not-yet-existing.txt');
+    const danglingLink = path.join(workspace, 'escape');
+    await fsp.symlink(outsideTarget, danglingLink, 'file');
+    const err = await resolveWithinWorkspace(
+      'escape',
+      workspace,
+      'write',
+    ).catch((e: unknown) => e);
+    expect(isFsError(err)).toBe(true);
+    expect((err as { kind: string }).kind).toBe('symlink_escape');
+  });
+
+  it('allows a dangling symlink whose (not-yet-existing) target stays inside workspace', async () => {
+    // Symmetric to the escape case: a dangling symlink pointing at
+    // a future file INSIDE the workspace is a normal ahead-of-mkdir
+    // flow (test fixtures, atomic-write-via-rename). Resolve must
+    // succeed for write intent.
+    const insideTarget = path.join(workspace, 'will-create.txt');
+    const link = path.join(workspace, 'pending-link');
+    await fsp.symlink(insideTarget, link, 'file');
+    const out = await resolveWithinWorkspace(
+      'pending-link',
+      workspace,
+      'write',
+    );
+    expect(typeof out).toBe('string');
+  });
+
  it('returns a value usable as ResolvedPath brand', async () => {
    const target = path.join(workspace, 'b.txt');
    await fsp.writeFile(target, 'b');
--- a/packages/cli/src/serve/fs/paths.ts
+++ b/packages/cli/src/serve/fs/paths.ts
@ -311,6 +311,51 @@ export async function resolveWithinWorkspace(
  } catch (err) {
    const code = (err as NodeJS.ErrnoException)?.code;
    if (code === 'ENOENT' && ENOENT_TOLERATING_INTENTS.has(intent)) {
+      // Dangling-symlink write-escape guard. `realpath` follows
+      // symlinks, so a path like `<ws>/leak -> /etc/cron.d/evil`
+      // (where the target doesn't exist YET) throws ENOENT here.
+      // Without this branch the ENOENT-tolerant ancestor walk
+      // below would happily walk up to the workspace root and
+      // return `<ws>/leak` as the canonical write target — but
+      // the OS-level write would follow the symlink to
+      // `/etc/cron.d/evil` and create the file there. `lstat`
+      // detects the symlink without following it; `readlink` +
+      // resolved-target containment closes the loop.
+      try {
+        const linkStat = await fsp.lstat(absolute);
+        if (linkStat.isSymbolicLink()) {
+          const target = await fsp.readlink(absolute);
+          const absTarget = path.isAbsolute(target)
+            ? target
+            : path.resolve(path.dirname(absolute), target);
+          // The symlink target itself doesn't exist (that's why
+          // we're in the ENOENT branch), so resolve via the
+          // deepest existing ancestor so case-insensitive
+          // filesystems and macOS `/var` vs `/private/var`
+          // canonicalize consistently with `boundCanonical`.
+          const { ancestor: targetAncestor, tail: targetTail } =
+            await findExistingAncestor(absTarget);
+          const targetAncestorReal = await fsp.realpath(targetAncestor);
+          const canonicalTarget = targetTail
+            ? path.join(targetAncestorReal, targetTail)
+            : targetAncestorReal;
+          if (!isWithinRoot(canonicalTarget, boundCanonical)) {
+            throw new FsError(
+              'symlink_escape',
+              `dangling symlink target escapes workspace: ${input}`,
+              { hint: `symlink points to ${target}` },
+            );
+          }
+        }
+      } catch (err2) {
+        if (err2 instanceof FsError) throw err2;
+        // `lstat` ENOENT means the input path itself doesn't
+        // exist (input is a path through a non-existent
+        // ancestor) — no symlink to worry about; fall through
+        // to the ancestor walk.
+        const code2 = (err2 as NodeJS.ErrnoException)?.code;
+        if (code2 !== 'ENOENT') throw err2;
+      }
      const { ancestor, tail } = await findExistingAncestor(absolute);
      const ancestorReal = await fsp.realpath(ancestor);
      canonical = tail ? path.join(ancestorReal, tail) : ancestorReal;
--- a/packages/cli/src/serve/fs/policy.ts
+++ b/packages/cli/src/serve/fs/policy.ts
@ -160,11 +160,19 @@ export interface ReadSizeOutcome {
 }

 /**
- * Decide how many bytes a `readText` call should pull off disk
- * given the file's actual size. Reads above the cap surface
- * `truncated: true`; the boundary intentionally does NOT throw,
- * matching claude-code's behavior so a large config file still
- * returns useful content rather than an opaque error.
+ * Decide how many bytes a `readText` call should return given the
+ * file's actual size and the caller's optional tighter cap.
+ *
+ * **Note**: this helper is the *soft* truncation gate that fires
+ * when `opts.maxBytes < fileSize <= MAX_READ_BYTES`. It runs only
+ * AFTER `readText`'s pre-stat hard-cap check has rejected files
+ * above `MAX_READ_BYTES` with `file_too_large` (see
+ * `workspaceFileSystem.ts:readText`). Within the hard cap the
+ * caller can opt into a tighter byte ceiling via `opts.maxBytes`,
+ * and we surface that truncation via `truncated: true` rather
+ * than throwing — operators want to see a partial config file
+ * rather than an opaque error when they explicitly opted in to a
+ * smaller window.
 */
 export function enforceReadSize(
  fileBytes: number,
--- a/packages/cli/src/serve/fs/workspaceFileSystem.ts
+++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts
@ -15,7 +15,7 @@ import { glob as globAsync } from 'glob';
 // + 31 tests post-commit. The inline `type` modifiers below tell the
 // `consistent-type-imports` rule per-symbol intent so future autofixes
 // don't repeat the regression.
- 
+
 import {
  StandardFileSystemService,
  loadIgnoreRules,
@ -447,6 +447,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
      const out: ResolvedPath[] = [];
      const max = opts.maxResults ?? Number.POSITIVE_INFINITY;
      let escapedCount = 0;
+      let transientErrorCount = 0;
      for (const hit of matches) {
        if (out.length >= max) break;
        const absolute = path.resolve(hit);
@ -462,11 +463,26 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
        let canonical: string;
        try {
          canonical = await fsp.realpath(absolute);
-        } catch {
-          // Dangling symlink or transient error: treat as escape
-          // so the hit isn't returned to the caller and the audit
-          // surfaces the count.
-          escapedCount += 1;
+        } catch (err) {
+          // Distinguish between the security-relevant escape kinds
+          // and transient I/O errors. `ENOENT` (dangling symlink
+          // or unlinked-mid-glob hit) and `ELOOP` (symlink cycle)
+          // are the cases that legitimately signal "this hit
+          // resolved outside of what the boundary trusts" — they
+          // count as `symlink_escape`. Other errnos (`EIO`,
+          // `EACCES`, `ENAMETOOLONG`, `EBUSY`) are environmental
+          // failures; treating them as escapes mislabels the audit
+          // and gives operators false security signal. Drop those
+          // hits silently from the result set but track them
+          // separately for the audit emission below so a
+          // monitoring pipeline can distinguish "filtered for
+          // escape" from "filtered for transient FS error".
+          const code = (err as NodeJS.ErrnoException)?.code;
+          if (code === 'ENOENT' || code === 'ELOOP') {
+            escapedCount += 1;
+          } else {
+            transientErrorCount += 1;
+          }
          continue;
        }
        const rel = path.relative(this.deps.boundWorkspace, canonical);
@ -509,6 +525,14 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
          hint: `glob filtered ${escapedCount} hit(s) that resolved outside workspace`,
        });
      }
+      if (transientErrorCount > 0) {
+        this.deps.audit.recordDenied(this.deps.ctx, {
+          intent: 'glob',
+          input: pattern,
+          errorKind: 'permission_denied',
+          hint: `glob skipped ${transientErrorCount} hit(s) due to transient I/O errors (EIO/EACCES/ENAMETOOLONG/EBUSY)`,
+        });
+      }
      this.deps.audit.recordAccess(this.deps.ctx, {
        intent: 'glob',
        absolute: cwd as ResolvedPath,