fix(serve/fs): close 7 review-round-5 findings (#4250)

DeepSeek round-5 surfaced 9 comments; 7 are real findings (the other 2 — UTF-8 truncation + glob opts.cwd — already fixed in 546834267 from round-4 and replied-to inline). Stack on round-4. 1. edit() empty-oldText silent-prepend (workspaceFileSystem.ts) — Critical silent data corruption. JS `''.indexOf('')` returns 0, so without an empty-string guard `current.slice(0,0) + newText + current.slice(0)` = `newText + current` — silently prepends `newText` to the whole file with a success audit event. PR 20 routes that thread user-supplied `oldText` verbatim must not be able to trigger this. Now throws `parse_error` BEFORE the read with a hint explaining why empty matches are rejected. 2. DOS device name regex misses bare names + first-extension forms (paths.ts) — Windows attack surface. The earlier /\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i only caught the last-extension form (file.CON). NTFS reserves these names regardless of extension: CON, NUL, CON.txt, NUL.dat, CON.foo.bar are ALL reserved device handles. New regex /(^|\.)(CON|...|LPT[1-9])(\.|$)/i covers all four positions (bare / first-ext / last-ext / middle-ext) while still admitting legitimate substrings (BACON, concat.txt, precon.go). 3. Buffer.byteLength replaces Buffer.from for size-only checks (workspaceFileSystem.ts) — 5 MB heap allocation per write eliminated. `writeText` and `edit()` previously materialized the entire UTF-8 payload (up to MAX_WRITE_BYTES) just to read `.length`; `Buffer.byteLength(content, 'utf-8')` returns the count without allocating. 4. edit() error message includes oldText snippet (workspaceFileSystem.ts) — Production debuggability. The earlier hint was just "edit() expects oldText to appear verbatim in the file" — at 3 AM an operator can't tell whether the mismatch is whitespace, a stale file, or a wrong target. Now includes a JSON-quoted truncated snippet (max 80 chars + ellipsis) of the searched-for text. 5. recordAndWrap forwards FsError message into fs.denied audit (workspaceFileSystem.ts + audit.ts) — Audit observability gap. Audit consumers debugging an incident saw `errorKind` + `hint` but lost the underlying OS error detail (path, errno text, byte count). FsDeniedAuditPayload now carries an optional `message` field; recordAndWrap forwards `fs.message` automatically. 6. EISDIR / ENOTDIR distinct hints (errors.ts) — UX. Both shared the same hint "a path component is not a directory where one was expected" — for EISDIR (path IS a directory but a file was expected) the wording was reversed. Now distinct hints with the errno name explicitly cited. 7. kindFromStats / kindFromDirent merged into kindFromStatLike (workspaceFileSystem.ts) — duplicate function bodies removed. Both fs.Stats and fs.Dirent expose the same isFile / isDirectory / isSymbolicLink interface, and both targets (FsStat['kind'], FsEntry['kind']) are the same 4-value union. Single helper avoids drift if the union grows. 4 new tests: - bare/multi-ext DOS device names (CON, NUL, CON.txt, CON.foo.bar) + legitimate substrings (BACON, concat, precon, contemplating) - edit() empty oldText -> parse_error + file unchanged - edit() not-found error includes searched snippet in hint - fs.denied audit payload carries FsError message Plus the 2 already-fixed items (UTF-8 boundary, glob opts.cwd) have new test coverage from round-4. 433/433 serve tests pass; typecheck + eslint clean. Stack: 7b0db4c3a → a81ada43f → efd7a4611 → b38e82157 → 911cb8e5d → 546834267 → THIS 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code)
2026-05-24 22:03:46 +00:00 · 2026-05-18 01:48:45 +08:00 · 2026-05-18 01:48:45 +08:00 · ebd9e78a17
commit ebd9e78a17
parent 546834267f
6 changed files with 151 additions and 27 deletions
--- a/packages/cli/src/serve/fs/audit.ts
+++ b/packages/cli/src/serve/fs/audit.ts
@ -77,6 +77,16 @@ export interface FsDeniedAuditPayload {
  relPath?: string;
  errorKind: FsErrorKind;
  hint?: string;
+  /**
+   * Human-readable error message from the underlying `FsError`.
+   * Audit consumers debugging a production incident need to see
+   * the actual OS error (e.g. errno detail, byte counts) rather
+   * than only `errorKind` + `hint`. Optional so privacy-sensitive
+   * deployments can suppress it; populated by default by
+   * `recordDenied` since the orchestrator already wraps every body
+   * error into an `FsError` whose message we can quote.
+   */
+  message?: string;
 }

 /**
@ -204,6 +214,7 @@ export function createAuditPublisher(
        errorKind: record.errorKind,
      };
      if (record.hint) payload.hint = record.hint;
+      if (record.message) payload.message = record.message;
      if (includeRawPaths) {
        payload.relPath = relForAudit(record.input, boundWorkspace);
      }
--- a/packages/cli/src/serve/fs/errors.ts
+++ b/packages/cli/src/serve/fs/errors.ts
@ -138,10 +138,14 @@ export function wrapAsFsError(
        hint: 'symlink chain forms a cycle or exceeds SYMLOOP_MAX',
      });
    case 'EISDIR':
+      return new FsError('parse_error', message, {
+        cause: err,
+        hint: 'EISDIR — path is a directory but a regular file was expected',
+      });
    case 'ENOTDIR':
      return new FsError('parse_error', message, {
        cause: err,
-        hint: 'a path component is not a directory where one was expected',
+        hint: 'ENOTDIR — a path component is a regular file but a directory was expected',
      });
    case 'ENOSPC':
      return new FsError('io_error', message, {
--- a/packages/cli/src/serve/fs/paths.test.ts
+++ b/packages/cli/src/serve/fs/paths.test.ts
@ -125,6 +125,27 @@ describe('hasSuspiciousPathPattern', () => {
    expect(hasSuspiciousPathPattern('bar.LPT1')).toBe(true);
  });

+  it('rejects bare and multi-extension DOS device names (NTFS reserves regardless of extension)', () => {
+    // Bare reserved names
+    expect(hasSuspiciousPathPattern('CON')).toBe(true);
+    expect(hasSuspiciousPathPattern('NUL')).toBe(true);
+    expect(hasSuspiciousPathPattern('PRN')).toBe(true);
+    expect(hasSuspiciousPathPattern('AUX')).toBe(true);
+    expect(hasSuspiciousPathPattern('COM1')).toBe(true);
+    expect(hasSuspiciousPathPattern('LPT9')).toBe(true);
+    // First-extension forms
+    expect(hasSuspiciousPathPattern('CON.txt')).toBe(true);
+    expect(hasSuspiciousPathPattern('NUL.dat')).toBe(true);
+    expect(hasSuspiciousPathPattern('LPT1.log')).toBe(true);
+    // Middle-extension form
+    expect(hasSuspiciousPathPattern('CON.foo.bar')).toBe(true);
+    // Substring of longer name must NOT match (BACON, concat, lprint…)
+    expect(hasSuspiciousPathPattern('BACON')).toBe(false);
+    expect(hasSuspiciousPathPattern('concat.txt')).toBe(false);
+    expect(hasSuspiciousPathPattern('precon.go')).toBe(false);
+    expect(hasSuspiciousPathPattern('contemplating.md')).toBe(false);
+  });
+
  it('rejects three-or-more-dot path components', () => {
    expect(hasSuspiciousPathPattern('foo/.../bar')).toBe(true);
    expect(hasSuspiciousPathPattern('.../leaf')).toBe(true);
--- a/packages/cli/src/serve/fs/paths.ts
+++ b/packages/cli/src/serve/fs/paths.ts
@ -180,7 +180,22 @@ export function hasSuspiciousPathPattern(p: string): boolean {
  for (const seg of p.split(/[\\/]/)) {
    if (seg === '' || seg === '.' || seg === '..') continue;
    if (/[.\s]+$/.test(seg)) return true;
-    if (/\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i.test(seg)) return true;
+    // DOS / NTFS reserved device names. The Windows kernel treats
+    // these as device handles regardless of extension, so all four
+    // forms are reserved:
+    //   - bare:        `CON`, `NUL`, `LPT1`
+    //   - first-ext:   `CON.txt`, `NUL.dat`, `COM1.json`
+    //   - last-ext:    `file.CON`, `audit.PRN`, `bar.LPT9`
+    //   - any-ext:     `CON.foo.bar`
+    // The earlier regex anchored to `$` only caught the last-ext
+    // form. Anchor to either start (bare or first-ext) or `.`
+    // (last-ext or middle-ext) to cover the full set. Names
+    // containing the reserved word as a substring of a longer
+    // segment (e.g. `BACON`, `concat.txt`) are NOT reserved — the
+    // boundary anchor `(^|\.)` keeps those legitimate.
+    if (/(^|\.)(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(\.|$)/i.test(seg)) {
+      return true;
+    }
  }
  return false;
 }
--- a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts
+++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts
@ -348,6 +348,32 @@ describe('WorkspaceFileSystem - write/edit', () => {
    expect((err as { kind: string }).kind).toBe('file_too_large');
  });

+  it('rejects edit() with empty oldText (would silently prepend newText otherwise)', async () => {
+    // JS `''.indexOf('')` returns 0, so without the empty-check
+    // `current.slice(0, 0) + newText + current.slice(0)` would
+    // silently prepend `newText` to the entire file with a success
+    // audit event — textbook silent data corruption. Reject up-front.
+    const target = path.join(h.workspace, 'silent.txt');
+    await fsp.writeFile(target, 'original\n');
+    const r = await h.fs.resolve('silent.txt', 'edit');
+    const err = await h.fs.edit(r, '', 'INJECTED').catch((e: unknown) => e);
+    expect(isFsError(err)).toBe(true);
+    expect((err as { kind: string }).kind).toBe('parse_error');
+    // File must be unchanged.
+    const after = await fsp.readFile(target, 'utf-8');
+    expect(after).toBe('original\n');
+  });
+
+  it('edit() error includes oldText snippet in the hint', async () => {
+    const target = path.join(h.workspace, 'snippet.txt');
+    await fsp.writeFile(target, 'foo=1\nbar=2\n');
+    const r = await h.fs.resolve('snippet.txt', 'edit');
+    const err = (await h.fs
+      .edit(r, 'this string is not present', 'X')
+      .catch((e: unknown) => e)) as { hint?: string };
+    expect(err.hint).toMatch(/this string is not present/);
+  });
+
  it('rejects edit on binary file', async () => {
    const bin = path.join(h.workspace, 'bin.dat');
    const buf = Buffer.alloc(64);
@ -527,6 +553,21 @@ describe('WorkspaceFileSystem - audit always emits on body errors', () => {
    const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
    expect(denied).toBeDefined();
  });
+
+  it('fs.denied audit payload carries the FsError message for forensic context', async () => {
+    // The earlier audit only recorded `errorKind` + `hint`; the
+    // underlying OS / `FsError` message (path, errno detail, byte
+    // count) was lost from the audit trail. `recordAndWrap` now
+    // forwards the message so audit consumers debugging a
+    // production incident can see the actual cause.
+    const err = (await h.fs
+      .resolve('../escape', 'read')
+      .catch((e: unknown) => e)) as { message: string };
+    expect(err.message).toMatch(/escapes workspace/);
+    const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
+    expect(denied).toBeDefined();
+    expect((denied!.data as { message?: string }).message).toBe(err.message);
+  });
 });

 describe('WorkspaceFileSystem - glob escape audit', () => {
--- a/packages/cli/src/serve/fs/workspaceFileSystem.ts
+++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts
@ -231,7 +231,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
      assertTrustedForIntent(this.deps.trusted, 'stat');
      const st = await fsp.lstat(p as string);
      const out: FsStat = {
-        kind: kindFromStats(st),
+        kind: kindFromStatLike(st),
        sizeBytes: st.size,
        modifiedMs: st.mtimeMs,
      };
@ -413,7 +413,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
        // call `resolve()` separately. Treating each child as
        // implicitly-resolved here would be a brand-cast bypass.
        const childAbs = path.join(p as string, d.name);
-        const kind = kindFromDirent(d);
+        const kind = kindFromStatLike(d);
        const verdict = shouldIgnore(
          childAbs as ResolvedPath,
          this.deps.boundWorkspace,
@ -616,8 +616,13 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
    const start = performance.now();
    try {
      assertTrustedForIntent(this.deps.trusted, 'write');
-      const buf = Buffer.from(content, 'utf-8');
-      enforceWriteSize(buf.length);
+      // `Buffer.byteLength` returns the UTF-8 byte count without
+      // allocating a Buffer. The earlier `Buffer.from(content,
+      // 'utf-8')` materialized the entire payload (up to
+      // `MAX_WRITE_BYTES = 5 MiB`) just to read its `.length`,
+      // wasting heap on every write.
+      const sizeBytes = Buffer.byteLength(content, 'utf-8');
+      enforceWriteSize(sizeBytes);
      await this.deps.lowFs.writeTextFile({
        path: p as string,
        content,
@ -633,7 +638,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
        intent: 'write',
        absolute: p,
        durationMs: performance.now() - start,
-        sizeBytes: buf.length,
+        sizeBytes,
        matchedIgnore: verdict.ignored ? verdict.category : undefined,
      });
    } catch (err) {
@ -672,6 +677,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
          hint: 'edit() works on text files only',
        });
      }
+      // Reject empty `oldText` BEFORE reading. JavaScript's
+      // `''.indexOf('')` returns `0`, so without this guard
+      // `current.slice(0, 0) + newText + current.slice(0)` would
+      // silently prepend `newText` to the entire file and emit a
+      // success audit event — a textbook silent data corruption
+      // bug. PR 20 routes that pass user-supplied `oldText`
+      // through verbatim must not be able to trigger it.
+      if (oldText.length === 0) {
+        throw new FsError(
+          'parse_error',
+          `oldText must be a non-empty string for edit on ${p}`,
+          {
+            hint: 'empty oldText would match at position 0 and silently prepend newText',
+          },
+        );
+      }
      const current = await fsp.readFile(p as string, 'utf-8');
      // Post-read TOCTOU guard — catches the swap-during-read
      // attack where `p` is replaced with a symlink between
@ -686,14 +707,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
      // mechanical.
      const idx = current.indexOf(oldText);
      if (idx === -1) {
+        // Include a snippet of `oldText` in the hint so an operator
+        // staring at "edit failed" at 3 AM can tell whether the
+        // mismatch is whitespace, a stale file, or a wrong target
+        // path. Truncate to keep the hint readable on a one-line
+        // log; the full `oldText` is always reproducible from the
+        // request body.
+        const snippet =
+          oldText.length > 80 ? oldText.slice(0, 80) + '…' : oldText;
        throw new FsError('parse_error', `oldText not found in ${p}`, {
-          hint: 'edit() expects oldText to appear verbatim in the file',
+          hint: `edit() expects oldText to appear verbatim; searched for: ${JSON.stringify(snippet)}`,
        });
      }
      const next =
        current.slice(0, idx) + newText + current.slice(idx + oldText.length);
-      const buf = Buffer.from(next, 'utf-8');
-      enforceWriteSize(buf.length);
+      const writtenBytes = Buffer.byteLength(next, 'utf-8');
+      enforceWriteSize(writtenBytes);
      await this.deps.lowFs.writeTextFile({
        path: p as string,
        content: next,
@ -713,10 +742,10 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
        intent: 'edit',
        absolute: p,
        durationMs: performance.now() - start,
-        sizeBytes: buf.length,
+        sizeBytes: writtenBytes,
        matchedIgnore: editVerdict.ignored ? editVerdict.category : undefined,
      });
-      return { writtenBytes: buf.length };
+      return { writtenBytes };
    } catch (err) {
      throw this.recordAndWrap(err, 'edit', p as string);
    }
@ -742,6 +771,11 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
      input,
      errorKind: fs.kind,
      hint: fs.hint,
+      // Quote the underlying OS / FsError message so audit
+      // consumers debugging a production incident can see the
+      // actual cause (errno text, byte counts, glob pattern,
+      // etc.) rather than just `errorKind` + `hint`.
+      message: fs.message,
    });
    return fs;
  }
@ -826,25 +860,23 @@ async function assertInodeStableAfterRead(
  }
 }

-function kindFromStats(st: {
+/**
+ * Map a `Stats` or `Dirent` (both expose the same `isFile` /
+ * `isDirectory` / `isSymbolicLink` methods) to the boundary's
+ * narrow `kind` union. `FsStat['kind']` and `FsEntry['kind']` are
+ * the same 4-value union, so a single helper keeps the
+ * classification rule in one place — reviewer flagged the previous
+ * `kindFromStats` + `kindFromDirent` pair as a maintenance hazard
+ * (drift risk if the union grows).
+ */
+function kindFromStatLike(s: {
  isFile(): boolean;
  isDirectory(): boolean;
  isSymbolicLink(): boolean;
 }): FsStat['kind'] {
-  if (st.isSymbolicLink()) return 'symlink';
-  if (st.isDirectory()) return 'directory';
-  if (st.isFile()) return 'file';
-  return 'other';
-}
-
-function kindFromDirent(d: {
-  isFile(): boolean;
-  isDirectory(): boolean;
-  isSymbolicLink(): boolean;
-}): FsEntry['kind'] {
-  if (d.isSymbolicLink()) return 'symlink';
-  if (d.isDirectory()) return 'directory';
-  if (d.isFile()) return 'file';
+  if (s.isSymbolicLink()) return 'symlink';
+  if (s.isDirectory()) return 'directory';
+  if (s.isFile()) return 'file';
  return 'other';
 }