fix(serve/fs): close 7 review-round-5 findings (#4250)

DeepSeek round-5 surfaced 9 comments; 7 are real findings (the
other 2 — UTF-8 truncation + glob opts.cwd — already fixed in
546834267 from round-4 and replied-to inline). Stack on round-4.

1. edit() empty-oldText silent-prepend (workspaceFileSystem.ts) —
   Critical silent data corruption. JS `''.indexOf('')` returns 0,
   so without an empty-string guard
   `current.slice(0,0) + newText + current.slice(0)` = `newText +
   current` — silently prepends `newText` to the whole file with
   a success audit event. PR 20 routes that thread user-supplied
   `oldText` verbatim must not be able to trigger this. Now
   throws `parse_error` BEFORE the read with a hint explaining
   why empty matches are rejected.

2. DOS device name regex misses bare names + first-extension forms
   (paths.ts) — Windows attack surface. The earlier
   /\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i only caught the
   last-extension form (file.CON). NTFS reserves these names
   regardless of extension: CON, NUL, CON.txt, NUL.dat,
   CON.foo.bar are ALL reserved device handles. New regex
   /(^|\.)(CON|...|LPT[1-9])(\.|$)/i covers all four positions
   (bare / first-ext / last-ext / middle-ext) while still
   admitting legitimate substrings (BACON, concat.txt, precon.go).

3. Buffer.byteLength replaces Buffer.from for size-only checks
   (workspaceFileSystem.ts) — 5 MB heap allocation per write
   eliminated. `writeText` and `edit()` previously materialized
   the entire UTF-8 payload (up to MAX_WRITE_BYTES) just to read
   `.length`; `Buffer.byteLength(content, 'utf-8')` returns the
   count without allocating.

4. edit() error message includes oldText snippet
   (workspaceFileSystem.ts) — Production debuggability. The
   earlier hint was just "edit() expects oldText to appear
   verbatim in the file" — at 3 AM an operator can't tell whether
   the mismatch is whitespace, a stale file, or a wrong target.
   Now includes a JSON-quoted truncated snippet (max 80 chars +
   ellipsis) of the searched-for text.

5. recordAndWrap forwards FsError message into fs.denied audit
   (workspaceFileSystem.ts + audit.ts) — Audit observability gap.
   Audit consumers debugging an incident saw `errorKind` + `hint`
   but lost the underlying OS error detail (path, errno text,
   byte count). FsDeniedAuditPayload now carries an optional
   `message` field; recordAndWrap forwards `fs.message`
   automatically.

6. EISDIR / ENOTDIR distinct hints (errors.ts) — UX. Both shared
   the same hint "a path component is not a directory where one
   was expected" — for EISDIR (path IS a directory but a file was
   expected) the wording was reversed. Now distinct hints with
   the errno name explicitly cited.

7. kindFromStats / kindFromDirent merged into kindFromStatLike
   (workspaceFileSystem.ts) — duplicate function bodies removed.
   Both fs.Stats and fs.Dirent expose the same isFile /
   isDirectory / isSymbolicLink interface, and both targets
   (FsStat['kind'], FsEntry['kind']) are the same 4-value union.
   Single helper avoids drift if the union grows.

4 new tests:
- bare/multi-ext DOS device names (CON, NUL, CON.txt, CON.foo.bar)
  + legitimate substrings (BACON, concat, precon, contemplating)
- edit() empty oldText -> parse_error + file unchanged
- edit() not-found error includes searched snippet in hint
- fs.denied audit payload carries FsError message

Plus the 2 already-fixed items (UTF-8 boundary, glob opts.cwd)
have new test coverage from round-4.

433/433 serve tests pass; typecheck + eslint clean.

Stack: 7b0db4c3aa81ada43fefd7a4611b38e82157911cb8e5d546834267 → THIS

🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code)
This commit is contained in:
doudouOUC 2026-05-18 01:48:45 +08:00
parent 546834267f
commit ebd9e78a17
6 changed files with 151 additions and 27 deletions

View file

@ -77,6 +77,16 @@ export interface FsDeniedAuditPayload {
relPath?: string;
errorKind: FsErrorKind;
hint?: string;
/**
* Human-readable error message from the underlying `FsError`.
* Audit consumers debugging a production incident need to see
* the actual OS error (e.g. errno detail, byte counts) rather
* than only `errorKind` + `hint`. Optional so privacy-sensitive
* deployments can suppress it; populated by default by
* `recordDenied` since the orchestrator already wraps every body
* error into an `FsError` whose message we can quote.
*/
message?: string;
}
/**
@ -204,6 +214,7 @@ export function createAuditPublisher(
errorKind: record.errorKind,
};
if (record.hint) payload.hint = record.hint;
if (record.message) payload.message = record.message;
if (includeRawPaths) {
payload.relPath = relForAudit(record.input, boundWorkspace);
}

View file

@ -138,10 +138,14 @@ export function wrapAsFsError(
hint: 'symlink chain forms a cycle or exceeds SYMLOOP_MAX',
});
case 'EISDIR':
return new FsError('parse_error', message, {
cause: err,
hint: 'EISDIR — path is a directory but a regular file was expected',
});
case 'ENOTDIR':
return new FsError('parse_error', message, {
cause: err,
hint: 'a path component is not a directory where one was expected',
hint: 'ENOTDIR — a path component is a regular file but a directory was expected',
});
case 'ENOSPC':
return new FsError('io_error', message, {

View file

@ -125,6 +125,27 @@ describe('hasSuspiciousPathPattern', () => {
expect(hasSuspiciousPathPattern('bar.LPT1')).toBe(true);
});
it('rejects bare and multi-extension DOS device names (NTFS reserves regardless of extension)', () => {
// Bare reserved names
expect(hasSuspiciousPathPattern('CON')).toBe(true);
expect(hasSuspiciousPathPattern('NUL')).toBe(true);
expect(hasSuspiciousPathPattern('PRN')).toBe(true);
expect(hasSuspiciousPathPattern('AUX')).toBe(true);
expect(hasSuspiciousPathPattern('COM1')).toBe(true);
expect(hasSuspiciousPathPattern('LPT9')).toBe(true);
// First-extension forms
expect(hasSuspiciousPathPattern('CON.txt')).toBe(true);
expect(hasSuspiciousPathPattern('NUL.dat')).toBe(true);
expect(hasSuspiciousPathPattern('LPT1.log')).toBe(true);
// Middle-extension form
expect(hasSuspiciousPathPattern('CON.foo.bar')).toBe(true);
// Substring of longer name must NOT match (BACON, concat, lprint…)
expect(hasSuspiciousPathPattern('BACON')).toBe(false);
expect(hasSuspiciousPathPattern('concat.txt')).toBe(false);
expect(hasSuspiciousPathPattern('precon.go')).toBe(false);
expect(hasSuspiciousPathPattern('contemplating.md')).toBe(false);
});
it('rejects three-or-more-dot path components', () => {
expect(hasSuspiciousPathPattern('foo/.../bar')).toBe(true);
expect(hasSuspiciousPathPattern('.../leaf')).toBe(true);

View file

@ -180,7 +180,22 @@ export function hasSuspiciousPathPattern(p: string): boolean {
for (const seg of p.split(/[\\/]/)) {
if (seg === '' || seg === '.' || seg === '..') continue;
if (/[.\s]+$/.test(seg)) return true;
if (/\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i.test(seg)) return true;
// DOS / NTFS reserved device names. The Windows kernel treats
// these as device handles regardless of extension, so all four
// forms are reserved:
// - bare: `CON`, `NUL`, `LPT1`
// - first-ext: `CON.txt`, `NUL.dat`, `COM1.json`
// - last-ext: `file.CON`, `audit.PRN`, `bar.LPT9`
// - any-ext: `CON.foo.bar`
// The earlier regex anchored to `$` only caught the last-ext
// form. Anchor to either start (bare or first-ext) or `.`
// (last-ext or middle-ext) to cover the full set. Names
// containing the reserved word as a substring of a longer
// segment (e.g. `BACON`, `concat.txt`) are NOT reserved — the
// boundary anchor `(^|\.)` keeps those legitimate.
if (/(^|\.)(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(\.|$)/i.test(seg)) {
return true;
}
}
return false;
}

View file

@ -348,6 +348,32 @@ describe('WorkspaceFileSystem - write/edit', () => {
expect((err as { kind: string }).kind).toBe('file_too_large');
});
it('rejects edit() with empty oldText (would silently prepend newText otherwise)', async () => {
// JS `''.indexOf('')` returns 0, so without the empty-check
// `current.slice(0, 0) + newText + current.slice(0)` would
// silently prepend `newText` to the entire file with a success
// audit event — textbook silent data corruption. Reject up-front.
const target = path.join(h.workspace, 'silent.txt');
await fsp.writeFile(target, 'original\n');
const r = await h.fs.resolve('silent.txt', 'edit');
const err = await h.fs.edit(r, '', 'INJECTED').catch((e: unknown) => e);
expect(isFsError(err)).toBe(true);
expect((err as { kind: string }).kind).toBe('parse_error');
// File must be unchanged.
const after = await fsp.readFile(target, 'utf-8');
expect(after).toBe('original\n');
});
it('edit() error includes oldText snippet in the hint', async () => {
const target = path.join(h.workspace, 'snippet.txt');
await fsp.writeFile(target, 'foo=1\nbar=2\n');
const r = await h.fs.resolve('snippet.txt', 'edit');
const err = (await h.fs
.edit(r, 'this string is not present', 'X')
.catch((e: unknown) => e)) as { hint?: string };
expect(err.hint).toMatch(/this string is not present/);
});
it('rejects edit on binary file', async () => {
const bin = path.join(h.workspace, 'bin.dat');
const buf = Buffer.alloc(64);
@ -527,6 +553,21 @@ describe('WorkspaceFileSystem - audit always emits on body errors', () => {
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
expect(denied).toBeDefined();
});
it('fs.denied audit payload carries the FsError message for forensic context', async () => {
// The earlier audit only recorded `errorKind` + `hint`; the
// underlying OS / `FsError` message (path, errno detail, byte
// count) was lost from the audit trail. `recordAndWrap` now
// forwards the message so audit consumers debugging a
// production incident can see the actual cause.
const err = (await h.fs
.resolve('../escape', 'read')
.catch((e: unknown) => e)) as { message: string };
expect(err.message).toMatch(/escapes workspace/);
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
expect(denied).toBeDefined();
expect((denied!.data as { message?: string }).message).toBe(err.message);
});
});
describe('WorkspaceFileSystem - glob escape audit', () => {

View file

@ -231,7 +231,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
assertTrustedForIntent(this.deps.trusted, 'stat');
const st = await fsp.lstat(p as string);
const out: FsStat = {
kind: kindFromStats(st),
kind: kindFromStatLike(st),
sizeBytes: st.size,
modifiedMs: st.mtimeMs,
};
@ -413,7 +413,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
// call `resolve()` separately. Treating each child as
// implicitly-resolved here would be a brand-cast bypass.
const childAbs = path.join(p as string, d.name);
const kind = kindFromDirent(d);
const kind = kindFromStatLike(d);
const verdict = shouldIgnore(
childAbs as ResolvedPath,
this.deps.boundWorkspace,
@ -616,8 +616,13 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
const start = performance.now();
try {
assertTrustedForIntent(this.deps.trusted, 'write');
const buf = Buffer.from(content, 'utf-8');
enforceWriteSize(buf.length);
// `Buffer.byteLength` returns the UTF-8 byte count without
// allocating a Buffer. The earlier `Buffer.from(content,
// 'utf-8')` materialized the entire payload (up to
// `MAX_WRITE_BYTES = 5 MiB`) just to read its `.length`,
// wasting heap on every write.
const sizeBytes = Buffer.byteLength(content, 'utf-8');
enforceWriteSize(sizeBytes);
await this.deps.lowFs.writeTextFile({
path: p as string,
content,
@ -633,7 +638,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
intent: 'write',
absolute: p,
durationMs: performance.now() - start,
sizeBytes: buf.length,
sizeBytes,
matchedIgnore: verdict.ignored ? verdict.category : undefined,
});
} catch (err) {
@ -672,6 +677,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
hint: 'edit() works on text files only',
});
}
// Reject empty `oldText` BEFORE reading. JavaScript's
// `''.indexOf('')` returns `0`, so without this guard
// `current.slice(0, 0) + newText + current.slice(0)` would
// silently prepend `newText` to the entire file and emit a
// success audit event — a textbook silent data corruption
// bug. PR 20 routes that pass user-supplied `oldText`
// through verbatim must not be able to trigger it.
if (oldText.length === 0) {
throw new FsError(
'parse_error',
`oldText must be a non-empty string for edit on ${p}`,
{
hint: 'empty oldText would match at position 0 and silently prepend newText',
},
);
}
const current = await fsp.readFile(p as string, 'utf-8');
// Post-read TOCTOU guard — catches the swap-during-read
// attack where `p` is replaced with a symlink between
@ -686,14 +707,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
// mechanical.
const idx = current.indexOf(oldText);
if (idx === -1) {
// Include a snippet of `oldText` in the hint so an operator
// staring at "edit failed" at 3 AM can tell whether the
// mismatch is whitespace, a stale file, or a wrong target
// path. Truncate to keep the hint readable on a one-line
// log; the full `oldText` is always reproducible from the
// request body.
const snippet =
oldText.length > 80 ? oldText.slice(0, 80) + '…' : oldText;
throw new FsError('parse_error', `oldText not found in ${p}`, {
hint: 'edit() expects oldText to appear verbatim in the file',
hint: `edit() expects oldText to appear verbatim; searched for: ${JSON.stringify(snippet)}`,
});
}
const next =
current.slice(0, idx) + newText + current.slice(idx + oldText.length);
const buf = Buffer.from(next, 'utf-8');
enforceWriteSize(buf.length);
const writtenBytes = Buffer.byteLength(next, 'utf-8');
enforceWriteSize(writtenBytes);
await this.deps.lowFs.writeTextFile({
path: p as string,
content: next,
@ -713,10 +742,10 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
intent: 'edit',
absolute: p,
durationMs: performance.now() - start,
sizeBytes: buf.length,
sizeBytes: writtenBytes,
matchedIgnore: editVerdict.ignored ? editVerdict.category : undefined,
});
return { writtenBytes: buf.length };
return { writtenBytes };
} catch (err) {
throw this.recordAndWrap(err, 'edit', p as string);
}
@ -742,6 +771,11 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
input,
errorKind: fs.kind,
hint: fs.hint,
// Quote the underlying OS / FsError message so audit
// consumers debugging a production incident can see the
// actual cause (errno text, byte counts, glob pattern,
// etc.) rather than just `errorKind` + `hint`.
message: fs.message,
});
return fs;
}
@ -826,25 +860,23 @@ async function assertInodeStableAfterRead(
}
}
function kindFromStats(st: {
/**
* Map a `Stats` or `Dirent` (both expose the same `isFile` /
* `isDirectory` / `isSymbolicLink` methods) to the boundary's
* narrow `kind` union. `FsStat['kind']` and `FsEntry['kind']` are
* the same 4-value union, so a single helper keeps the
* classification rule in one place reviewer flagged the previous
* `kindFromStats` + `kindFromDirent` pair as a maintenance hazard
* (drift risk if the union grows).
*/
function kindFromStatLike(s: {
isFile(): boolean;
isDirectory(): boolean;
isSymbolicLink(): boolean;
}): FsStat['kind'] {
if (st.isSymbolicLink()) return 'symlink';
if (st.isDirectory()) return 'directory';
if (st.isFile()) return 'file';
return 'other';
}
function kindFromDirent(d: {
isFile(): boolean;
isDirectory(): boolean;
isSymbolicLink(): boolean;
}): FsEntry['kind'] {
if (d.isSymbolicLink()) return 'symlink';
if (d.isDirectory()) return 'directory';
if (d.isFile()) return 'file';
if (s.isSymbolicLink()) return 'symlink';
if (s.isDirectory()) return 'directory';
if (s.isFile()) return 'file';
return 'other';
}