mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-24 22:03:46 +00:00
fix(serve/fs): close 7 review-round-5 findings (#4250)
DeepSeek round-5 surfaced 9 comments; 7 are real findings (the other 2 — UTF-8 truncation + glob opts.cwd — already fixed in546834267from round-4 and replied-to inline). Stack on round-4. 1. edit() empty-oldText silent-prepend (workspaceFileSystem.ts) — Critical silent data corruption. JS `''.indexOf('')` returns 0, so without an empty-string guard `current.slice(0,0) + newText + current.slice(0)` = `newText + current` — silently prepends `newText` to the whole file with a success audit event. PR 20 routes that thread user-supplied `oldText` verbatim must not be able to trigger this. Now throws `parse_error` BEFORE the read with a hint explaining why empty matches are rejected. 2. DOS device name regex misses bare names + first-extension forms (paths.ts) — Windows attack surface. The earlier /\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i only caught the last-extension form (file.CON). NTFS reserves these names regardless of extension: CON, NUL, CON.txt, NUL.dat, CON.foo.bar are ALL reserved device handles. New regex /(^|\.)(CON|...|LPT[1-9])(\.|$)/i covers all four positions (bare / first-ext / last-ext / middle-ext) while still admitting legitimate substrings (BACON, concat.txt, precon.go). 3. Buffer.byteLength replaces Buffer.from for size-only checks (workspaceFileSystem.ts) — 5 MB heap allocation per write eliminated. `writeText` and `edit()` previously materialized the entire UTF-8 payload (up to MAX_WRITE_BYTES) just to read `.length`; `Buffer.byteLength(content, 'utf-8')` returns the count without allocating. 4. edit() error message includes oldText snippet (workspaceFileSystem.ts) — Production debuggability. The earlier hint was just "edit() expects oldText to appear verbatim in the file" — at 3 AM an operator can't tell whether the mismatch is whitespace, a stale file, or a wrong target. Now includes a JSON-quoted truncated snippet (max 80 chars + ellipsis) of the searched-for text. 5. recordAndWrap forwards FsError message into fs.denied audit (workspaceFileSystem.ts + audit.ts) — Audit observability gap. Audit consumers debugging an incident saw `errorKind` + `hint` but lost the underlying OS error detail (path, errno text, byte count). FsDeniedAuditPayload now carries an optional `message` field; recordAndWrap forwards `fs.message` automatically. 6. EISDIR / ENOTDIR distinct hints (errors.ts) — UX. Both shared the same hint "a path component is not a directory where one was expected" — for EISDIR (path IS a directory but a file was expected) the wording was reversed. Now distinct hints with the errno name explicitly cited. 7. kindFromStats / kindFromDirent merged into kindFromStatLike (workspaceFileSystem.ts) — duplicate function bodies removed. Both fs.Stats and fs.Dirent expose the same isFile / isDirectory / isSymbolicLink interface, and both targets (FsStat['kind'], FsEntry['kind']) are the same 4-value union. Single helper avoids drift if the union grows. 4 new tests: - bare/multi-ext DOS device names (CON, NUL, CON.txt, CON.foo.bar) + legitimate substrings (BACON, concat, precon, contemplating) - edit() empty oldText -> parse_error + file unchanged - edit() not-found error includes searched snippet in hint - fs.denied audit payload carries FsError message Plus the 2 already-fixed items (UTF-8 boundary, glob opts.cwd) have new test coverage from round-4. 433/433 serve tests pass; typecheck + eslint clean. Stack:7b0db4c3a→a81ada43f→efd7a4611→b38e82157→911cb8e5d→546834267→ THIS 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code)
This commit is contained in:
parent
546834267f
commit
ebd9e78a17
6 changed files with 151 additions and 27 deletions
|
|
@ -77,6 +77,16 @@ export interface FsDeniedAuditPayload {
|
|||
relPath?: string;
|
||||
errorKind: FsErrorKind;
|
||||
hint?: string;
|
||||
/**
|
||||
* Human-readable error message from the underlying `FsError`.
|
||||
* Audit consumers debugging a production incident need to see
|
||||
* the actual OS error (e.g. errno detail, byte counts) rather
|
||||
* than only `errorKind` + `hint`. Optional so privacy-sensitive
|
||||
* deployments can suppress it; populated by default by
|
||||
* `recordDenied` since the orchestrator already wraps every body
|
||||
* error into an `FsError` whose message we can quote.
|
||||
*/
|
||||
message?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -204,6 +214,7 @@ export function createAuditPublisher(
|
|||
errorKind: record.errorKind,
|
||||
};
|
||||
if (record.hint) payload.hint = record.hint;
|
||||
if (record.message) payload.message = record.message;
|
||||
if (includeRawPaths) {
|
||||
payload.relPath = relForAudit(record.input, boundWorkspace);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,10 +138,14 @@ export function wrapAsFsError(
|
|||
hint: 'symlink chain forms a cycle or exceeds SYMLOOP_MAX',
|
||||
});
|
||||
case 'EISDIR':
|
||||
return new FsError('parse_error', message, {
|
||||
cause: err,
|
||||
hint: 'EISDIR — path is a directory but a regular file was expected',
|
||||
});
|
||||
case 'ENOTDIR':
|
||||
return new FsError('parse_error', message, {
|
||||
cause: err,
|
||||
hint: 'a path component is not a directory where one was expected',
|
||||
hint: 'ENOTDIR — a path component is a regular file but a directory was expected',
|
||||
});
|
||||
case 'ENOSPC':
|
||||
return new FsError('io_error', message, {
|
||||
|
|
|
|||
|
|
@ -125,6 +125,27 @@ describe('hasSuspiciousPathPattern', () => {
|
|||
expect(hasSuspiciousPathPattern('bar.LPT1')).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects bare and multi-extension DOS device names (NTFS reserves regardless of extension)', () => {
|
||||
// Bare reserved names
|
||||
expect(hasSuspiciousPathPattern('CON')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('NUL')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('PRN')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('AUX')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('COM1')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('LPT9')).toBe(true);
|
||||
// First-extension forms
|
||||
expect(hasSuspiciousPathPattern('CON.txt')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('NUL.dat')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('LPT1.log')).toBe(true);
|
||||
// Middle-extension form
|
||||
expect(hasSuspiciousPathPattern('CON.foo.bar')).toBe(true);
|
||||
// Substring of longer name must NOT match (BACON, concat, lprint…)
|
||||
expect(hasSuspiciousPathPattern('BACON')).toBe(false);
|
||||
expect(hasSuspiciousPathPattern('concat.txt')).toBe(false);
|
||||
expect(hasSuspiciousPathPattern('precon.go')).toBe(false);
|
||||
expect(hasSuspiciousPathPattern('contemplating.md')).toBe(false);
|
||||
});
|
||||
|
||||
it('rejects three-or-more-dot path components', () => {
|
||||
expect(hasSuspiciousPathPattern('foo/.../bar')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('.../leaf')).toBe(true);
|
||||
|
|
|
|||
|
|
@ -180,7 +180,22 @@ export function hasSuspiciousPathPattern(p: string): boolean {
|
|||
for (const seg of p.split(/[\\/]/)) {
|
||||
if (seg === '' || seg === '.' || seg === '..') continue;
|
||||
if (/[.\s]+$/.test(seg)) return true;
|
||||
if (/\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i.test(seg)) return true;
|
||||
// DOS / NTFS reserved device names. The Windows kernel treats
|
||||
// these as device handles regardless of extension, so all four
|
||||
// forms are reserved:
|
||||
// - bare: `CON`, `NUL`, `LPT1`
|
||||
// - first-ext: `CON.txt`, `NUL.dat`, `COM1.json`
|
||||
// - last-ext: `file.CON`, `audit.PRN`, `bar.LPT9`
|
||||
// - any-ext: `CON.foo.bar`
|
||||
// The earlier regex anchored to `$` only caught the last-ext
|
||||
// form. Anchor to either start (bare or first-ext) or `.`
|
||||
// (last-ext or middle-ext) to cover the full set. Names
|
||||
// containing the reserved word as a substring of a longer
|
||||
// segment (e.g. `BACON`, `concat.txt`) are NOT reserved — the
|
||||
// boundary anchor `(^|\.)` keeps those legitimate.
|
||||
if (/(^|\.)(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])(\.|$)/i.test(seg)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -348,6 +348,32 @@ describe('WorkspaceFileSystem - write/edit', () => {
|
|||
expect((err as { kind: string }).kind).toBe('file_too_large');
|
||||
});
|
||||
|
||||
it('rejects edit() with empty oldText (would silently prepend newText otherwise)', async () => {
|
||||
// JS `''.indexOf('')` returns 0, so without the empty-check
|
||||
// `current.slice(0, 0) + newText + current.slice(0)` would
|
||||
// silently prepend `newText` to the entire file with a success
|
||||
// audit event — textbook silent data corruption. Reject up-front.
|
||||
const target = path.join(h.workspace, 'silent.txt');
|
||||
await fsp.writeFile(target, 'original\n');
|
||||
const r = await h.fs.resolve('silent.txt', 'edit');
|
||||
const err = await h.fs.edit(r, '', 'INJECTED').catch((e: unknown) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('parse_error');
|
||||
// File must be unchanged.
|
||||
const after = await fsp.readFile(target, 'utf-8');
|
||||
expect(after).toBe('original\n');
|
||||
});
|
||||
|
||||
it('edit() error includes oldText snippet in the hint', async () => {
|
||||
const target = path.join(h.workspace, 'snippet.txt');
|
||||
await fsp.writeFile(target, 'foo=1\nbar=2\n');
|
||||
const r = await h.fs.resolve('snippet.txt', 'edit');
|
||||
const err = (await h.fs
|
||||
.edit(r, 'this string is not present', 'X')
|
||||
.catch((e: unknown) => e)) as { hint?: string };
|
||||
expect(err.hint).toMatch(/this string is not present/);
|
||||
});
|
||||
|
||||
it('rejects edit on binary file', async () => {
|
||||
const bin = path.join(h.workspace, 'bin.dat');
|
||||
const buf = Buffer.alloc(64);
|
||||
|
|
@ -527,6 +553,21 @@ describe('WorkspaceFileSystem - audit always emits on body errors', () => {
|
|||
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toBeDefined();
|
||||
});
|
||||
|
||||
it('fs.denied audit payload carries the FsError message for forensic context', async () => {
|
||||
// The earlier audit only recorded `errorKind` + `hint`; the
|
||||
// underlying OS / `FsError` message (path, errno detail, byte
|
||||
// count) was lost from the audit trail. `recordAndWrap` now
|
||||
// forwards the message so audit consumers debugging a
|
||||
// production incident can see the actual cause.
|
||||
const err = (await h.fs
|
||||
.resolve('../escape', 'read')
|
||||
.catch((e: unknown) => e)) as { message: string };
|
||||
expect(err.message).toMatch(/escapes workspace/);
|
||||
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toBeDefined();
|
||||
expect((denied!.data as { message?: string }).message).toBe(err.message);
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - glob escape audit', () => {
|
||||
|
|
|
|||
|
|
@ -231,7 +231,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
assertTrustedForIntent(this.deps.trusted, 'stat');
|
||||
const st = await fsp.lstat(p as string);
|
||||
const out: FsStat = {
|
||||
kind: kindFromStats(st),
|
||||
kind: kindFromStatLike(st),
|
||||
sizeBytes: st.size,
|
||||
modifiedMs: st.mtimeMs,
|
||||
};
|
||||
|
|
@ -413,7 +413,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
// call `resolve()` separately. Treating each child as
|
||||
// implicitly-resolved here would be a brand-cast bypass.
|
||||
const childAbs = path.join(p as string, d.name);
|
||||
const kind = kindFromDirent(d);
|
||||
const kind = kindFromStatLike(d);
|
||||
const verdict = shouldIgnore(
|
||||
childAbs as ResolvedPath,
|
||||
this.deps.boundWorkspace,
|
||||
|
|
@ -616,8 +616,13 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'write');
|
||||
const buf = Buffer.from(content, 'utf-8');
|
||||
enforceWriteSize(buf.length);
|
||||
// `Buffer.byteLength` returns the UTF-8 byte count without
|
||||
// allocating a Buffer. The earlier `Buffer.from(content,
|
||||
// 'utf-8')` materialized the entire payload (up to
|
||||
// `MAX_WRITE_BYTES = 5 MiB`) just to read its `.length`,
|
||||
// wasting heap on every write.
|
||||
const sizeBytes = Buffer.byteLength(content, 'utf-8');
|
||||
enforceWriteSize(sizeBytes);
|
||||
await this.deps.lowFs.writeTextFile({
|
||||
path: p as string,
|
||||
content,
|
||||
|
|
@ -633,7 +638,7 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
intent: 'write',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: buf.length,
|
||||
sizeBytes,
|
||||
matchedIgnore: verdict.ignored ? verdict.category : undefined,
|
||||
});
|
||||
} catch (err) {
|
||||
|
|
@ -672,6 +677,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
hint: 'edit() works on text files only',
|
||||
});
|
||||
}
|
||||
// Reject empty `oldText` BEFORE reading. JavaScript's
|
||||
// `''.indexOf('')` returns `0`, so without this guard
|
||||
// `current.slice(0, 0) + newText + current.slice(0)` would
|
||||
// silently prepend `newText` to the entire file and emit a
|
||||
// success audit event — a textbook silent data corruption
|
||||
// bug. PR 20 routes that pass user-supplied `oldText`
|
||||
// through verbatim must not be able to trigger it.
|
||||
if (oldText.length === 0) {
|
||||
throw new FsError(
|
||||
'parse_error',
|
||||
`oldText must be a non-empty string for edit on ${p}`,
|
||||
{
|
||||
hint: 'empty oldText would match at position 0 and silently prepend newText',
|
||||
},
|
||||
);
|
||||
}
|
||||
const current = await fsp.readFile(p as string, 'utf-8');
|
||||
// Post-read TOCTOU guard — catches the swap-during-read
|
||||
// attack where `p` is replaced with a symlink between
|
||||
|
|
@ -686,14 +707,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
// mechanical.
|
||||
const idx = current.indexOf(oldText);
|
||||
if (idx === -1) {
|
||||
// Include a snippet of `oldText` in the hint so an operator
|
||||
// staring at "edit failed" at 3 AM can tell whether the
|
||||
// mismatch is whitespace, a stale file, or a wrong target
|
||||
// path. Truncate to keep the hint readable on a one-line
|
||||
// log; the full `oldText` is always reproducible from the
|
||||
// request body.
|
||||
const snippet =
|
||||
oldText.length > 80 ? oldText.slice(0, 80) + '…' : oldText;
|
||||
throw new FsError('parse_error', `oldText not found in ${p}`, {
|
||||
hint: 'edit() expects oldText to appear verbatim in the file',
|
||||
hint: `edit() expects oldText to appear verbatim; searched for: ${JSON.stringify(snippet)}`,
|
||||
});
|
||||
}
|
||||
const next =
|
||||
current.slice(0, idx) + newText + current.slice(idx + oldText.length);
|
||||
const buf = Buffer.from(next, 'utf-8');
|
||||
enforceWriteSize(buf.length);
|
||||
const writtenBytes = Buffer.byteLength(next, 'utf-8');
|
||||
enforceWriteSize(writtenBytes);
|
||||
await this.deps.lowFs.writeTextFile({
|
||||
path: p as string,
|
||||
content: next,
|
||||
|
|
@ -713,10 +742,10 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
intent: 'edit',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: buf.length,
|
||||
sizeBytes: writtenBytes,
|
||||
matchedIgnore: editVerdict.ignored ? editVerdict.category : undefined,
|
||||
});
|
||||
return { writtenBytes: buf.length };
|
||||
return { writtenBytes };
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'edit', p as string);
|
||||
}
|
||||
|
|
@ -742,6 +771,11 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
|||
input,
|
||||
errorKind: fs.kind,
|
||||
hint: fs.hint,
|
||||
// Quote the underlying OS / FsError message so audit
|
||||
// consumers debugging a production incident can see the
|
||||
// actual cause (errno text, byte counts, glob pattern,
|
||||
// etc.) rather than just `errorKind` + `hint`.
|
||||
message: fs.message,
|
||||
});
|
||||
return fs;
|
||||
}
|
||||
|
|
@ -826,25 +860,23 @@ async function assertInodeStableAfterRead(
|
|||
}
|
||||
}
|
||||
|
||||
function kindFromStats(st: {
|
||||
/**
|
||||
* Map a `Stats` or `Dirent` (both expose the same `isFile` /
|
||||
* `isDirectory` / `isSymbolicLink` methods) to the boundary's
|
||||
* narrow `kind` union. `FsStat['kind']` and `FsEntry['kind']` are
|
||||
* the same 4-value union, so a single helper keeps the
|
||||
* classification rule in one place — reviewer flagged the previous
|
||||
* `kindFromStats` + `kindFromDirent` pair as a maintenance hazard
|
||||
* (drift risk if the union grows).
|
||||
*/
|
||||
function kindFromStatLike(s: {
|
||||
isFile(): boolean;
|
||||
isDirectory(): boolean;
|
||||
isSymbolicLink(): boolean;
|
||||
}): FsStat['kind'] {
|
||||
if (st.isSymbolicLink()) return 'symlink';
|
||||
if (st.isDirectory()) return 'directory';
|
||||
if (st.isFile()) return 'file';
|
||||
return 'other';
|
||||
}
|
||||
|
||||
function kindFromDirent(d: {
|
||||
isFile(): boolean;
|
||||
isDirectory(): boolean;
|
||||
isSymbolicLink(): boolean;
|
||||
}): FsEntry['kind'] {
|
||||
if (d.isSymbolicLink()) return 'symlink';
|
||||
if (d.isDirectory()) return 'directory';
|
||||
if (d.isFile()) return 'file';
|
||||
if (s.isSymbolicLink()) return 'symlink';
|
||||
if (s.isDirectory()) return 'directory';
|
||||
if (s.isFile()) return 'file';
|
||||
return 'other';
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue