From 546834267f52bc79fb29b3c4138608e2b3d31afd Mon Sep 17 00:00:00 2001 From: doudouOUC Date: Mon, 18 May 2026 01:23:37 +0800 Subject: [PATCH] fix(serve/fs): close 3 review-round-4 critical findings (#4250) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek's fourth review pass surfaced three more critical bugs; all are fixed in-PR. 1. TOCTOU symlink substitution in readText/readBytes/edit (workspaceFileSystem.ts) — Critical. fsp.stat(p) and the subsequent lowFs.readTextFile(p) (or fsp.readFile / fsp.readFile for edit) are two separate syscalls. An attacker who can write into the workspace can swap p from a regular file to a symlink pointing outside between the two calls; pre-stat sees the original, the read follows the swap. Fix: assertInodeStableAfterRead(p, preIno) — after each read, re-lstat p; reject with symlink_escape if the path is now a symlink (post.isSymbolicLink()) or its inode changed (preIno !== post.ino, with a 0-ino fallback for procfs / virtual mounts that don't report meaningful inodes). Catches the swap-and-leave attack and the swap-and-keep-swapped attack. Residual race (attacker swaps back AFTER our read but BEFORE our lstat) is much smaller than the original window and outside PR 18's threat model; fd-based reading via fsp.open + fileHandle.read would close it entirely but requires a new variant of lowFs that takes a FileHandle — tracked as a follow-up. 2. UTF-8 truncation corruption in readText (workspaceFileSystem.ts) — Critical. buf.subarray(0, sizeOutcome.bytesToRead).toString ('utf-8') silently emits U+FFFD when bytesToRead falls mid-codepoint (CJK, emoji). A downstream consumer parsing JSON or source code over the truncated content would see broken trailing bytes; meta.truncated would be true but the prefix is corrupt. A subsequent edit() with the corrupted string as oldText would also fail to match the on-disk content. Fix: safeUtf8Truncate(buf, maxBytes) walks back from maxBytes through any continuation bytes (0b10xxxxxx), then verifies the leading byte's full sequence fits within the cap; drops the leading byte if it doesn't. The result is always a clean prefix at a valid codepoint boundary. Test pins '中文测试' (12 bytes / 3 bytes per char) truncated at 7 bytes -> '中文' (no U+FFFD). 3. glob opts.cwd bypasses workspace boundary (workspaceFileSystem.ts) — Critical. opts.cwd was used directly as the glob root with no validation against boundWorkspace. ResolvedPath is a brand cast and a stale or forged value lets a glob('**/*', { cwd: '/etc' }) enumerate files outside the workspace. The pattern-side absolute / UNC checks added in a81ada43f only constrain the *pattern*; cwd is the actual hazard. Fix: at the entry point of glob(), path.resolve cwd and isWithinRoot-check against boundWorkspace. Throws path_outside_workspace if cwd is outside, even when the pattern itself is harmlessly relative. Test pins the case with cwd: scratch (outside workspace). 3 new tests: - readText with mid-operation symlink swap -> symlink_escape - safeUtf8Truncate keeps CJK codepoints intact at 7-byte cap - glob with opts.cwd outside workspace -> path_outside_workspace 429/429 serve tests pass; typecheck + eslint clean. Remaining tracked follow-ups (Post-PR-18 hardening, in #4175): - list/glob brand-cast contract (PR 19) - runQwenServe → fsFactory injection contract test (PR 19) - edit() write-side TOCTOU + atomic-via-temp + expectedHash (PR 20) - glob audit pathHash (independent audit.ts commit) - glob maxResults streaming (independent hardening) - glob pattern preflight refactor to reuse hasSuspiciousPathPattern (cosmetic) 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code) --- .../src/serve/fs/workspaceFileSystem.test.ts | 55 ++++++++ .../cli/src/serve/fs/workspaceFileSystem.ts | 125 +++++++++++++++++- 2 files changed, 176 insertions(+), 4 deletions(-) diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts index 5e8fdc56c..1ac6e1888 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts @@ -14,6 +14,7 @@ import { FS_ACCESS_EVENT_TYPE, FS_DENIED_EVENT_TYPE, createWorkspaceFileSystemFactory, + type ResolvedPath, type WorkspaceFileSystem, type WorkspaceFileSystemFactory, } from './index.js'; @@ -439,6 +440,60 @@ describe('WorkspaceFileSystem - trust gate', () => { }); }); +describe('WorkspaceFileSystem - TOCTOU + UTF-8 + cwd hardening', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('readText detects post-stat symlink swap and rejects with symlink_escape', async () => { + // Simulate the swap: write a regular file, resolve, then + // replace it with a symlink to outside the workspace AFTER the + // boundary's pre-stat. We approximate by performing the swap + // *before* the call but after `resolve`; since the pre-stat + // and post-lstat happen back-to-back in the actual call, the + // post-lstat catches the symlink state. + const target = path.join(h.workspace, 'victim.txt'); + await fsp.writeFile(target, 'plain'); + const r = await h.fs.resolve('victim.txt', 'read'); + // Replace the regular file with a symlink to an outside path. + const outside = path.join(h.scratch, 'sensitive.txt'); + await fsp.writeFile(outside, 'sensitive'); + await fsp.unlink(target); + await fsp.symlink(outside, target, 'file'); + const err = await h.fs.readText(r).catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('symlink_escape'); + }); + + it('safeUtf8Truncate keeps multi-byte codepoints intact at the boundary', async () => { + // 4-char Chinese string, each char 3 bytes UTF-8 = 12 bytes. + // A naive slice at 7 bytes would split the 3rd char. + const src = '中文测试'; + const target = path.join(h.workspace, 'cjk.txt'); + await fsp.writeFile(target, src, 'utf-8'); + const r = await h.fs.resolve('cjk.txt', 'read'); + const out = await h.fs.readText(r, { maxBytes: 7 }); + expect(out.meta.truncated).toBe(true); + // Result must be a valid prefix (no U+FFFD); 7 bytes / 3 bytes + // per char → 2 complete chars. + expect(out.content).toBe('中文'); + expect(out.content).not.toMatch(/�/); + }); + + it('glob rejects opts.cwd that lies outside boundWorkspace', async () => { + // Forge a `cwd` brand cast pointing outside the workspace; the + // entry-point validation should refuse before `globAsync` runs. + const outsideCwd = h.scratch as unknown as ResolvedPath; + const err = await h.fs + .glob('**/*', { cwd: outsideCwd }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('path_outside_workspace'); + }); +}); + describe('WorkspaceFileSystem - audit always emits on body errors', () => { let h: Harness; beforeEach(async () => { diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.ts b/packages/cli/src/serve/fs/workspaceFileSystem.ts index 202d423ea..1610db9b6 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts @@ -19,6 +19,7 @@ import { glob as globAsync } from 'glob'; import { StandardFileSystemService, loadIgnoreRules, + isWithinRoot, type Ignore, type WriteTextFileOptions, } from '@qwen-code/qwen-code-core'; @@ -327,11 +328,17 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { }; let truncatedContent = result.content; if (sizeOutcome.truncated) { + // Use `safeUtf8Truncate` instead of `subarray(0,n).toString('utf-8')` + // so the slice never splits a multi-byte codepoint (CJK, + // emoji). The plain `subarray + toString` approach silently + // emits U+FFFD at the boundary and breaks downstream JSON / + // source-code parsing of the truncated prefix. const buf = Buffer.from(result.content, 'utf-8'); if (buf.length > sizeOutcome.bytesToRead) { - truncatedContent = buf - .subarray(0, sizeOutcome.bytesToRead) - .toString('utf-8'); + truncatedContent = safeUtf8Truncate( + buf, + sizeOutcome.bytesToRead, + ).toString('utf-8'); } meta.truncated = true; } @@ -348,6 +355,12 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { meta.truncated = true; } if (ignoreVerdict.ignored) meta.matchedIgnore = ignoreVerdict.category; + // Post-read TOCTOU check: confirm the path's inode hasn't + // changed and it isn't now a symlink. Catches the + // swap-during-read attack where the file is replaced + // mid-operation with a symlink pointing outside the + // workspace. + await assertInodeStableAfterRead(p as string, st.ino); this.deps.audit.recordAccess(this.deps.ctx, { intent: 'read', absolute: p, @@ -372,6 +385,8 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { const st = await fsp.stat(p as string); enforceReadBytesSize(st.size, opts.maxBytes); const buf = await fsp.readFile(p as string); + // Post-read TOCTOU guard — same shape as `readText`. + await assertInodeStableAfterRead(p as string, st.ino); this.deps.audit.recordAccess(this.deps.ctx, { intent: 'read', absolute: p, @@ -454,9 +469,25 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { { hint: 'pass a relative pattern such as "src/**/*.ts"' }, ); } + // `opts.cwd` is typed `ResolvedPath` but a brand cast in + // calling code can produce a path that's never been verified + // against `boundWorkspace` (or was verified at a stale + // moment). Re-validate at the entry point so a glob with + // `cwd: '/etc'` cannot enumerate files outside the workspace + // even when the *pattern* is harmlessly relative. The + // pattern-side checks above only constrain the pattern + // shape; without this check `cwd` is the actual hazard. const cwd = (opts.cwd as string | undefined) ?? this.deps.boundWorkspace; + const cwdAbs = path.resolve(cwd); + if (!isWithinRoot(cwdAbs, this.deps.boundWorkspace)) { + throw new FsError( + 'path_outside_workspace', + `glob cwd is outside workspace: ${cwd}`, + { hint: 'opts.cwd must be a path obtained from fs.resolve()' }, + ); + } const matches = await globAsync(pattern, { - cwd, + cwd: cwdAbs, nodir: false, absolute: true, dot: true, @@ -642,6 +673,13 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { }); } const current = await fsp.readFile(p as string, 'utf-8'); + // Post-read TOCTOU guard — catches the swap-during-read + // attack where `p` is replaced with a symlink between + // `fsp.stat` above and `fsp.readFile` here. The full + // read-modify-write race window (between this check and + // `lowFs.writeTextFile` below) is the deferred PR 20 + // follow-up that adds atomic-via-temp + `expectedHash`. + await assertInodeStableAfterRead(p as string, st.ino); // Single replacement to preserve atomic write-once semantics. // Multi-occurrence handling lives in PR 20's edit endpoint // where the route can decide policy; the boundary stays @@ -709,6 +747,85 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { } } +/** + * Truncate a UTF-8 buffer to at most `maxBytes` bytes WITHOUT + * splitting a multi-byte codepoint. `Buffer.subarray(0, n).toString('utf-8')` + * silently emits U+FFFD replacement chars when `n` falls in the + * middle of a 2-4-byte sequence (CJK, emoji); a downstream consumer + * parsing JSON / source code over the truncated content sees corrupted + * trailing bytes. We back off `n` to the last valid codepoint + * boundary so the truncated string is always a clean prefix of the + * original. + * + * Algorithm: + * 1. If the buffer fits, return as-is. + * 2. Walk back from `maxBytes` while the previous byte is a UTF-8 + * continuation byte (`0b10xxxxxx`). + * 3. The byte at the new boundary is now either ASCII (`<0x80`) or + * a leading byte. If it's a leading byte, check whether the full + * multi-byte sequence fits within `maxBytes`. If not, drop the + * leading byte too — the sequence is incomplete. + */ +function safeUtf8Truncate(buf: Buffer, maxBytes: number): Buffer { + if (buf.length <= maxBytes) return buf; + let end = maxBytes; + while (end > 0 && (buf[end] & 0xc0) === 0x80) end--; + if (end > 0) { + const lead = buf[end - 1]; + let seqLen = 1; + if ((lead & 0x80) === 0) seqLen = 1; + else if ((lead & 0xe0) === 0xc0) seqLen = 2; + else if ((lead & 0xf0) === 0xe0) seqLen = 3; + else if ((lead & 0xf8) === 0xf0) seqLen = 4; + if (seqLen > 1 && end - 1 + seqLen > maxBytes) { + end = end - 1; + } + } + return buf.subarray(0, end); +} + +/** + * Post-read TOCTOU guard. After reading the file at `p`, re-`lstat` + * to confirm the inode hasn't changed and the path isn't now a + * symlink. Catches the swap-then-leave attack where a regular + * file is replaced with a symlink to outside the workspace + * BETWEEN the boundary's pre-stat and the actual read — the + * pre-stat saw the original (small, regular) file but the read + * followed the swap to wherever the attacker pointed. There's a + * residual race where the attacker swaps back after our read but + * before this check; that window is much smaller than the swap- + * and-leave attack and outside PR 18's threat model. The proper + * fix is fd-based reading (`fsp.open` + `fileHandle.read`) so the + * fd binds to the inode at open time; that's a follow-up since it + * requires a new variant of `lowFs.readTextFile` that takes a + * FileHandle instead of a path. + */ +async function assertInodeStableAfterRead( + p: string, + preIno: bigint | number, +): Promise { + const post = await fsp.lstat(p); + if (post.isSymbolicLink()) { + throw new FsError( + 'symlink_escape', + `path was replaced with a symlink during read: ${p}`, + { hint: 'TOCTOU swap detected via post-read lstat' }, + ); + } + // ino can be 0 on virtual filesystems (procfs etc.) — only compare + // when both sides report a meaningful value. + const preNum = typeof preIno === 'bigint' ? preIno : BigInt(preIno as number); + const postNum = + typeof post.ino === 'bigint' ? post.ino : BigInt(post.ino as number); + if (preNum !== 0n && postNum !== 0n && preNum !== postNum) { + throw new FsError( + 'symlink_escape', + `path inode changed during read: ${p}`, + { hint: 'TOCTOU swap detected via inode comparison' }, + ); + } +} + function kindFromStats(st: { isFile(): boolean; isDirectory(): boolean;