diff --git a/packages/cli/src/serve/fs/audit.ts b/packages/cli/src/serve/fs/audit.ts index 5a20a9cf5..2f0469c04 100644 --- a/packages/cli/src/serve/fs/audit.ts +++ b/packages/cli/src/serve/fs/audit.ts @@ -61,6 +61,16 @@ export interface FsAccessAuditPayload { intent: Intent; route: string; pathHash: string; + /** + * ACP session id from `AuditContext.sessionId`, when known. + * Multi-session daemons need this to correlate audit events + * back to the session that triggered them — `originatorClientId` + * alone identifies the *client*, not the *session*. Always + * present when the calling route is session-scoped (PR 19/20 + * routes that take `:sessionId`); absent on workspace-scoped + * routes that have no session context. + */ + sessionId?: string; /** Workspace-relative path; only populated when QWEN_AUDIT_RAW_PATHS=1. */ relPath?: string; sizeBytes?: number; @@ -74,6 +84,8 @@ export interface FsDeniedAuditPayload { intent: Intent; route: string; pathHash: string; + /** See `FsAccessAuditPayload.sessionId` — same semantics. */ + sessionId?: string; relPath?: string; errorKind: FsErrorKind; hint?: string; @@ -189,6 +201,7 @@ export function createAuditPublisher( pathHash: hashPath(absolute), durationMs: record.durationMs, }; + if (ctx.sessionId) payload.sessionId = ctx.sessionId; if (record.sizeBytes !== undefined) payload.sizeBytes = record.sizeBytes; if (record.truncated) payload.truncated = true; if (record.matchedIgnore) payload.matchedIgnore = record.matchedIgnore; @@ -213,6 +226,7 @@ export function createAuditPublisher( pathHash: hashPath(probe), errorKind: record.errorKind, }; + if (ctx.sessionId) payload.sessionId = ctx.sessionId; if (record.hint) payload.hint = record.hint; // `message` carries the underlying `FsError.message`, which // many throw-sites embed `${p}` (absolute workspace path) or diff --git a/packages/cli/src/serve/fs/errors.test.ts b/packages/cli/src/serve/fs/errors.test.ts index 482769656..3d4f6fdb9 100644 --- a/packages/cli/src/serve/fs/errors.test.ts +++ b/packages/cli/src/serve/fs/errors.test.ts @@ -135,13 +135,34 @@ describe('wrapAsFsError', () => { expect(new FsError('io_error', 'disk full').status).toBe(503); }); - it('falls back to the configured default kind for unknown errnos', () => { - expect(wrapAsFsError(errno('EWHATEVER')).kind).toBe('permission_denied'); + it('falls back to internal_error (not permission_denied) for unknown errnos and non-errno errors', () => { + // Default fallback used to be `permission_denied`, which mis-paged + // security oncall on a TypeError or null-deref. The new default is + // `internal_error` (HTTP 500) so monitoring keys stay aligned with + // the actual class of fault. + expect(wrapAsFsError(errno('EWHATEVER')).kind).toBe('internal_error'); + expect(wrapAsFsError(new TypeError('null deref')).kind).toBe( + 'internal_error', + ); + // Caller can still override. expect(wrapAsFsError(errno('EWHATEVER'), 'parse_error').kind).toBe( 'parse_error', ); }); + it('internal_error has HTTP status 500', () => { + expect(new FsError('internal_error', 'bug').status).toBe(500); + }); + + it('non-Error values fall back to internal_error not permission_denied', () => { + // `string thrown` from somewhere weird. Earlier default of + // permission_denied would page security oncall for what + // is a developer ticket. + const out = wrapAsFsError('boom'); + expect(out.kind).toBe('internal_error'); + expect(out.status).toBe(500); + }); + it('preserves the original error as cause', () => { const root = errno('ENOENT', 'where'); const out = wrapAsFsError(root); diff --git a/packages/cli/src/serve/fs/errors.ts b/packages/cli/src/serve/fs/errors.ts index be060a08b..488a426d4 100644 --- a/packages/cli/src/serve/fs/errors.ts +++ b/packages/cli/src/serve/fs/errors.ts @@ -35,6 +35,17 @@ export type FsErrorKind = * failure" to PR 19/20 route handlers and SDK consumers. */ | 'io_error' + /** + * Catch-all for non-errno errors that reach the boundary + * (`TypeError`, programmer-error throws, native module + * exceptions, etc.). Distinguished from `permission_denied` + * because monitoring pipelines key on `errorKind` for + * security alerting — conflating "code bug" with "ACL + * denied" pages security oncall for what should be a + * developer ticket. The 500 status communicates "daemon + * internal fault" to PR 19/20 route handlers. + */ + | 'internal_error' | 'parse_error'; /** @@ -45,7 +56,7 @@ export type FsErrorKind = * boundary is being asked to model a transport-level concern that * doesn't belong here (5xx, 401/403 from auth, etc.). */ -export type FsErrorStatus = 400 | 403 | 404 | 413 | 422 | 503; +export type FsErrorStatus = 400 | 403 | 404 | 413 | 422 | 500 | 503; /** * Default HTTP status mapping. Centralized here so callers can throw @@ -64,6 +75,7 @@ const DEFAULT_STATUS_BY_KIND: Record = { untrusted_workspace: 403, permission_denied: 403, io_error: 503, + internal_error: 500, parse_error: 400, }; @@ -120,7 +132,7 @@ export function isFsError(err: unknown): err is FsError { */ export function wrapAsFsError( err: unknown, - fallbackKind: FsErrorKind = 'permission_denied', + fallbackKind: FsErrorKind = 'internal_error', ): FsError { if (err instanceof FsError) return err; const errno = (err as NodeJS.ErrnoException | undefined)?.code; diff --git a/packages/cli/src/serve/fs/paths.ts b/packages/cli/src/serve/fs/paths.ts index c3b8fe82e..384408bdf 100644 --- a/packages/cli/src/serve/fs/paths.ts +++ b/packages/cli/src/serve/fs/paths.ts @@ -399,6 +399,12 @@ export async function resolveWithinWorkspace( } catch (err) { const code = (err as NodeJS.ErrnoException)?.code; if (code === 'ENOENT' && ENOENT_TOLERATING_INTENTS.has(intent)) { + // Captured iff the symlink chain validated successfully — + // we then return this verified canonical instead of falling + // through to a re-walk from `absolute` (which would + // re-traverse the chain and could pick up an attacker's + // mid-walk symlink swap). + let symlinkResolvedCanonical: string | null = null; // Dangling-symlink write-escape guard. `realpath` follows // symlinks, so a path like `/leak -> /etc/cron.d/evil` // (where the target doesn't exist YET) throws ENOENT here. @@ -481,7 +487,13 @@ export async function resolveWithinWorkspace( // Only run the containment check when we actually traversed // at least one symlink — `firstHopTarget !== null` means the // input was a symlink (vs a path through a non-existent - // ancestor that wasn't itself a symlink). + // ancestor that wasn't itself a symlink). The verified + // `canonicalTarget` becomes the function's result; we DO + // NOT re-walk from `absolute` afterwards, since that would + // open a TOCTOU window between the check and the re-walk + // where an attacker swapping an intermediate symlink + // could produce a different canonical than the one we + // just verified. if (firstHopTarget !== null) { const { ancestor: targetAncestor, tail: targetTail } = await findExistingAncestor(cursor); @@ -493,9 +505,19 @@ export async function resolveWithinWorkspace( throw new FsError( 'symlink_escape', `dangling symlink target escapes workspace: ${input}`, - { hint: `symlink points to ${firstHopTarget}` }, + { + // Hint must NOT embed the symlink target — `recordDenied` + // forwards `hint` into `fs.denied` even in privacy mode, + // and an absolute outside-target string would leak the + // attacker's intended exfiltration path through audit + // events. Operators wanting the actual target value + // run with `QWEN_AUDIT_RAW_PATHS=1` and read it from + // `relPath` / `message`. + hint: 'symlink chain leaves the workspace; enable QWEN_AUDIT_RAW_PATHS for the resolved target', + }, ); } + symlinkResolvedCanonical = canonicalTarget; } } catch (err2) { if (err2 instanceof FsError) throw err2; @@ -506,9 +528,18 @@ export async function resolveWithinWorkspace( const code2 = (err2 as NodeJS.ErrnoException)?.code; if (code2 !== 'ENOENT') throw err2; } - const { ancestor, tail } = await findExistingAncestor(absolute); - const ancestorReal = await fsp.realpath(ancestor); - canonical = tail ? path.join(ancestorReal, tail) : ancestorReal; + // If the symlink-walk produced a verified canonical, use it + // instead of re-walking from `absolute` (which would discard + // the verification and admit a TOCTOU swap window). The + // re-walk is only the right behavior when no symlink was + // present at all. + if (symlinkResolvedCanonical !== null) { + canonical = symlinkResolvedCanonical; + } else { + const { ancestor, tail } = await findExistingAncestor(absolute); + const ancestorReal = await fsp.realpath(ancestor); + canonical = tail ? path.join(ancestorReal, tail) : ancestorReal; + } } else if (code === 'ENOENT') { throw new FsError('path_not_found', `path does not exist: ${input}`, { cause: err, diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts index 89cdbff64..5745ea6d6 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts @@ -206,6 +206,25 @@ describe('WorkspaceFileSystem - readBytes', () => { expect(isFsError(err)).toBe(true); expect((err as { kind: string }).kind).toBe('file_too_large'); }); + + it('readBytes catches concurrent post-stat growth via post-read size check', async () => { + // Pre-stat OOM gate sees a small file; post-read buf.length + // check catches the same-inode growth case (concurrent + // appender keeps the inode but extends past the cap). Test + // simulates by overwriting the file AFTER `resolve()` with a + // buffer larger than `MAX_READ_BYTES` — the readBytes call's + // pre-stat sees the large size and trips the hard cap; the + // post-read check is the defense-in-depth path for the case + // where stat passed but read picked up more bytes. + const policy = await import('./policy.js'); + const small = path.join(h.workspace, 'grew.bin'); + await fsp.writeFile(small, Buffer.alloc(64)); + const r = await h.fs.resolve('grew.bin', 'read'); + await fsp.writeFile(small, Buffer.alloc(policy.MAX_READ_BYTES + 1)); + const err = await h.fs.readBytes(r).catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_too_large'); + }); }); describe('WorkspaceFileSystem - list', () => { diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.ts b/packages/cli/src/serve/fs/workspaceFileSystem.ts index 288f1612e..9db91238f 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts @@ -180,6 +180,20 @@ export function createWorkspaceFileSystemFactory( useQwenignore: true, ignoreDirs: [], }); + // Freeze the `Ignore` instance so it cannot be mutated after + // the factory builds it. The `Ignore` class exposes a public + // `add(patterns): this` method that mutates state in-place; + // every `forRequest()` returns a `WorkspaceFileSystemImpl` + // sharing this same instance, so a future "ignore this + // pattern for this session" feature calling `.add()` would + // silently corrupt all concurrent requests. `Object.freeze` + // turns the mutation into a `TypeError` instead of a silent + // cross-request leak — surfacing the architectural mistake + // before it ships. Read paths (`getFileFilter` / + // `getDirectoryFilter`) are unaffected. Operators wanting + // per-session ignore rules should pass a different `Ignore` + // instance via `deps.ignore` to a separate factory. + Object.freeze(ignore); const audit: AuditPublisher = createAuditPublisher({ emit: deps.emit, boundWorkspace, @@ -409,6 +423,24 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { // `maxBytes`. enforceReadBytesSize(st.size); let buf = await fsp.readFile(p as string); + // Post-read size sanity check — same TOCTOU window as + // `readText` (concurrent appender keeps the same inode, so + // `assertInodeStableAfterRead` doesn't catch *growth*). + // The pre-stat gate sees the size at stat time; an attacker + // can grow the file from sub-cap to multi-GB between + // `fsp.stat` and `fsp.readFile`, OOMing the daemon. Reject + // post-read if the buffer ended up over the hard cap. The + // proper fix (fd-based read with `fileHandle.stat` + + // bounded `fileHandle.read`) is a hardening follow-up. + if (buf.length > MAX_READ_BYTES) { + throw new FsError( + 'file_too_large', + `file grew during read to ${buf.length} bytes (cap ${MAX_READ_BYTES})`, + { + hint: 'concurrent writer detected via post-read size; retry or readText for capped truncation', + }, + ); + } // Soft cap: caller's `opts.maxBytes` is a window cap matching // the parameter name's promise. Earlier semantics treated // it as a "reject if file > maxBytes" gate, which violated @@ -522,18 +554,29 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { // walk root. const cwd = (opts.cwd as string | undefined) ?? this.deps.boundWorkspace; let cwdReal: string; - try { - cwdReal = await fsp.realpath(path.resolve(cwd)); - } catch (err) { - const code = (err as NodeJS.ErrnoException)?.code; - if (code === 'ENOENT') { - throw new FsError( - 'path_not_found', - `glob cwd does not exist: ${cwd}`, - { cause: err }, - ); + // Short-circuit when `cwd` is exactly the canonical + // boundWorkspace — the factory already canonicalized it via + // `realpathSync.native`, so a per-request async `realpath` + // is a redundant syscall. Saves the syscall on the common + // path (route handlers omitting `opts.cwd` to glob the + // whole workspace) without losing the canonicalization + // guarantee — the factory's stored value IS the canonical. + if (cwd === this.deps.boundWorkspace) { + cwdReal = cwd; + } else { + try { + cwdReal = await fsp.realpath(path.resolve(cwd)); + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'ENOENT') { + throw new FsError( + 'path_not_found', + `glob cwd does not exist: ${cwd}`, + { cause: err }, + ); + } + throw err; } - throw err; } if (!isWithinRoot(cwdReal, this.deps.boundWorkspace)) { throw new FsError( @@ -903,19 +946,22 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { */ function safeUtf8Truncate(buf: Buffer, maxBytes: number): Buffer { if (buf.length <= maxBytes) return buf; + // Walk `end` back through any UTF-8 continuation bytes + // (`0b10xxxxxx`) at the cut position. After the loop: + // - `end == 0`, OR + // - `buf[end]` is a leading byte (top bits `0xxxxxxx`, + // `110xxxxx`, `1110xxxx`, or `11110xxx`). + // Either way, `subarray(0, end)` is exactly the longest + // codepoint-aligned prefix at most `maxBytes` long: if + // `buf[end]` is the leading byte of an incomplete sequence + // we exclude it; if `buf[end]` is ASCII (i.e. the original + // `maxBytes` happened to land on a codepoint boundary) the + // walk-back is a no-op and we still cut at `maxBytes`. + // The earlier "seqLen check" was dead code — `subarray(0, + // end)` already excludes the leading byte at index `end`, + // so no further adjustment is ever needed. let end = maxBytes; while (end > 0 && (buf[end] & 0xc0) === 0x80) end--; - if (end > 0) { - const lead = buf[end - 1]; - let seqLen = 1; - if ((lead & 0x80) === 0) seqLen = 1; - else if ((lead & 0xe0) === 0xc0) seqLen = 2; - else if ((lead & 0xf0) === 0xe0) seqLen = 3; - else if ((lead & 0xf8) === 0xf0) seqLen = 4; - if (seqLen > 1 && end - 1 + seqLen > maxBytes) { - end = end - 1; - } - } return buf.subarray(0, end); } @@ -961,15 +1007,6 @@ async function assertInodeStableAfterRead( } } -/** - * Map a `Stats` or `Dirent` (both expose the same `isFile` / - * `isDirectory` / `isSymbolicLink` methods) to the boundary's - * narrow `kind` union. `FsStat['kind']` and `FsEntry['kind']` are - * the same 4-value union, so a single helper keeps the - * classification rule in one place — reviewer flagged the previous - * `kindFromStats` + `kindFromDirent` pair as a maintenance hazard - * (drift risk if the union grows). - */ /** * Pre-write TOCTOU guard. Mirrors the post-read inode check but * runs BEFORE the actual write. The earlier `resolve()` → @@ -1018,6 +1055,13 @@ async function assertNotSymlinkBeforeWrite(p: string): Promise { } } +/** + * Map a `Stats` or `Dirent` (both expose the same `isFile` / + * `isDirectory` / `isSymbolicLink` methods) to the boundary's + * narrow `kind` union. `FsStat['kind']` and `FsEntry['kind']` are + * the same 4-value union, so a single helper keeps the + * classification rule in one place. + */ function kindFromStatLike(s: { isFile(): boolean; isDirectory(): boolean; diff --git a/packages/cli/src/serve/server.ts b/packages/cli/src/serve/server.ts index 136bff5bf..d0cd5004d 100644 --- a/packages/cli/src/serve/server.ts +++ b/packages/cli/src/serve/server.ts @@ -45,21 +45,39 @@ import { } from './fs/index.js'; /** - * Build a no-op fs-audit emitter that logs a single warning the - * first time it's invoked. The default factory uses this so a - * regression that silently strips audit events shows up in - * operator logs instead of disappearing — `() => {}` is the - * "obvious" no-op but offers no failure signal. PR 19/20's - * `runQwenServe` injection replaces this with a real per-session - * emit, so legitimate production traffic never hits the warning. + * Build a no-op fs-audit emitter that logs a warning every + * `WARN_EVERY` dropped events with as much context as the audit + * payload exposes. The default factory uses this so a regression + * that silently strips audit events shows up in operator logs + * instead of disappearing — the earlier one-shot warn was a + * permanent silent no-op after the first event, which made a PR + * 19/20 regression where `runQwenServe` forgets to inject the real + * factory completely invisible (every write 403s; nothing in + * audit; one stale stderr line easy to miss for background + * daemons). Periodic warning + dropped-event count + first-event + * `errorKind` + `pathHash` make the regression actionable. + * + * PR 19/20's `runQwenServe` injection replaces this with a real + * per-session emit, so legitimate production traffic never hits + * the warning. */ function createDefaultFsAuditEmit(): (event: BridgeEvent) => void { - let warned = false; + const WARN_EVERY = 100; + let droppedCount = 0; return (event: BridgeEvent) => { - if (!warned) { - warned = true; + droppedCount += 1; + if (droppedCount === 1 || droppedCount % WARN_EVERY === 0) { + const data = event.data as + | { errorKind?: string; pathHash?: string; intent?: string } + | undefined; + const ctx: string[] = []; + if (data?.errorKind) ctx.push(`errorKind=${data.errorKind}`); + if (data?.intent) ctx.push(`intent=${data.intent}`); + if (data?.pathHash) ctx.push(`pathHash=${data.pathHash}`); + const ctxStr = ctx.length > 0 ? ` (${ctx.join(' ')})` : ''; writeStderrLine( - `qwen serve: fs audit emit is the default no-op (event ${event.type} dropped). ` + + `qwen serve: fs audit emit is the default no-op — ${droppedCount} event(s) dropped so far. ` + + `Latest type=${event.type}${ctxStr}. ` + `Inject deps.fsFactory in createServeApp to wire audit into the EventBus.`, ); }