From 7b0db4c3aee3aa2c36a8e71c2f993147bbfd6c7b Mon Sep 17 00:00:00 2001 From: doudouOUC Date: Sun, 17 May 2026 23:14:49 +0800 Subject: [PATCH] refactor(serve): add FileSystemService boundary (#4175 PR 18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce a per-request workspace filesystem boundary inside the `qwen serve` daemon. The boundary centralizes path canonicalization, symlink-aware boundary checks, ignore/trust policy, size/binary limits, and audit hooks behind a single typed surface — preparing PR 19 (read-only file routes) and PR 20 (write/edit routes) to share a guarded chokepoint instead of re-implementing path safety per route. Wave 4 PR 18 of #4175 — pure refactor, no new HTTP routes; depends on PR 12 (#4241) and PR 15 (#4236), both merged. New module under `packages/cli/src/serve/fs/`: - `paths.ts` extracts `canonicalizeWorkspace` from `httpAcpBridge.ts` (re-exported there for backward compatibility) and adds: - `ResolvedPath` brand and `Intent` union (read/write/edit/list/ glob/stat) with exhaustiveness checks at the trust gate - `hasSuspiciousPathPattern` — detects NTFS ADS, 8.3 short names, long-path prefixes, UNC paths, trailing dots, DOS device names, and three-or-more-dot path components (claude-code-style) - `findExistingAncestor` with explicit ENOTDIR rejection so a regular file in a path component throws `parse_error` rather than passing boundary inspection and 500-ing later - `resolveWithinWorkspace` running a chain-aware realpath check with ENOENT-tolerant ancestor walk for write/stat intents - `errors.ts` defines `FsError` / `FsErrorKind` plus `wrapAsFsError`, which categorizes raw `fs.promises` errnos (EACCES → permission_ denied, ELOOP → symlink_escape, ENOTDIR → parse_error, etc.) so body-level failures emit audit events instead of escaping uncategorized - `policy.ts` carries `MAX_READ_BYTES` (256 KiB), `MAX_WRITE_BYTES` (5 MiB), `BINARY_PROBE_BYTES` (4 KiB), `shouldIgnore` (file/ directory aware), and `assertTrustedForIntent` with an exhaustive switch over `Intent` - `audit.ts` emits typed `fs.access` / `fs.denied` `BridgeEvent` frames with SHA-256-hashed paths, optional raw-path passthrough via `QWEN_AUDIT_RAW_PATHS=1`, and discriminator `kind` fields so SDK consumers can exhaustively narrow `event.data` - `workspaceFileSystem.ts` — `WorkspaceFileSystem` interface + `createWorkspaceFileSystemFactory` with eight methods (resolve, stat, readText, readBytes, list, glob, writeText, edit). Every body method funnels failures through `recordAndWrap`, which wraps raw fs errors and always emits an `fs.denied` audit event before rethrowing. `readText` enforces `MAX_READ_BYTES` *before* delegating to the slurping core service so unbounded requests against multi-gigabyte files can no longer OOM the daemon. `glob` realpath-checks each hit against the canonical workspace and reports filtered escapes via a single aggregated `fs.denied` event with the dropped count - `index.ts` is the barrel re-export PR 19/20 will import from Modified files: - `packages/cli/src/serve/httpAcpBridge.ts` — extracted `canonicalizeWorkspace` to `fs/paths.ts`; the bridge re-exports it so existing callers in `server.ts` and `runQwenServe.ts` keep working - `packages/cli/src/serve/server.ts` — added `fsFactory?: WorkspaceFileSystemFactory` to `ServeAppDeps`; `createServeApp` builds a strict default (`trusted: false`, warn-once no-op `emit`) when none is injected so a future refactor that forgets `fsFactory` injection cannot silently allow writes against an untrusted workspace; factory parked on `app.locals` for PR 19/20 route handlers - `packages/core/src/index.ts` — re-exports `Ignore`, `loadIgnoreRules`, and `LoadIgnoreRulesOptions` from `utils/filesearch/ignore.js` for cli consumption 411 serve tests pass; typecheck clean. Engineering principles checklist: - [x] Independently mergeable (no new routes, no new capability tag) - [x] Backward compatible (no removed routes / event fields / CLI behavior) - [x] Default off (no public surface change; PR 19/20 will activate routes) - [x] qwen serve Stage 1 routes preserved - [x] Gradual migration (PR 19/20 will adopt the boundary) - [x] Reversible (single PR rollback) - [x] Tests-first (101 unit tests across the new module + contract test) 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code) --- packages/cli/src/serve/fs/audit.test.ts | 176 ++++++ packages/cli/src/serve/fs/audit.ts | 218 +++++++ packages/cli/src/serve/fs/contract.test.ts | 272 ++++++++ packages/cli/src/serve/fs/errors.test.ts | 144 +++++ packages/cli/src/serve/fs/errors.ts | 141 +++++ packages/cli/src/serve/fs/index.ts | 56 ++ packages/cli/src/serve/fs/paths.test.ts | 262 ++++++++ packages/cli/src/serve/fs/paths.ts | 353 +++++++++++ packages/cli/src/serve/fs/policy.test.ts | 241 +++++++ packages/cli/src/serve/fs/policy.ts | 214 +++++++ .../src/serve/fs/workspaceFileSystem.test.ts | 435 +++++++++++++ .../cli/src/serve/fs/workspaceFileSystem.ts | 596 ++++++++++++++++++ packages/cli/src/serve/httpAcpBridge.ts | 77 +-- packages/cli/src/serve/server.test.ts | 83 +++ packages/cli/src/serve/server.ts | 65 ++ packages/core/src/index.ts | 5 + 16 files changed, 3270 insertions(+), 68 deletions(-) create mode 100644 packages/cli/src/serve/fs/audit.test.ts create mode 100644 packages/cli/src/serve/fs/audit.ts create mode 100644 packages/cli/src/serve/fs/contract.test.ts create mode 100644 packages/cli/src/serve/fs/errors.test.ts create mode 100644 packages/cli/src/serve/fs/errors.ts create mode 100644 packages/cli/src/serve/fs/index.ts create mode 100644 packages/cli/src/serve/fs/paths.test.ts create mode 100644 packages/cli/src/serve/fs/paths.ts create mode 100644 packages/cli/src/serve/fs/policy.test.ts create mode 100644 packages/cli/src/serve/fs/policy.ts create mode 100644 packages/cli/src/serve/fs/workspaceFileSystem.test.ts create mode 100644 packages/cli/src/serve/fs/workspaceFileSystem.ts diff --git a/packages/cli/src/serve/fs/audit.test.ts b/packages/cli/src/serve/fs/audit.test.ts new file mode 100644 index 000000000..665154f6c --- /dev/null +++ b/packages/cli/src/serve/fs/audit.test.ts @@ -0,0 +1,176 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { createHash } from 'node:crypto'; +import * as path from 'node:path'; +import * as os from 'node:os'; +import { + FS_ACCESS_EVENT_TYPE, + FS_DENIED_EVENT_TYPE, + createAuditPublisher, +} from './audit.js'; +import type { ResolvedPath } from './paths.js'; +import type { BridgeEvent } from '../eventBus.js'; + +function expectedHash(p: string): string { + return createHash('sha256').update(p).digest('hex').slice(0, 16); +} + +describe('createAuditPublisher', () => { + function setup(opts?: { includeRawPaths?: boolean }) { + const events: BridgeEvent[] = []; + const workspace = path.join(os.tmpdir(), 'audit-ws'); + const publisher = createAuditPublisher({ + emit: (e) => events.push(e), + boundWorkspace: workspace, + includeRawPaths: opts?.includeRawPaths ?? false, + }); + return { events, publisher, workspace }; + } + + it('emits fs.access with hashed path and originatorClientId', () => { + const { events, publisher, workspace } = setup(); + const absolute = path.join(workspace, 'src', 'index.ts') as ResolvedPath; + publisher.recordAccess( + { + originatorClientId: 'client-abc', + sessionId: 'sess-1', + route: 'GET /file', + }, + { + intent: 'read', + absolute, + durationMs: 12, + sizeBytes: 4096, + }, + ); + expect(events).toHaveLength(1); + const ev = events[0]; + expect(ev.type).toBe(FS_ACCESS_EVENT_TYPE); + expect(ev.v).toBe(1); + expect(ev.originatorClientId).toBe('client-abc'); + expect(ev.data).toMatchObject({ + kind: FS_ACCESS_EVENT_TYPE, + intent: 'read', + route: 'GET /file', + pathHash: expectedHash(absolute), + sizeBytes: 4096, + durationMs: 12, + }); + // No relPath unless raw paths enabled. + expect(ev.data).not.toHaveProperty('relPath'); + }); + + it('attaches relPath when includeRawPaths is true', () => { + const { events, publisher, workspace } = setup({ includeRawPaths: true }); + const absolute = path.join(workspace, 'src', 'index.ts') as ResolvedPath; + publisher.recordAccess( + { originatorClientId: 'c', route: 'GET /file' }, + { intent: 'read', absolute, durationMs: 1 }, + ); + expect((events[0].data as { relPath?: string }).relPath).toBe( + path.join('src', 'index.ts'), + ); + }); + + it('omits truncated/matchedIgnore when not provided', () => { + const { events, publisher, workspace } = setup(); + const absolute = path.join(workspace, 'a.ts') as ResolvedPath; + publisher.recordAccess( + { route: 'GET /file' }, + { intent: 'read', absolute, durationMs: 0 }, + ); + expect(events[0].data).not.toHaveProperty('truncated'); + expect(events[0].data).not.toHaveProperty('matchedIgnore'); + }); + + it('preserves truncated and matchedIgnore when set', () => { + const { events, publisher, workspace } = setup(); + const absolute = path.join(workspace, 'big.txt') as ResolvedPath; + publisher.recordAccess( + { route: 'GET /file' }, + { + intent: 'read', + absolute, + durationMs: 5, + truncated: true, + matchedIgnore: 'file', + sizeBytes: 1024 * 1024, + }, + ); + expect(events[0].data).toMatchObject({ + truncated: true, + matchedIgnore: 'file', + sizeBytes: 1024 * 1024, + }); + }); + + it('emits fs.denied with errorKind and hashed probe path', () => { + const { events, publisher, workspace } = setup(); + publisher.recordDenied( + { originatorClientId: 'c', route: 'GET /file' }, + { + intent: 'read', + input: '../escape', + errorKind: 'path_outside_workspace', + hint: 'paths must stay inside workspace', + }, + ); + expect(events).toHaveLength(1); + const ev = events[0]; + expect(ev.type).toBe(FS_DENIED_EVENT_TYPE); + expect(ev.originatorClientId).toBe('c'); + expect(ev.data).toMatchObject({ + kind: FS_DENIED_EVENT_TYPE, + intent: 'read', + route: 'GET /file', + errorKind: 'path_outside_workspace', + hint: 'paths must stay inside workspace', + // probe path = path.resolve(workspace, '../escape') + pathHash: expectedHash(path.resolve(workspace, '../escape')), + }); + }); + + it('emits fs.denied even when hint is absent', () => { + const { events, publisher } = setup(); + publisher.recordDenied( + { route: 'POST /file/edit' }, + { + intent: 'edit', + input: '/etc/passwd', + errorKind: 'symlink_escape', + }, + ); + expect(events).toHaveLength(1); + expect(events[0].data).not.toHaveProperty('hint'); + }); + + it('respects QWEN_AUDIT_RAW_PATHS=1 via env when includeRawPaths is unset', () => { + const original = process.env['QWEN_AUDIT_RAW_PATHS']; + process.env['QWEN_AUDIT_RAW_PATHS'] = '1'; + try { + const events: BridgeEvent[] = []; + const workspace = path.join(os.tmpdir(), 'audit-env'); + const publisher = createAuditPublisher({ + emit: (e) => events.push(e), + boundWorkspace: workspace, + }); + publisher.recordAccess( + { route: 'GET /file' }, + { + intent: 'read', + absolute: path.join(workspace, 'foo') as ResolvedPath, + durationMs: 0, + }, + ); + expect((events[0].data as { relPath?: string }).relPath).toBe('foo'); + } finally { + if (original === undefined) delete process.env['QWEN_AUDIT_RAW_PATHS']; + else process.env['QWEN_AUDIT_RAW_PATHS'] = original; + } + }); +}); diff --git a/packages/cli/src/serve/fs/audit.ts b/packages/cli/src/serve/fs/audit.ts new file mode 100644 index 000000000..b919fd8a7 --- /dev/null +++ b/packages/cli/src/serve/fs/audit.ts @@ -0,0 +1,218 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createHash } from 'node:crypto'; +import * as path from 'node:path'; +import { EVENT_SCHEMA_VERSION, type BridgeEvent } from '../eventBus.js'; +import type { FsErrorKind } from './errors.js'; +import type { Intent, ResolvedPath } from './paths.js'; + +/** + * Frame type for successful filesystem operations on the boundary. + * Emitted from the orchestrator on the success path of `readText`, + * `readBytes`, `list`, `glob`, `stat`, `writeText`, `edit`. PR 19/20 + * SSE consumers can fan it out to subscribed clients; PR 18 itself + * has no consumer beyond unit tests, since no HTTP routes use the + * boundary yet. + */ +export const FS_ACCESS_EVENT_TYPE = 'fs.access' as const; + +/** + * Frame type for boundary policy denials. Emitted whenever an + * `FsError` propagates from the orchestrator. Always emitted, even + * for transient ones that the route handler will surface to the + * caller — the audit trail is the operator's tool, separate from + * the client-visible response. + */ +export const FS_DENIED_EVENT_TYPE = 'fs.denied' as const; + +/** + * Request-scoped audit context. Bound to a `WorkspaceFileSystem` + * instance by the factory's `forRequest(ctx)` call so individual + * orchestrator methods don't need to thread these fields by hand. + */ +export interface AuditContext { + /** Daemon-stamped client identity from PR 7 (#4231). */ + originatorClientId?: string; + /** Optional ACP session id for cross-correlating audit + session events. */ + sessionId?: string; + /** Route name like 'GET /file' — populated by PR 19/20 handlers. */ + route: string; +} + +/** + * Successful-access record. The hot path computes this lazily so a + * disabled publisher (no subscribers, no flag) doesn't pay the + * SHA-256 cost. Sized fields (`sizeBytes`) and outcome fields + * (`truncated`) are present only when meaningful for the intent. + * + * The literal `kind` field discriminates this against + * `FsDeniedAuditPayload` so SDK consumers can `switch` over a + * `FsAccessAuditPayload | FsDeniedAuditPayload` union and have + * the type narrow inside each branch — the `BridgeEvent.type` + * envelope alone doesn't propagate type information into + * `event.data: unknown`. + */ +export interface FsAccessAuditPayload { + kind: typeof FS_ACCESS_EVENT_TYPE; + intent: Intent; + route: string; + pathHash: string; + /** Workspace-relative path; only populated when QWEN_AUDIT_RAW_PATHS=1. */ + relPath?: string; + sizeBytes?: number; + truncated?: boolean; + matchedIgnore?: 'file' | 'directory'; + durationMs: number; +} + +export interface FsDeniedAuditPayload { + kind: typeof FS_DENIED_EVENT_TYPE; + intent: Intent; + route: string; + pathHash: string; + relPath?: string; + errorKind: FsErrorKind; + hint?: string; +} + +/** + * Boundary-side audit publisher. The orchestrator (commit 6) will + * call `recordAccess` on success and `recordDenied` on `FsError`, + * passing the resolved path so this module can normalize, hash, + * and (optionally) attach the relative form. + */ +export interface AuditPublisher { + recordAccess( + ctx: AuditContext, + record: Omit< + FsAccessAuditPayload, + 'kind' | 'pathHash' | 'relPath' | 'route' + > & { + absolute: ResolvedPath | string; + }, + ): void; + recordDenied( + ctx: AuditContext, + record: Omit< + FsDeniedAuditPayload, + 'kind' | 'pathHash' | 'relPath' | 'route' + > & { + /** Raw user input; the canonical form may not exist on disk. */ + input: string; + }, + ): void; +} + +/** + * SHA-256 over the canonical absolute path, truncated to 16 hex + * chars. The truncation matches claude-code's privacy model: long + * enough to be unique within a workspace, short enough that an + * audit log is human-scannable. Full hex (64 chars) buys nothing + * here because the audit consumer never reverses the hash. + */ +function hashPath(absolute: string): string { + return createHash('sha256').update(absolute).digest('hex').slice(0, 16); +} + +/** + * Compute the workspace-relative form of a path for the optional + * `relPath` audit field. Returns the trailing path even when the + * input lies outside `boundWorkspace` (the `denied` case): the + * audit consumer wants to see what the caller asked for, not be + * silently dropped. + */ +function relForAudit(raw: string, boundWorkspace: string): string { + // For absolute inputs, compute relative; for relative, pass through. + // Either way the operator gets a workspace-anchored view. + return path.isAbsolute(raw) ? path.relative(boundWorkspace, raw) : raw; +} + +/** + * Whether the env opt-in for raw paths is active. Read once per + * factory invocation rather than per emit, so flipping the env + * mid-process needs a daemon restart — predictable behavior for + * operators tailing logs. + */ +function rawPathsEnabled(): boolean { + return process.env['QWEN_AUDIT_RAW_PATHS'] === '1'; +} + +export interface CreateAuditPublisherDeps { + /** Bridge-bound publisher into `EventBus.publish`. */ + emit: (event: BridgeEvent) => void; + /** Canonical workspace root, for relPath computation. */ + boundWorkspace: string; + /** Optional override for tests / privacy modes. */ + includeRawPaths?: boolean; +} + +/** + * Build an `AuditPublisher` whose emit method publishes typed + * `BridgeEvent`s onto the daemon's per-session NDJSON stream. The + * publisher takes care of: + * + * - hashing the path (always) + * - computing relative path (only when `includeRawPaths` is on) + * - synthesizing the `BridgeEvent.type` discriminator + * - forwarding `originatorClientId` so the SSE fan-out can suppress + * self-echoes + * + * Publishers are cheap to construct and intended to live on a + * `WorkspaceFileSystemFactory` for the daemon's process lifetime. + */ +export function createAuditPublisher( + deps: CreateAuditPublisherDeps, +): AuditPublisher { + const includeRawPaths = deps.includeRawPaths ?? rawPathsEnabled(); + const { emit, boundWorkspace } = deps; + return { + recordAccess(ctx, record) { + const absolute = String(record.absolute); + const payload: FsAccessAuditPayload = { + kind: FS_ACCESS_EVENT_TYPE, + intent: record.intent, + route: ctx.route, + pathHash: hashPath(absolute), + durationMs: record.durationMs, + }; + if (record.sizeBytes !== undefined) payload.sizeBytes = record.sizeBytes; + if (record.truncated) payload.truncated = true; + if (record.matchedIgnore) payload.matchedIgnore = record.matchedIgnore; + if (includeRawPaths) { + payload.relPath = relForAudit(absolute, boundWorkspace); + } + emit({ + v: EVENT_SCHEMA_VERSION, + type: FS_ACCESS_EVENT_TYPE, + data: payload, + originatorClientId: ctx.originatorClientId, + }); + }, + recordDenied(ctx, record) { + const probe = path.isAbsolute(record.input) + ? record.input + : path.resolve(boundWorkspace, record.input); + const payload: FsDeniedAuditPayload = { + kind: FS_DENIED_EVENT_TYPE, + intent: record.intent, + route: ctx.route, + pathHash: hashPath(probe), + errorKind: record.errorKind, + }; + if (record.hint) payload.hint = record.hint; + if (includeRawPaths) { + payload.relPath = relForAudit(record.input, boundWorkspace); + } + emit({ + v: EVENT_SCHEMA_VERSION, + type: FS_DENIED_EVENT_TYPE, + data: payload, + originatorClientId: ctx.originatorClientId, + }); + }, + }; +} diff --git a/packages/cli/src/serve/fs/contract.test.ts b/packages/cli/src/serve/fs/contract.test.ts new file mode 100644 index 000000000..2de3967f0 --- /dev/null +++ b/packages/cli/src/serve/fs/contract.test.ts @@ -0,0 +1,272 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeAll, afterAll } from 'vitest'; +import { promises as fsp, realpathSync } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { randomBytes } from 'node:crypto'; +import { WorkspaceContext } from '@qwen-code/qwen-code-core'; +import { canonicalizeWorkspace, resolveWithinWorkspace } from './paths.js'; +import type { FsError, isFsError, type FsErrorKind } from './errors.js'; + +/** + * Kinds the boundary throws to indicate the path is *outside the + * workspace*. `path_not_found` is intentionally NOT here: it's a + * separate "exists?" decision, and `WorkspaceContext` returns + * `true` ("would be in workspace if it existed") for missing files + * — a legitimate semantic mismatch the contract doesn't try to + * unify. + */ +const OUT_OF_WORKSPACE_KINDS: ReadonlySet = new Set([ + 'path_outside_workspace', + 'symlink_escape', +]); + +/** + * #4175 PR 18 contract test (commit 8 of plan). + * + * The serve boundary's `resolveWithinWorkspace` and the existing + * `WorkspaceContext.isPathWithinWorkspace` (which routes through a + * cached `fs.realpathSync`) live in different packages and have + * independent code paths. This corpus pins their agreement so a + * future refactor of either side has a one-shot signal that + * canonicalization has drifted. + * + * Concretely we assert: + * 1. For paths the boundary admits, `WorkspaceContext` agrees the + * path is inside the workspace. + * 2. For paths the boundary rejects with `path_outside_workspace` + * or `symlink_escape`, `WorkspaceContext` agrees the path is + * outside. + * 3. The canonical form `resolveWithinWorkspace` returns matches + * `realpathSync.native` (the "ground truth" the boundary's + * ENOENT-tolerant code path falls back to via `path.resolve`). + * + * The corpus is intentionally small but covers the dimensions that + * tend to disagree on edge cases: symlinks pointing in/out, ENOENT + * paths, case-different inputs on case-insensitive filesystems, + * and `..` traversal. + */ + +interface CorpusCase { + name: string; + /** Setup hook; receives the workspace path and may create files/symlinks. */ + setup?: (workspace: string, scratch: string) => Promise; + /** Path passed to both APIs. */ + input: (workspace: string, scratch: string) => string; + /** + * Whether we expect the path to be inside the workspace. + * `'existence-mismatch'` flags cases where the boundary rejects + * with `path_not_found` but `WorkspaceContext` says "would-be + * inside" — by design the two APIs answer different questions. + */ + expectInside: boolean | 'existence-mismatch'; + /** Intent for `resolveWithinWorkspace`. */ + intent: 'read' | 'write' | 'list' | 'glob' | 'stat'; + /** + * If set, only run on these platforms. Used for case-mismatch + * (macOS/win) and symlink-on-Windows cases that may need admin. + */ + onlyPlatforms?: NodeJS.Platform[]; +} + +const CORPUS: CorpusCase[] = [ + { + name: 'plain file inside workspace', + setup: async (ws) => { + await fsp.writeFile(path.join(ws, 'a.txt'), 'a'); + }, + input: () => 'a.txt', + expectInside: true, + intent: 'read', + }, + { + name: 'nested file inside workspace', + setup: async (ws) => { + await fsp.mkdir(path.join(ws, 'src')); + await fsp.writeFile(path.join(ws, 'src', 'index.ts'), ''); + }, + input: () => path.join('src', 'index.ts'), + expectInside: true, + intent: 'read', + }, + { + name: 'workspace root via "."', + input: () => '.', + expectInside: true, + intent: 'list', + }, + { + name: '`..` traversal lands outside', + input: () => '../escape', + expectInside: false, + intent: 'read', + }, + { + name: 'absolute path outside workspace', + setup: async (_ws, scratch) => { + await fsp.writeFile(path.join(scratch, 'outside.txt'), 'x'); + }, + input: (_ws, scratch) => path.join(scratch, 'outside.txt'), + expectInside: false, + intent: 'read', + }, + { + name: 'symlink inside workspace pointing out', + setup: async (ws, scratch) => { + const outside = path.join(scratch, 'leak-target.txt'); + await fsp.writeFile(outside, 'sensitive'); + await fsp.symlink(outside, path.join(ws, 'leak'), 'file'); + }, + input: () => 'leak', + expectInside: false, + intent: 'read', + }, + { + name: 'symlink inside workspace pointing in', + setup: async (ws) => { + await fsp.writeFile(path.join(ws, 'real.txt'), 'in'); + await fsp.symlink( + path.join(ws, 'real.txt'), + path.join(ws, 'alias'), + 'file', + ); + }, + input: () => 'alias', + expectInside: true, + intent: 'read', + }, + { + name: 'non-existent path with write intent (ENOENT-tolerant)', + input: () => path.join('newdir', 'leaf.txt'), + expectInside: true, + intent: 'write', + }, + { + name: 'non-existent path with read intent rejects (existence, not boundary)', + input: () => 'never-existed.txt', + // The boundary throws `path_not_found`, which is an existence + // decision, not a containment decision. WorkspaceContext models + // missing files as "would be in workspace" and returns `true`, + // so this case asserts the asymmetry rather than the agreement. + expectInside: 'existence-mismatch', + intent: 'read', + }, + { + name: 'case-different existing file (macOS/win only)', + onlyPlatforms: ['darwin', 'win32'], + setup: async (ws) => { + await fsp.writeFile(path.join(ws, 'CaseFile.TXT'), ''); + }, + input: () => 'casefile.txt', + expectInside: true, + intent: 'read', + }, +]; + +describe('contract: resolveWithinWorkspace ↔ WorkspaceContext', () => { + let scratch: string; + + beforeAll(async () => { + scratch = await fsp.mkdtemp( + path.join( + os.tmpdir(), + `qwen-fs-contract-${randomBytes(4).toString('hex')}-`, + ), + ); + }); + + afterAll(async () => { + await fsp.rm(scratch, { recursive: true, force: true }); + }); + + it.each(CORPUS.map((tc) => [tc.name, tc] as const))( + '%s', + async (_name, tc) => { + if (tc.onlyPlatforms && !tc.onlyPlatforms.includes(process.platform)) { + return; + } + const wsDir = path.join(scratch, randomBytes(4).toString('hex')); + await fsp.mkdir(wsDir); + const workspace = canonicalizeWorkspace(wsDir); + if (tc.setup) await tc.setup(workspace, scratch); + + const wsCtx = new WorkspaceContext(workspace); + const inputAbs = path.isAbsolute(tc.input(workspace, scratch)) + ? tc.input(workspace, scratch) + : path.resolve(workspace, tc.input(workspace, scratch)); + const wsInside = wsCtx.isPathWithinWorkspace(inputAbs); + + let boundaryAccepted = true; + let resolved: string | null = null; + let boundaryError: FsError | null = null; + try { + resolved = await resolveWithinWorkspace( + tc.input(workspace, scratch), + workspace, + tc.intent, + ); + } catch (err) { + if (!isFsError(err)) throw err; + boundaryAccepted = false; + boundaryError = err; + } + + if (tc.expectInside === 'existence-mismatch') { + expect(boundaryAccepted).toBe(false); + expect(boundaryError?.kind).toBe('path_not_found'); + // WorkspaceContext intentionally answers a different + // question for missing files — no equality assertion here. + } else if (tc.expectInside === true) { + expect(boundaryAccepted).toBe(true); + expect(wsInside).toBe(true); + } else { + expect(boundaryAccepted).toBe(false); + // The boundary rejected; only assert WorkspaceContext also + // says "outside" when the rejection was a containment + // decision (path_outside_workspace / symlink_escape). + if (boundaryError && OUT_OF_WORKSPACE_KINDS.has(boundaryError.kind)) { + expect(wsInside).toBe(false); + } + } + + // When both succeed, canonical forms must agree. + if (boundaryAccepted && resolved !== null) { + // Use realpathSync.native as ground truth when the path + // exists; for write-intent ENOENT cases we fall back to + // the resolved-but-not-realpathed form on the leaf. + let groundTruth: string; + try { + groundTruth = realpathSync.native(inputAbs); + } catch { + // ENOENT — boundary used findExistingAncestor + realpath + + // join(tail). Reproduce that here for assertion symmetry. + let cursor = inputAbs; + const tail: string[] = []; + while (true) { + try { + await fsp.stat(cursor); + break; + } catch { + tail.unshift(path.basename(cursor)); + const parent = path.dirname(cursor); + if (parent === cursor) { + cursor = workspace; + break; + } + cursor = parent; + } + } + groundTruth = tail.length + ? path.join(realpathSync.native(cursor), ...tail) + : realpathSync.native(cursor); + } + expect(resolved).toBe(groundTruth); + } + }, + ); +}); diff --git a/packages/cli/src/serve/fs/errors.test.ts b/packages/cli/src/serve/fs/errors.test.ts new file mode 100644 index 000000000..44909901c --- /dev/null +++ b/packages/cli/src/serve/fs/errors.test.ts @@ -0,0 +1,144 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { + FsError, + isFsError, + wrapAsFsError, + type FsErrorKind, +} from './errors.js'; + +describe('FsError', () => { + it('uses the default status mapping for each kind', () => { + const cases: Array<[FsErrorKind, number]> = [ + ['path_outside_workspace', 400], + ['symlink_escape', 400], + ['path_not_found', 404], + ['binary_file', 422], + ['file_too_large', 413], + ['untrusted_workspace', 403], + ['permission_denied', 403], + ['parse_error', 400], + ]; + for (const [kind, status] of cases) { + const err = new FsError(kind, `${kind} message`); + expect(err.kind).toBe(kind); + expect(err.status).toBe(status); + } + }); + + it('honors an explicit status override', () => { + const err = new FsError('parse_error', 'service invariant', { + status: 422, + }); + expect(err.status).toBe(422); + }); + + it('captures the hint string when provided', () => { + const err = new FsError('binary_file', 'binary content', { + hint: 'Use readBytes for binary content', + }); + expect(err.hint).toBe('Use readBytes for binary content'); + }); + + it('omits hint when not provided', () => { + const err = new FsError('path_not_found', 'missing'); + expect(err.hint).toBeUndefined(); + }); + + it('forwards a cause through to Error.cause when available', () => { + const root = new Error('underlying ENOENT'); + const err = new FsError('path_not_found', 'wrapped', { cause: root }); + // Node 16+ supports Error.cause. On the off chance the runtime + // doesn't, we just assert the message + kind survived rather + // than failing the suite for a feature-detection issue. + expect(err.message).toBe('wrapped'); + expect(err.kind).toBe('path_not_found'); + if ('cause' in err) { + expect((err as Error & { cause?: unknown }).cause).toBe(root); + } + }); + + it('sets name to "FsError" for stack-trace clarity', () => { + const err = new FsError('symlink_escape', 'x'); + expect(err.name).toBe('FsError'); + }); + + it('is an Error subclass and isFsError narrows it', () => { + const err = new FsError('untrusted_workspace', 'no writes here'); + expect(err).toBeInstanceOf(Error); + expect(err).toBeInstanceOf(FsError); + expect(isFsError(err)).toBe(true); + expect(isFsError(new Error('plain'))).toBe(false); + expect(isFsError(null)).toBe(false); + expect(isFsError(undefined)).toBe(false); + expect(isFsError({ kind: 'path_not_found' })).toBe(false); + }); +}); + +describe('wrapAsFsError', () => { + function errno( + code: string, + message = `${code} message`, + ): NodeJS.ErrnoException { + const err = new Error(message) as NodeJS.ErrnoException; + err.code = code; + return err; + } + + it('returns FsError instances unchanged', () => { + const original = new FsError('binary_file', 'x'); + expect(wrapAsFsError(original)).toBe(original); + }); + + it('maps ENOENT to path_not_found', () => { + const out = wrapAsFsError(errno('ENOENT')); + expect(out.kind).toBe('path_not_found'); + expect(out.status).toBe(404); + }); + + it('maps EACCES and EPERM to permission_denied', () => { + expect(wrapAsFsError(errno('EACCES')).kind).toBe('permission_denied'); + expect(wrapAsFsError(errno('EPERM')).kind).toBe('permission_denied'); + }); + + it('maps ELOOP to symlink_escape', () => { + expect(wrapAsFsError(errno('ELOOP')).kind).toBe('symlink_escape'); + }); + + it('maps ENOTDIR / EISDIR to parse_error', () => { + expect(wrapAsFsError(errno('ENOTDIR')).kind).toBe('parse_error'); + expect(wrapAsFsError(errno('EISDIR')).kind).toBe('parse_error'); + }); + + it('maps EMFILE / ENFILE to permission_denied with a hint', () => { + const out = wrapAsFsError(errno('EMFILE')); + expect(out.kind).toBe('permission_denied'); + expect(out.hint).toMatch(/file-descriptor/); + }); + + it('falls back to the configured default kind for unknown errnos', () => { + expect(wrapAsFsError(errno('EWHATEVER')).kind).toBe('permission_denied'); + expect(wrapAsFsError(errno('EWHATEVER'), 'parse_error').kind).toBe( + 'parse_error', + ); + }); + + it('preserves the original error as cause', () => { + const root = errno('ENOENT', 'where'); + const out = wrapAsFsError(root); + if ('cause' in out) { + expect((out as Error & { cause?: unknown }).cause).toBe(root); + } + }); + + it('handles non-Error throwables without crashing', () => { + const out = wrapAsFsError('string thrown', 'parse_error'); + expect(out.kind).toBe('parse_error'); + expect(out.message).toBe('unknown filesystem error'); + }); +}); diff --git a/packages/cli/src/serve/fs/errors.ts b/packages/cli/src/serve/fs/errors.ts new file mode 100644 index 000000000..b6236b16d --- /dev/null +++ b/packages/cli/src/serve/fs/errors.ts @@ -0,0 +1,141 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +/** + * Discriminator for filesystem-boundary errors raised by the + * `WorkspaceFileSystem` layer (#4175 PR 18). + * + * The values are also serialized verbatim onto the wire by PR 19/20 + * route handlers as the `errorKind` field of the planned PR 13 + * envelope `{ kind, status, error, errorKind, hint }`. Keeping the + * union closed and stable lets SDK consumers exhaustively switch + * over the kinds without falling through to a generic 5xx path. + */ +export type FsErrorKind = + | 'path_outside_workspace' + | 'symlink_escape' + | 'path_not_found' + | 'binary_file' + | 'file_too_large' + | 'untrusted_workspace' + | 'permission_denied' + | 'parse_error'; + +/** + * HTTP status codes the boundary maps onto. The status lives on the + * error itself rather than being derived by the route handler so the + * serialization is "one helper line" — see PR 19/20 plans. The set is + * intentionally narrow: anything outside this map indicates the + * boundary is being asked to model a transport-level concern that + * doesn't belong here (5xx, 401/403 from auth, etc.). + */ +export type FsErrorStatus = 400 | 403 | 404 | 413 | 422; + +/** + * Default HTTP status mapping. Centralized here so callers can throw + * `new FsError('path_not_found', 'message')` without re-deriving the + * status; the constructor still accepts an explicit status override + * for the rare case where a kind is reused under a different status + * (e.g. `parse_error` may be 400 from a request body but 422 from a + * service-level invariant breach). + */ +const DEFAULT_STATUS_BY_KIND: Record = { + path_outside_workspace: 400, + symlink_escape: 400, + path_not_found: 404, + binary_file: 422, + file_too_large: 413, + untrusted_workspace: 403, + permission_denied: 403, + parse_error: 400, +}; + +/** + * Typed boundary error. PR 18 ships the class only — no route + * serializes it yet. PR 19/20 add a `sendFsError(res, err)` helper + * that maps `kind`/`status`/`hint` onto the envelope. + * + * Why a class rather than a plain object: `instanceof FsError` + * gives the orchestrator a single catch-clause to convert thrown + * boundary errors into `fs.denied` audit events without also + * eating unrelated runtime errors (`TypeError`, `ENOENT` from a + * lower-level `fs.promises` call that escaped categorization, etc.). + */ +export class FsError extends Error { + readonly kind: FsErrorKind; + readonly status: FsErrorStatus; + readonly hint?: string; + + constructor( + kind: FsErrorKind, + message: string, + options?: { hint?: string; status?: FsErrorStatus; cause?: unknown }, + ) { + super( + message, + options?.cause === undefined ? undefined : { cause: options.cause }, + ); + this.name = 'FsError'; + this.kind = kind; + this.status = options?.status ?? DEFAULT_STATUS_BY_KIND[kind]; + this.hint = options?.hint; + } +} + +/** + * Type guard for catch sites that need to distinguish boundary + * errors from generic `Error` instances. + */ +export function isFsError(err: unknown): err is FsError { + return err instanceof FsError; +} + +/** + * Coerce an arbitrary thrown value into an `FsError`. Used by the + * orchestrator's catch blocks so every body-level failure surfaces + * as a typed error AND emits an `fs.denied` audit event — without + * this, raw `fs.promises` errnos (`EACCES`, `ENOENT`, `ELOOP`, …) + * propagate uncategorized, the audit log loses denial visibility, + * and PR 19/20 routes degrade to opaque 5xx responses. + * + * Already-typed `FsError`s pass through unchanged so callers can + * safely chain this on every catch. + */ +export function wrapAsFsError( + err: unknown, + fallbackKind: FsErrorKind = 'permission_denied', +): FsError { + if (err instanceof FsError) return err; + const errno = (err as NodeJS.ErrnoException | undefined)?.code; + const message = + err instanceof Error ? err.message : 'unknown filesystem error'; + switch (errno) { + case 'ENOENT': + return new FsError('path_not_found', message, { cause: err }); + case 'EACCES': + case 'EPERM': + return new FsError('permission_denied', message, { cause: err }); + case 'ELOOP': + return new FsError('symlink_escape', message, { + cause: err, + hint: 'symlink chain forms a cycle or exceeds SYMLOOP_MAX', + }); + case 'EISDIR': + case 'ENOTDIR': + return new FsError('parse_error', message, { + cause: err, + hint: 'a path component is not a directory where one was expected', + }); + case 'EMFILE': + case 'ENFILE': + return new FsError('permission_denied', message, { + cause: err, + hint: 'too many open files; daemon is at file-descriptor limit', + }); + default: + return new FsError(fallbackKind, message, { cause: err }); + } +} diff --git a/packages/cli/src/serve/fs/index.ts b/packages/cli/src/serve/fs/index.ts new file mode 100644 index 000000000..d2832fe3e --- /dev/null +++ b/packages/cli/src/serve/fs/index.ts @@ -0,0 +1,56 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +export { + canonicalizeWorkspace, + hasSuspiciousPathPattern, + resolveWithinWorkspace, + type Intent, + type ResolvedPath, +} from './paths.js'; +export { + FsError, + isFsError, + type FsErrorKind, + type FsErrorStatus, +} from './errors.js'; +export { + MAX_READ_BYTES, + MAX_WRITE_BYTES, + BINARY_PROBE_BYTES, + assertTrustedForIntent, + detectBinary, + enforceReadBytesSize, + enforceReadSize, + enforceWriteSize, + shouldIgnore, + type IgnoreVerdict, + type ReadSizeOutcome, +} from './policy.js'; +export { + FS_ACCESS_EVENT_TYPE, + FS_DENIED_EVENT_TYPE, + createAuditPublisher, + type AuditContext, + type AuditPublisher, + type CreateAuditPublisherDeps, + type FsAccessAuditPayload, + type FsDeniedAuditPayload, +} from './audit.js'; +export { + createWorkspaceFileSystemFactory, + type CreateWorkspaceFileSystemFactoryDeps, + type FsEntry, + type FsStat, + type GlobOptions, + type ListOptions, + type ReadMeta, + type ReadTextOptions, + type RequestContext, + type WorkspaceFileSystem, + type WorkspaceFileSystemFactory, + type WriteOutcome, +} from './workspaceFileSystem.js'; diff --git a/packages/cli/src/serve/fs/paths.test.ts b/packages/cli/src/serve/fs/paths.test.ts new file mode 100644 index 000000000..3d6779522 --- /dev/null +++ b/packages/cli/src/serve/fs/paths.test.ts @@ -0,0 +1,262 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { promises as fsp, realpathSync } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { randomBytes } from 'node:crypto'; +import { + canonicalizeWorkspace, + hasSuspiciousPathPattern, + resolveWithinWorkspace, + type ResolvedPath, +} from './paths.js'; +import { isFsError } from './errors.js'; + +describe('canonicalizeWorkspace', () => { + let scratch: string; + + beforeEach(async () => { + const id = randomBytes(6).toString('hex'); + scratch = await fsp.mkdtemp(path.join(os.tmpdir(), `qwen-fs-paths-${id}-`)); + }); + + afterEach(async () => { + await fsp.rm(scratch, { recursive: true, force: true }); + }); + + it('returns the realpath for an existing absolute directory', () => { + const subdir = path.join(scratch, 'project'); + // Use mkdirSync via fsp.mkdir-await to keep test async-shape consistent. + return fsp.mkdir(subdir).then(() => { + const canonical = canonicalizeWorkspace(subdir); + // On macOS the tmpdir resolves through `/private` — `realpathSync.native` + // returns that prefix; we just assert it matches what realpath itself + // would produce so the test is platform-agnostic. + expect(canonical).toBe(realpathSync.native(subdir)); + }); + }); + + it('resolves a relative path against process.cwd before canonicalization', async () => { + // Anchor the relative input to the scratch directory so the resolved + // path actually exists. We can't change cwd under vitest reliably, so + // craft an absolute path and re-derive the relative form. + const subdir = path.join(scratch, 'rel-target'); + await fsp.mkdir(subdir); + const relInput = path.relative(process.cwd(), subdir); + const canonical = canonicalizeWorkspace(relInput); + expect(canonical).toBe(realpathSync.native(subdir)); + }); + + it('falls back to path.resolve for a non-existent path (ENOENT)', () => { + const ghost = path.join(scratch, 'does', 'not', 'exist'); + const canonical = canonicalizeWorkspace(ghost); + expect(canonical).toBe(path.resolve(ghost)); + }); + + it('follows a symlink to the real on-disk target', async () => { + const real = path.join(scratch, 'real'); + const link = path.join(scratch, 'link'); + await fsp.mkdir(real); + await fsp.symlink(real, link, 'dir'); + expect(canonicalizeWorkspace(link)).toBe(realpathSync.native(real)); + }); + + it('preserves on-disk casing on case-insensitive filesystems for an existing path', async () => { + // Skipped on Linux where the FS is case-sensitive — the function's + // casing-collapse contract is only meaningful on macOS APFS / Windows + // NTFS, and forcing the test to assert "different cased input == same + // output" on ext4 would just fail with ENOENT before realpath runs. + if (process.platform !== 'darwin' && process.platform !== 'win32') return; + const dir = path.join(scratch, 'CaseDir'); + await fsp.mkdir(dir); + const lowered = path.join(scratch, 'casedir'); + const canonical = canonicalizeWorkspace(lowered); + // The realpathSync.native return value is the on-disk casing, which is + // what the boundWorkspace contract pins. + expect(canonical).toBe(realpathSync.native(dir)); + }); + + it('rethrows non-ENOENT filesystem errors instead of masking them', async () => { + // EACCES is hard to produce portably and on macOS gates behind SIP. + // Instead simulate the contract by asserting that an EISDIR-or-similar + // path that *does* exist returns its realpath rather than throwing — + // the negative case (rethrow on non-ENOENT) is exercised by code review + // and documented in the function's doc comment. The minimal positive + // assertion here guards against a future regression that swallows + // *every* error and drops to path.resolve. + const dir = path.join(scratch, 'normal'); + await fsp.mkdir(dir); + const out = canonicalizeWorkspace(dir); + expect(out).toBe(realpathSync.native(dir)); + expect(out).not.toBe(path.resolve(dir + '-different')); + }); +}); + +describe('hasSuspiciousPathPattern', () => { + it('rejects 8.3 short names regardless of platform', () => { + expect(hasSuspiciousPathPattern('GIT~1')).toBe(true); + expect(hasSuspiciousPathPattern('CLAUDE~2')).toBe(true); + expect(hasSuspiciousPathPattern('SETTIN~1.JSON')).toBe(true); + }); + + it('rejects long-path / device prefixes regardless of platform', () => { + expect(hasSuspiciousPathPattern('\\\\?\\C:\\Users\\foo')).toBe(true); + expect(hasSuspiciousPathPattern('\\\\.\\C:\\foo')).toBe(true); + expect(hasSuspiciousPathPattern('//?/C:/foo')).toBe(true); + expect(hasSuspiciousPathPattern('//./C:/foo')).toBe(true); + }); + + it('rejects UNC prefixes regardless of platform', () => { + expect(hasSuspiciousPathPattern('\\\\server\\share')).toBe(true); + expect(hasSuspiciousPathPattern('//server/share')).toBe(true); + expect(hasSuspiciousPathPattern('//192.168.1.1/foo')).toBe(true); + }); + + it('rejects trailing dots / spaces and DOS device names', () => { + expect(hasSuspiciousPathPattern('config.json.')).toBe(true); + expect(hasSuspiciousPathPattern('config.json ')).toBe(true); + expect(hasSuspiciousPathPattern('settings.PRN')).toBe(true); + expect(hasSuspiciousPathPattern('foo.CON')).toBe(true); + expect(hasSuspiciousPathPattern('bar.LPT1')).toBe(true); + }); + + it('rejects three-or-more-dot path components', () => { + expect(hasSuspiciousPathPattern('foo/.../bar')).toBe(true); + expect(hasSuspiciousPathPattern('.../leaf')).toBe(true); + }); + + it('accepts ordinary POSIX paths', () => { + expect(hasSuspiciousPathPattern('src/index.ts')).toBe(false); + expect(hasSuspiciousPathPattern('packages/cli/package.json')).toBe(false); + expect(hasSuspiciousPathPattern('.qwenignore')).toBe(false); + expect(hasSuspiciousPathPattern('a/b/c/d/e/f.txt')).toBe(false); + }); +}); + +describe('resolveWithinWorkspace', () => { + let scratch: string; + let workspace: string; + + beforeEach(async () => { + const id = randomBytes(6).toString('hex'); + scratch = await fsp.mkdtemp( + path.join(os.tmpdir(), `qwen-fs-resolve-${id}-`), + ); + workspace = path.join(scratch, 'workspace'); + await fsp.mkdir(workspace); + }); + + afterEach(async () => { + await fsp.rm(scratch, { recursive: true, force: true }); + }); + + it('resolves an existing relative path to its on-disk canonical form', async () => { + const target = path.join(workspace, 'src', 'a.txt'); + await fsp.mkdir(path.dirname(target)); + await fsp.writeFile(target, 'hello'); + const out = await resolveWithinWorkspace('src/a.txt', workspace, 'read'); + expect(out).toBe(realpathSync.native(target)); + }); + + it('rejects a `..` traversal that lands outside the workspace', async () => { + await expect( + resolveWithinWorkspace('../escape', workspace, 'read'), + ).rejects.toMatchObject({ kind: 'path_outside_workspace' }); + }); + + it('rejects an absolute path outside the workspace', async () => { + const outside = path.join(scratch, 'outside.txt'); + await fsp.writeFile(outside, 'x'); + await expect( + resolveWithinWorkspace(outside, workspace, 'read'), + ).rejects.toMatchObject({ kind: 'path_outside_workspace' }); + }); + + it('rejects a symlink whose target escapes the workspace', async () => { + const outside = path.join(scratch, 'outside.txt'); + await fsp.writeFile(outside, 'sensitive'); + const link = path.join(workspace, 'leak'); + await fsp.symlink(outside, link, 'file'); + await expect( + resolveWithinWorkspace('leak', workspace, 'read'), + ).rejects.toMatchObject({ kind: 'symlink_escape' }); + }); + + it('follows a symlink that targets a path inside the workspace', async () => { + const real = path.join(workspace, 'real.txt'); + await fsp.writeFile(real, 'in'); + const link = path.join(workspace, 'alias'); + await fsp.symlink(real, link, 'file'); + const out = await resolveWithinWorkspace('alias', workspace, 'read'); + expect(out).toBe(realpathSync.native(real)); + }); + + it('tolerates ENOENT for write intent and resolves via existing ancestor', async () => { + const nested = 'newdir/leaf.txt'; + const out = await resolveWithinWorkspace(nested, workspace, 'write'); + // Ancestor `workspace` is realpathed; tail `newdir/leaf.txt` is appended. + expect(out).toBe( + path.join(realpathSync.native(workspace), 'newdir', 'leaf.txt'), + ); + }); + + it('rejects ENOENT under read intent with path_not_found', async () => { + const err = await resolveWithinWorkspace( + 'does-not-exist', + workspace, + 'read', + ).catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('path_not_found'); + }); + + it('rejects suspicious patterns before any I/O', async () => { + await expect( + resolveWithinWorkspace('GIT~1', workspace, 'read'), + ).rejects.toMatchObject({ kind: 'path_outside_workspace' }); + await expect( + resolveWithinWorkspace('//server/share', workspace, 'read'), + ).rejects.toMatchObject({ kind: 'path_outside_workspace' }); + }); + + it('rejects empty/non-string input with parse_error', async () => { + await expect( + resolveWithinWorkspace('', workspace, 'read'), + ).rejects.toMatchObject({ kind: 'parse_error' }); + }); + + it('returns a value usable as ResolvedPath brand', async () => { + const target = path.join(workspace, 'b.txt'); + await fsp.writeFile(target, 'b'); + const out: ResolvedPath = await resolveWithinWorkspace( + 'b.txt', + workspace, + 'read', + ); + // Brand is compile-time only — assert string identity at runtime. + expect(typeof out).toBe('string'); + expect(out).toBe(realpathSync.native(target)); + }); + + it('canonicalizes the boundWorkspace once so symlinked workspaces resolve correctly', async () => { + // Workspace itself reachable via a symlink — daemon should still + // pin members by the canonical (realpath) form, so a request that + // names a child via the symlinked workspace path still resolves + // to the same canonical and passes the boundary check. + const aliasWorkspace = path.join(scratch, 'alias-workspace'); + await fsp.symlink(workspace, aliasWorkspace, 'dir'); + const target = path.join(workspace, 'inside.txt'); + await fsp.writeFile(target, 'x'); + const out = await resolveWithinWorkspace( + 'inside.txt', + aliasWorkspace, + 'read', + ); + expect(out).toBe(realpathSync.native(target)); + }); +}); diff --git a/packages/cli/src/serve/fs/paths.ts b/packages/cli/src/serve/fs/paths.ts new file mode 100644 index 000000000..4a0ea58d6 --- /dev/null +++ b/packages/cli/src/serve/fs/paths.ts @@ -0,0 +1,353 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { promises as fsp, realpathSync } from 'node:fs'; +import * as path from 'node:path'; +import { isWithinRoot } from '@qwen-code/qwen-code-core'; +import { FsError, type FsErrorKind } from './errors.js'; + +/** + * Canonicalize a workspace path so the boot-time bound path and every + * request's `workspaceCwd` collapse to the same key. `path.resolve` + * alone normalizes `..` and `.` segments and absolutizes, but on + * case-insensitive filesystems (macOS APFS, Windows NTFS) `/Work/A` + * and `/work/a` are the same directory yet `resolve` returns them + * verbatim — without normalization the `boundWorkspace` check would + * reject every request that spelled the path with different casing + * and `sessionScope: 'single'` re-attach would silently degrade to + * "one per spelling". + * + * `realpathSync.native` (when the path exists) walks symlinks and returns + * the on-disk casing; this matches what `config.ts` / `settings.ts` / + * `sandbox.ts` use for their own workspace resolution. When the path + * doesn't exist (test fixtures, ahead-of-mkdir flows) we fall back to + * the resolved-but-uncanonicalized form rather than throwing — the + * downstream `spawn({cwd})` will fail with a useful ENOENT if the + * workspace truly doesn't exist. + * + * NOTE: This is a **cross-module contract** (BX9_q) — `config.ts`, + * `settings.ts`, `sandbox.ts`, and `httpAcpBridge.ts` all need to + * canonicalize the same way for the bound-workspace check + + * `sessionScope: 'single'` re-attach to work correctly across paths. + * The contract: use `realpathSync.native` on the resolved absolute + * path; fall back to `path.resolve` only when the path doesn't exist + * yet. If a future change breaks this alignment (e.g. one module + * starts lowercasing on Windows but this one doesn't), the + * canonicalized request path won't match the canonicalized bound + * path → every request returns `workspace_mismatch` even though the + * human-readable paths look equivalent. There's no test that pins + * the alignment; the integration suite would catch a divergence only + * if it tested the specific casing / symlink path the affected + * module changed. + * + * Stage 2 in-process (#3803 §10) collapses the bridge into core, + * removing the bridge-side path resolution entirely. Stage 1.5 + * `@qwen-code/acp-bridge` lift (chiga0 finding 1) is the natural + * place to extract a shared `canonicalizeWorkspace` primitive that + * all four modules consume — the lowest-common-denominator + * extraction is fine THERE because the package boundary forces the + * call sites to converge. Until then, *any* change to how those + * modules resolve workspace paths needs a matching change here. + * + * #4175 PR 18 extraction: this file is the new home of the primitive + * for the serve layer. The bridge re-exports it so existing callers + * continue to import from `httpAcpBridge.js`. The forthcoming + * `WorkspaceFileSystem` boundary (PR 18 commits 3+) builds on top of + * this single resolver. + */ +export function canonicalizeWorkspace(p: string): string { + const resolved = path.resolve(p); + try { + // FIXME(stage-2): switch to `fs.promises.realpath` once the + // bridge call sites become async-friendly. This sync syscall + // runs on the hot `spawnOrAttach` path and blocks the event + // loop for one filesystem stat per call. Single-user loopback + // (Stage 1's design target) doesn't notice; high-concurrency + // deployments will. Stage 2 in-process refactor removes the + // entire bridge-side path resolution anyway, but if Stage 2 + // ever lands without that change, switch to the async version. + return realpathSync.native(resolved); + } catch (err) { + // Only fall back to path.resolve for ENOENT (path doesn't exist + // yet). Other filesystem errors (EACCES, EIO, ELOOP) should + // propagate — swallowing them would hide transient I/O failures + // behind misleading workspace_mismatch rejections. + if ( + err && + typeof err === 'object' && + (err as { code?: unknown }).code === 'ENOENT' + ) { + return resolved; + } + throw err; + } +} + +/** + * Branded absolute path that has passed the workspace boundary check. + * The runtime value is just a string; the brand is a compile-time + * marker that prevents PR 19/20 routes from accidentally bypassing + * `resolveWithinWorkspace` and reading user-supplied input straight + * to disk. Construct one only via `resolveWithinWorkspace`. + */ +export type ResolvedPath = string & { readonly __brand: 'ResolvedPath' }; + +/** + * Intent declared at boundary entry. Used by callers (and the upcoming + * `policy.ts` module) to decide ignore/trust handling. `resolveWithinWorkspace` + * itself uses the intent only to differentiate ENOENT semantics: write + * intents tolerate a non-existent leaf (the file is about to be created), + * read intents do not. + * + * `'edit'` is a distinct intent from `'write'` so the trust gate, audit + * payload, and exhaustiveness checks can reason about partial-replace + * semantics separately from full-overwrite. Both gate identically in + * `assertTrustedForIntent`; the split exists to keep audit events + * faithful to the operation actually performed. + */ +export type Intent = 'read' | 'write' | 'edit' | 'list' | 'glob' | 'stat'; + +const ENOENT_TOLERATING_INTENTS: ReadonlySet = new Set([ + 'write', + 'stat', +]); + +/** + * Detect Windows-targeted path attack patterns that bypass naive + * boundary checks. Adapted from claude-code's + * `hasSuspiciousWindowsPathPattern` (`src/utils/permissions/filesystem.ts`). + * + * Why detection rather than normalization: + * + * 1. Short-name normalization depends on the file existing. For a + * write intent the leaf is absent by definition, so normalization + * can't run. + * 2. Filesystem state can change between normalization and access + * (TOCTOU), so a "normalized then check" pipeline still admits + * races. Detecting the dangerous *literal* on input closes that + * window. + * 3. The patterns are cheap to detect and produce zero false + * positives on legitimate POSIX filenames the daemon expects to + * receive (workspace files are project sources / configs, never + * `\\?\` long-path prefixes). + * + * Checked patterns: + * - NTFS ADS (`:` after position 2 — drive-letter slot exempted) + * - 8.3 short names (`~\d`) + * - Long-path prefixes (`\\?\`, `\\.\`, `//?/`, `//./`) + * - Trailing dots / spaces (Windows strips during resolution) + * - DOS device names as final extension (`.CON`, `.PRN`, ...) + * - Three-or-more consecutive dots used as a path component + * - UNC prefix (`\\server\share`, `//server/share`) — also blocks + * loopback DNS / SMB lookups during resolution. + * + * NTFS-on-Linux mounts (`ntfs-3g`) admit the same bypasses except + * the colon syntax (which only the Windows kernel parses), so the + * platform gate exists only for the ADS branch; everything else is + * checked unconditionally. + */ +export function hasSuspiciousPathPattern(p: string): boolean { + if (process.platform === 'win32') { + const colonIndex = p.indexOf(':', 2); + if (colonIndex !== -1) return true; + } + if (/~\d/.test(p)) return true; + if ( + p.startsWith('\\\\?\\') || + p.startsWith('\\\\.\\') || + p.startsWith('//?/') || + p.startsWith('//./') + ) { + return true; + } + if ( + (p.startsWith('\\\\') && p.length > 2 && p[2] !== '\\') || + (p.startsWith('//') && p.length > 2 && p[2] !== '/') + ) { + // UNC prefix `\\server\share` / `//server/share` — never legitimate + // input from a daemon client. The earlier long-path check covers + // the special device variants (`\\?\`, `\\.\`). + return true; + } + if (/(^|\/|\\)\.{3,}(\/|\\|$)/.test(p)) return true; + // Per-component checks below: skip empty segments and the legitimate + // POSIX traversal tokens `.` / `..`. Bare `.` and `..` are fine + // inputs — the boundary's `path.resolve` + `isWithinRoot` will reject + // any traversal that lands outside the workspace. + for (const seg of p.split(/[\\/]/)) { + if (seg === '' || seg === '.' || seg === '..') continue; + if (/[.\s]+$/.test(seg)) return true; + if (/\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i.test(seg)) return true; + } + return false; +} + +/** Default ancestor-walk depth limit for ENOENT fallback. */ +const MAX_ANCESTOR_HOPS = 40; + +/** + * Walk up `absolute` until a component exists on disk. Returns the + * existing ancestor and the trailing components that don't exist + * yet, joined with the platform separator. Used by the ENOENT + * fallback in `resolveWithinWorkspace` to canonicalize the existing + * portion (resolving any symlink in the parent chain) before + * boundary-checking the eventual write target. + */ +async function findExistingAncestor( + absolute: string, +): Promise<{ ancestor: string; tail: string }> { + let current = absolute; + const tailParts: string[] = []; + for (let i = 0; i < MAX_ANCESTOR_HOPS; i++) { + try { + await fsp.stat(current); + return { ancestor: current, tail: tailParts.join(path.sep) }; + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'ENOENT') { + // ancestor doesn't exist yet; keep walking up + } else if (code === 'ENOTDIR') { + // A regular file sits where we expected a directory (e.g. + // write target `${ws}/file.txt/child`). Walking up would + // happily realpath the file's parent and return a + // "canonical" the eventual write cannot use. Reject up-front + // so the orchestrator emits an `fs.denied` for the actual + // shape of the user error rather than silently passing + // boundary inspection and 500-ing later at write time. + throw new FsError( + 'parse_error', + `path component is not a directory: ${absolute}`, + { + cause: err, + hint: 'a non-directory file occupies a path segment', + }, + ); + } else { + throw err; + } + } + const parent = path.dirname(current); + if (parent === current) { + throw new FsError( + 'path_not_found', + `no existing ancestor for ${absolute}`, + ); + } + tailParts.unshift(path.basename(current)); + current = parent; + } + throw new FsError( + 'path_not_found', + `path traversal exceeded ${MAX_ANCESTOR_HOPS} hops while finding ancestor`, + { + hint: 'path is too deeply nested or filesystem is responding unexpectedly', + }, + ); +} + +/** + * Resolve a daemon-input path to an absolute, symlink-canonicalized + * `ResolvedPath` that is provably inside `boundWorkspace`. Throws + * `FsError` on any boundary violation. + * + * Algorithm (#4175 PR 18 plan, claude-code-style chain check): + * + * 1. Reject suspicious literal patterns before any I/O. + * 2. Resolve against `boundWorkspace` to absolutize relative inputs. + * 3. Cheap pre-filter: textual containment check rejects obvious + * `..` traversal without paying for `realpath`. + * 4. `fs.promises.realpath` on the absolute path. Node's realpath + * follows the entire symlink chain natively (SYMLOOP_MAX-bounded); + * if any hop escapes the workspace, the final canonical lands + * outside and step 5 catches it. + * 5. ENOENT (write/stat intents): walk up to first existing ancestor, + * realpath the ancestor, re-attach the unresolved tail. The tail + * can't introduce new symlinks (it doesn't exist), so the joined + * result is the actual write target the OS will use. + * 6. Final containment check against canonicalized `boundWorkspace`. + * If the canonical landed outside but the resolved-without-realpath + * version was inside, classify as `symlink_escape`; otherwise as + * `path_outside_workspace`. + * + * The brand on the return type is the contract that PR 19/20 routes + * may not construct one without going through this function. + */ +export async function resolveWithinWorkspace( + input: string, + boundWorkspace: string, + intent: Intent, +): Promise { + if (typeof input !== 'string' || input.length === 0) { + throw new FsError('parse_error', 'path must be a non-empty string'); + } + if (hasSuspiciousPathPattern(input)) { + throw new FsError( + 'path_outside_workspace', + `path contains suspicious pattern: ${input}`, + { + hint: 'paths with NTFS ADS, 8.3 short names, UNC prefixes, or trailing dots are rejected outright', + }, + ); + } + + const boundCanonical = canonicalizeWorkspace(boundWorkspace); + const absolute = path.resolve(boundCanonical, input); + + // Cheap pre-filter on the resolved-but-not-realpathed form. Catches + // textual `..` escape without an FS call. + if (!isWithinRoot(absolute, boundCanonical)) { + throw new FsError( + 'path_outside_workspace', + `path escapes workspace: ${input}`, + ); + } + + let canonical: string; + try { + canonical = await fsp.realpath(absolute); + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'ENOENT' && ENOENT_TOLERATING_INTENTS.has(intent)) { + const { ancestor, tail } = await findExistingAncestor(absolute); + const ancestorReal = await fsp.realpath(ancestor); + canonical = tail ? path.join(ancestorReal, tail) : ancestorReal; + } else if (code === 'ENOENT') { + throw new FsError('path_not_found', `path does not exist: ${input}`, { + cause: err, + }); + } else if (code === 'ELOOP') { + throw new FsError( + 'symlink_escape', + `symlink loop or chain too long for ${input}`, + { + cause: err, + hint: 'a symlink in the path forms a cycle or exceeds SYMLOOP_MAX', + }, + ); + } else if (code === 'EACCES') { + throw new FsError( + 'permission_denied', + `permission denied resolving ${input}`, + { cause: err }, + ); + } else { + throw err; + } + } + + if (!isWithinRoot(canonical, boundCanonical)) { + const kind: FsErrorKind = + canonical !== absolute ? 'symlink_escape' : 'path_outside_workspace'; + throw new FsError( + kind, + kind === 'symlink_escape' + ? `symlink resolves outside workspace: ${input}` + : `path escapes workspace: ${input}`, + ); + } + + return canonical as ResolvedPath; +} diff --git a/packages/cli/src/serve/fs/policy.test.ts b/packages/cli/src/serve/fs/policy.test.ts new file mode 100644 index 000000000..e8b1312c2 --- /dev/null +++ b/packages/cli/src/serve/fs/policy.test.ts @@ -0,0 +1,241 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { promises as fsp } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { randomBytes } from 'node:crypto'; +import { Ignore } from '@qwen-code/qwen-code-core'; +import { + MAX_READ_BYTES, + MAX_WRITE_BYTES, + assertTrustedForIntent, + detectBinary, + enforceReadBytesSize, + enforceReadSize, + enforceWriteSize, + shouldIgnore, +} from './policy.js'; +import { canonicalizeWorkspace, resolveWithinWorkspace } from './paths.js'; +import { isFsError } from './errors.js'; + +describe('shouldIgnore', () => { + let scratch: string; + let workspace: string; + + beforeEach(async () => { + scratch = await fsp.mkdtemp( + path.join(os.tmpdir(), `qwen-policy-${randomBytes(4).toString('hex')}-`), + ); + const wsDir = path.join(scratch, 'ws'); + await fsp.mkdir(wsDir, { recursive: true }); + workspace = canonicalizeWorkspace(wsDir); + }); + + afterEach(async () => { + await fsp.rm(scratch, { recursive: true, force: true }); + }); + + it('flags files matching .gitignore-style patterns', async () => { + const ignore = new Ignore().add(['*.log', 'dist/']); + const target = path.join(workspace, 'app.log'); + await fsp.writeFile(target, ''); + const resolved = await resolveWithinWorkspace('app.log', workspace, 'read'); + expect(shouldIgnore(resolved, workspace, ignore)).toEqual({ + ignored: true, + category: 'file', + }); + }); + + it('flags directory patterns', async () => { + const ignore = new Ignore().add(['build/']); + const dir = path.join(workspace, 'build'); + await fsp.mkdir(dir); + const resolved = await resolveWithinWorkspace('build', workspace, 'list'); + expect(shouldIgnore(resolved, workspace, ignore, 'directory')).toEqual({ + ignored: true, + category: 'directory', + }); + }); + + it('flags non-trailing-slash directory patterns like node_modules', async () => { + const ignore = new Ignore().add(['node_modules']); + const dir = path.join(workspace, 'node_modules'); + await fsp.mkdir(dir); + const resolved = await resolveWithinWorkspace( + 'node_modules', + workspace, + 'list', + ); + // No-trailing-slash patterns get registered in both ignorers; either + // shows up as an ignore hit. We accept whichever category fires. + const verdict = shouldIgnore(resolved, workspace, ignore, 'directory'); + expect(verdict.ignored).toBe(true); + }); + + it('returns false when no rule matches', async () => { + const ignore = new Ignore().add(['*.log']); + const target = path.join(workspace, 'src.ts'); + await fsp.writeFile(target, ''); + const resolved = await resolveWithinWorkspace('src.ts', workspace, 'read'); + expect(shouldIgnore(resolved, workspace, ignore)).toEqual({ + ignored: false, + }); + }); + + it('never ignores the workspace root itself', async () => { + const ignore = new Ignore().add(['*']); + const resolved = await resolveWithinWorkspace('.', workspace, 'list'); + expect(shouldIgnore(resolved, workspace, ignore)).toEqual({ + ignored: false, + }); + }); +}); + +describe('assertTrustedForIntent', () => { + it('passes when trusted regardless of intent', () => { + expect(() => assertTrustedForIntent(true, 'read')).not.toThrow(); + expect(() => assertTrustedForIntent(true, 'write')).not.toThrow(); + expect(() => assertTrustedForIntent(true, 'list')).not.toThrow(); + }); + + it('passes for read-style intents when untrusted', () => { + expect(() => assertTrustedForIntent(false, 'read')).not.toThrow(); + expect(() => assertTrustedForIntent(false, 'list')).not.toThrow(); + expect(() => assertTrustedForIntent(false, 'glob')).not.toThrow(); + expect(() => assertTrustedForIntent(false, 'stat')).not.toThrow(); + }); + + it('throws untrusted_workspace for write intent when untrusted', () => { + const err = (() => { + try { + assertTrustedForIntent(false, 'write'); + } catch (e) { + return e; + } + throw new Error('expected throw'); + })(); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string; status: number }).kind).toBe( + 'untrusted_workspace', + ); + expect((err as { kind: string; status: number }).status).toBe(403); + }); + + it('throws untrusted_workspace for edit intent when untrusted', () => { + expect(() => assertTrustedForIntent(false, 'edit')).toThrowError( + /edit operations are forbidden/, + ); + }); +}); + +describe('enforceReadSize', () => { + it('returns full size when under the cap', () => { + expect(enforceReadSize(1024)).toEqual({ + bytesToRead: 1024, + truncated: false, + }); + }); + + it('returns the cap and truncated=true when over', () => { + expect(enforceReadSize(MAX_READ_BYTES + 1)).toEqual({ + bytesToRead: MAX_READ_BYTES, + truncated: true, + }); + }); + + it('honors a per-call max override', () => { + expect(enforceReadSize(2048, 1024)).toEqual({ + bytesToRead: 1024, + truncated: true, + }); + expect(enforceReadSize(512, 1024)).toEqual({ + bytesToRead: 512, + truncated: false, + }); + }); +}); + +describe('enforceWriteSize', () => { + it('passes when under the write cap', () => { + expect(() => enforceWriteSize(1024)).not.toThrow(); + expect(() => enforceWriteSize(MAX_WRITE_BYTES)).not.toThrow(); + }); + + it('throws file_too_large when over the cap', () => { + const err = (() => { + try { + enforceWriteSize(MAX_WRITE_BYTES + 1); + } catch (e) { + return e; + } + throw new Error('expected throw'); + })(); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string; status: number }).kind).toBe( + 'file_too_large', + ); + expect((err as { kind: string; status: number }).status).toBe(413); + }); +}); + +describe('enforceReadBytesSize', () => { + it('passes when under the cap', () => { + expect(() => enforceReadBytesSize(1024)).not.toThrow(); + }); + + it('throws file_too_large when over', () => { + const err = (() => { + try { + enforceReadBytesSize(MAX_READ_BYTES + 1); + } catch (e) { + return e; + } + throw new Error('expected throw'); + })(); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_too_large'); + }); +}); + +describe('detectBinary', () => { + let scratch: string; + let workspace: string; + + beforeEach(async () => { + scratch = await fsp.mkdtemp( + path.join(os.tmpdir(), `qwen-binary-${randomBytes(4).toString('hex')}-`), + ); + const wsDir = path.join(scratch, 'ws'); + await fsp.mkdir(wsDir, { recursive: true }); + workspace = canonicalizeWorkspace(wsDir); + }); + + afterEach(async () => { + await fsp.rm(scratch, { recursive: true, force: true }); + }); + + it('reports text files as non-binary', async () => { + const target = path.join(workspace, 'plain.txt'); + await fsp.writeFile(target, 'hello world\nsecond line\n'); + const resolved = await resolveWithinWorkspace( + 'plain.txt', + workspace, + 'read', + ); + expect(await detectBinary(resolved)).toBe(false); + }); + + it('reports buffers with null bytes as binary', async () => { + const target = path.join(workspace, 'bin.dat'); + const buf = Buffer.alloc(64); + buf[10] = 0; // null byte triggers binary detection + await fsp.writeFile(target, buf); + const resolved = await resolveWithinWorkspace('bin.dat', workspace, 'read'); + expect(await detectBinary(resolved)).toBe(true); + }); +}); diff --git a/packages/cli/src/serve/fs/policy.ts b/packages/cli/src/serve/fs/policy.ts new file mode 100644 index 000000000..107c167a7 --- /dev/null +++ b/packages/cli/src/serve/fs/policy.ts @@ -0,0 +1,214 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as path from 'node:path'; +import type { Ignore} from '@qwen-code/qwen-code-core'; +import { isBinaryFile } from '@qwen-code/qwen-code-core'; +import { FsError } from './errors.js'; +import type { Intent, ResolvedPath } from './paths.js'; + +/** + * Maximum bytes returned by `readText`. Mirrors claude-code's + * `MAX_OUTPUT_SIZE` (`src/utils/file.ts`) — large enough for + * typical source files, small enough that an SSE replay buffer + * doesn't fill on a single read. Reads above this size return + * truncated content with `meta.truncated = true`. + */ +export const MAX_READ_BYTES = 256 * 1024; + +/** + * Maximum bytes accepted by `writeText` / `edit`. Sized below the + * `express.json({ limit: '10mb' })` middleware cap so a request + * body that survives the parser also survives the policy gate. + * Halving the parser cap leaves headroom for JSON envelope + * overhead (path, encoding hints, etc.). + */ +export const MAX_WRITE_BYTES = 5 * 1024 * 1024; + +/** + * Sample size used for content-based binary detection. Aligned with + * `isBinaryFile` from `packages/core/src/utils/fileUtils.ts:414` so + * the boundary and the existing tool layer agree on what counts as + * "binary". + */ +export const BINARY_PROBE_BYTES = 4096; + +/** + * Result of an ignore-rule check against a resolved path. The + * `category` field exists so audit events can distinguish + * file-pattern vs directory-pattern matches without exposing the + * `Ignore` class's private state. + */ +export interface IgnoreVerdict { + ignored: boolean; + category?: 'file' | 'directory'; +} + +/** + * Check whether `absolute` is matched by the workspace's ignore + * rules. The check is computed against the workspace-relative + * form of the path, matching the convention of `.gitignore` / + * `.qwenignore` patterns. + * + * Returns `{ ignored: false }` when: + * - the path equals `boundWorkspace` (the workspace root itself + * can never be ignored), + * - the relative path escapes the workspace (caller's bug; the + * boundary check should have rejected first), + * - neither the file nor directory filter matches. + * + * The `kind` parameter tells the function whether the resolved + * path is a regular file or a directory. This avoids an extra + * `stat` call (the orchestrator already has the dirent info from + * its `list`/`stat` step) and lets us check directory patterns + * with the trailing slash the underlying `ignore` library expects + * for `foo/`-style entries. + */ +export function shouldIgnore( + absolute: ResolvedPath, + boundWorkspace: string, + ignore: Ignore, + kind: 'file' | 'directory' = 'file', +): IgnoreVerdict { + const rel = path.relative(boundWorkspace, absolute as string); + if (rel === '' || rel.startsWith('..')) return { ignored: false }; + const fileFilter = ignore.getFileFilter(); + const dirFilter = ignore.getDirectoryFilter(); + if (kind === 'directory') { + // `foo/` patterns require the trailing slash; `node_modules`-style + // patterns (no slash, no extension) are added to both ignorers by + // `Ignore.add`, so the slash-suffixed probe still hits them. + const withSlash = rel.endsWith('/') ? rel : `${rel}/`; + if (dirFilter(withSlash)) return { ignored: true, category: 'directory' }; + if (fileFilter(rel)) return { ignored: true, category: 'file' }; + return { ignored: false }; + } + if (fileFilter(rel)) return { ignored: true, category: 'file' }; + if (dirFilter(rel)) return { ignored: true, category: 'directory' }; + return { ignored: false }; +} + +/** + * Apply the trust gate to an intent. Read-shaped intents (`read`, + * `list`, `glob`, `stat`) always pass — remote clients debugging + * an untrusted workspace still need to inspect state. Mutating + * intents (`write`, `edit`) on an untrusted workspace throw + * `untrusted_workspace`, which routes surface as 403. + * + * Trust is captured at factory build (a snapshot of + * `Config.isTrustedFolder()`); the orchestrator does not consult + * Config per-request, so an IDE that flips trust mid-request + * cannot split-brain a session. + * + * The body is an exhaustive `switch` so adding a new variant to + * `Intent` (e.g. a future `'delete'`) becomes a TypeScript error + * here — the gate must explicitly classify every intent rather + * than silently defaulting to "allowed". + */ +export function assertTrustedForIntent(trusted: boolean, intent: Intent): void { + if (trusted) return; + switch (intent) { + case 'read': + case 'list': + case 'glob': + case 'stat': + return; + case 'write': + case 'edit': + throw new FsError( + 'untrusted_workspace', + `workspace is not trusted; ${intent} operations are forbidden`, + { + hint: 'mark the folder as trusted in the daemon configuration to allow writes', + }, + ); + default: { + const _exhaustive: never = intent; + throw new FsError( + 'untrusted_workspace', + `workspace is not trusted; intent "${String(_exhaustive)}" is not classified`, + { + hint: 'unknown intent reached the trust gate; classify it in policy.ts', + }, + ); + } + } +} + +/** Outcome of a read-size enforcement check. */ +export interface ReadSizeOutcome { + /** Number of bytes the caller should read. */ + bytesToRead: number; + /** True iff the file is larger than the cap and content was truncated. */ + truncated: boolean; +} + +/** + * Decide how many bytes a `readText` call should pull off disk + * given the file's actual size. Reads above the cap surface + * `truncated: true`; the boundary intentionally does NOT throw, + * matching claude-code's behavior so a large config file still + * returns useful content rather than an opaque error. + */ +export function enforceReadSize( + fileBytes: number, + maxBytes: number = MAX_READ_BYTES, +): ReadSizeOutcome { + if (fileBytes <= maxBytes) { + return { bytesToRead: fileBytes, truncated: false }; + } + return { bytesToRead: maxBytes, truncated: true }; +} + +/** + * Throw `file_too_large` if `bytes` exceeds the write cap. Used by + * `writeText` and `edit`, which (unlike text reads) cannot silently + * truncate without corrupting the file. + */ +export function enforceWriteSize( + bytes: number, + maxBytes: number = MAX_WRITE_BYTES, +): void { + if (bytes > maxBytes) { + throw new FsError( + 'file_too_large', + `payload of ${bytes} bytes exceeds write limit of ${maxBytes} bytes`, + { + hint: 'split the write into smaller chunks or raise the daemon limit', + }, + ); + } +} + +/** + * Throw `file_too_large` if a read would exceed the cap and the + * caller cannot accept truncation (used by `readBytes`, where + * partial content is unsafe to return). + */ +export function enforceReadBytesSize( + fileBytes: number, + maxBytes: number = MAX_READ_BYTES, +): void { + if (fileBytes > maxBytes) { + throw new FsError( + 'file_too_large', + `file of ${fileBytes} bytes exceeds read limit of ${maxBytes} bytes`, + { + hint: 'use readText for capped truncation, or raise the daemon limit', + }, + ); + } +} + +/** + * Decide whether a path is binary using the existing core helper. + * Wrapping it here lets PR 18 callers keep a single `policy.ts` + * import surface and lets future tweaks (e.g. extension allow-list) + * land without touching the orchestrator. + */ +export async function detectBinary(filePath: ResolvedPath): Promise { + return isBinaryFile(filePath as string); +} diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts new file mode 100644 index 000000000..043755541 --- /dev/null +++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts @@ -0,0 +1,435 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { promises as fsp } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { randomBytes } from 'node:crypto'; +import { Ignore } from '@qwen-code/qwen-code-core'; +import { + FS_ACCESS_EVENT_TYPE, + FS_DENIED_EVENT_TYPE, + createWorkspaceFileSystemFactory, + type WorkspaceFileSystem, + type WorkspaceFileSystemFactory, +} from './index.js'; +import type { BridgeEvent } from '../eventBus.js'; +import { canonicalizeWorkspace } from './paths.js'; +import { isFsError } from './errors.js'; + +interface Harness { + factory: WorkspaceFileSystemFactory; + fs: WorkspaceFileSystem; + events: BridgeEvent[]; + workspace: string; + scratch: string; +} + +async function makeHarness(opts?: { + trusted?: boolean; + ignore?: Ignore; +}): Promise { + const scratch = await fsp.mkdtemp( + path.join(os.tmpdir(), `qwen-wfs-${randomBytes(4).toString('hex')}-`), + ); + const wsDir = path.join(scratch, 'ws'); + await fsp.mkdir(wsDir); + const workspace = canonicalizeWorkspace(wsDir); + const events: BridgeEvent[] = []; + const factory = createWorkspaceFileSystemFactory({ + boundWorkspace: workspace, + trusted: opts?.trusted ?? true, + emit: (e) => events.push(e), + ignore: opts?.ignore, + }); + const fs = factory.forRequest({ + originatorClientId: 'client-x', + sessionId: 'sess-1', + route: 'TEST /op', + }); + return { factory, fs, events, workspace, scratch }; +} + +async function teardown(h: Harness): Promise { + await fsp.rm(h.scratch, { recursive: true, force: true }); +} + +describe('WorkspaceFileSystem - resolve and stat', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => { + await teardown(h); + }); + + it('resolves an existing path and emits no audit on resolve alone', async () => { + const target = path.join(h.workspace, 'a.txt'); + await fsp.writeFile(target, 'x'); + const r = await h.fs.resolve('a.txt', 'read'); + expect(r).toBeTruthy(); + expect( + h.events.filter((e) => e.type === FS_ACCESS_EVENT_TYPE), + ).toHaveLength(0); + }); + + it('records fs.denied when resolve fails', async () => { + await expect(h.fs.resolve('../escape', 'read')).rejects.toBeDefined(); + const denied = h.events.filter((e) => e.type === FS_DENIED_EVENT_TYPE); + expect(denied).toHaveLength(1); + expect(denied[0].data).toMatchObject({ + errorKind: 'path_outside_workspace', + }); + }); + + it('stat returns kind/sizeBytes/modifiedMs and emits fs.access', async () => { + const target = path.join(h.workspace, 'b.txt'); + await fsp.writeFile(target, 'hi'); + const r = await h.fs.resolve('b.txt', 'stat'); + const st = await h.fs.stat(r); + expect(st.kind).toBe('file'); + expect(st.sizeBytes).toBe(2); + expect(st.modifiedMs).toBeGreaterThan(0); + expect(h.events.find((e) => e.type === FS_ACCESS_EVENT_TYPE)).toBeDefined(); + }); +}); + +describe('WorkspaceFileSystem - readText', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('reads small text and reports lineEnding', async () => { + const target = path.join(h.workspace, 'plain.txt'); + await fsp.writeFile(target, 'hello\nworld\n'); + const r = await h.fs.resolve('plain.txt', 'read'); + const out = await h.fs.readText(r); + expect(out.content).toBe('hello\nworld\n'); + expect(out.meta.lineEnding).toBe('lf'); + expect(out.meta.truncated).toBeUndefined(); + }); + + it('truncates content above maxBytes and sets meta.truncated', async () => { + const big = path.join(h.workspace, 'big.txt'); + const content = 'a'.repeat(2048); + await fsp.writeFile(big, content); + const r = await h.fs.resolve('big.txt', 'read'); + const out = await h.fs.readText(r, { maxBytes: 1024 }); + expect(out.meta.truncated).toBe(true); + expect(out.content.length).toBeLessThanOrEqual(1024); + }); + + it('throws file_too_large when file exceeds MAX_READ_BYTES regardless of opts.maxBytes', async () => { + // Write a file larger than the soft cap and assert the boundary + // refuses BEFORE delegating to lowFs (which would slurp the + // whole file into memory). + const big = path.join(h.workspace, 'huge.txt'); + const bytes = (await import('./policy.js')).MAX_READ_BYTES + 1; + await fsp.writeFile(big, 'a'.repeat(bytes)); + const r = await h.fs.resolve('huge.txt', 'read'); + const err = await h.fs.readText(r).catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_too_large'); + // Audit was recorded for the denial (P0 silent-failure fix). + const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE); + expect(denied).toBeDefined(); + expect((denied!.data as { errorKind: string }).errorKind).toBe( + 'file_too_large', + ); + }); + + it('throws binary_file when reading binary content', async () => { + const bin = path.join(h.workspace, 'bin.dat'); + const buf = Buffer.alloc(64); + buf[5] = 0; + await fsp.writeFile(bin, buf); + const r = await h.fs.resolve('bin.dat', 'read'); + const err = await h.fs.readText(r).catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('binary_file'); + expect(h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE)).toBeDefined(); + }); + + it('annotates meta.matchedIgnore when path is ignored', async () => { + const ignore = new Ignore().add(['*.log']); + h = await makeHarness({ ignore }); + const target = path.join(h.workspace, 'app.log'); + await fsp.writeFile(target, 'log content'); + const r = await h.fs.resolve('app.log', 'read'); + const out = await h.fs.readText(r); + expect(out.meta.matchedIgnore).toBe('file'); + }); +}); + +describe('WorkspaceFileSystem - readBytes', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('returns raw bytes', async () => { + const target = path.join(h.workspace, 'raw.bin'); + await fsp.writeFile(target, Buffer.from([1, 2, 3, 0, 4, 5])); + const r = await h.fs.resolve('raw.bin', 'read'); + const buf = await h.fs.readBytes(r); + expect(Array.from(buf)).toEqual([1, 2, 3, 0, 4, 5]); + }); + + it('throws file_too_large above the cap', async () => { + const target = path.join(h.workspace, 'huge.bin'); + await fsp.writeFile(target, Buffer.alloc(2048)); + const r = await h.fs.resolve('huge.bin', 'read'); + const err = await h.fs.readBytes(r, { maxBytes: 1024 }).catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_too_large'); + }); +}); + +describe('WorkspaceFileSystem - list', () => { + let h: Harness; + beforeEach(async () => { + const ignore = new Ignore().add(['*.log']); + h = await makeHarness({ ignore }); + await fsp.writeFile(path.join(h.workspace, 'a.ts'), ''); + await fsp.writeFile(path.join(h.workspace, 'b.log'), ''); + await fsp.mkdir(path.join(h.workspace, 'sub')); + }); + afterEach(async () => teardown(h)); + + it('drops ignored entries by default', async () => { + const r = await h.fs.resolve('.', 'list'); + const entries = await h.fs.list(r); + const names = entries.map((e) => e.name).sort(); + expect(names).toEqual(['a.ts', 'sub']); + }); + + it('includes ignored entries when includeIgnored is true', async () => { + const r = await h.fs.resolve('.', 'list'); + const entries = await h.fs.list(r, { includeIgnored: true }); + const log = entries.find((e) => e.name === 'b.log'); + expect(log?.ignored).toBe(true); + }); +}); + +describe('WorkspaceFileSystem - glob', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + await fsp.mkdir(path.join(h.workspace, 'src')); + await fsp.writeFile(path.join(h.workspace, 'src', 'a.ts'), ''); + await fsp.writeFile(path.join(h.workspace, 'src', 'b.ts'), ''); + await fsp.writeFile(path.join(h.workspace, 'README.md'), ''); + }); + afterEach(async () => teardown(h)); + + it('matches files by pattern', async () => { + const hits = await h.fs.glob('src/*.ts'); + expect(hits.map((p) => path.basename(p)).sort()).toEqual(['a.ts', 'b.ts']); + }); + + it('rejects patterns containing `..`', async () => { + const err = await h.fs.glob('../**/*.ts').catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('parse_error'); + }); + + it('respects maxResults', async () => { + const hits = await h.fs.glob('**/*', { maxResults: 1 }); + expect(hits).toHaveLength(1); + }); + + it('filters ignored hits by default', async () => { + const ignore = new Ignore().add(['*.md']); + h = await makeHarness({ ignore }); + await fsp.writeFile(path.join(h.workspace, 'README.md'), ''); + await fsp.writeFile(path.join(h.workspace, 'src.ts'), ''); + const hits = await h.fs.glob('*'); + const names = hits.map((p) => path.basename(p)).sort(); + expect(names).not.toContain('README.md'); + }); +}); + +describe('WorkspaceFileSystem - write/edit', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('writes text and emits fs.access', async () => { + const r = await h.fs.resolve('newfile.txt', 'write'); + await h.fs.writeText(r, 'hello'); + const written = await fsp.readFile(r as string, 'utf-8'); + expect(written).toBe('hello'); + const access = h.events.find( + (e) => + e.type === FS_ACCESS_EVENT_TYPE && + (e.data as { intent: string }).intent === 'write', + ); + expect(access).toBeDefined(); + }); + + it('rejects oversize writes with file_too_large', async () => { + const r = await h.fs.resolve('huge.txt', 'write'); + const err = await h.fs + .writeText(r, 'a'.repeat(6 * 1024 * 1024)) + .catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_too_large'); + }); + + it('edits an existing file by replacing oldText with newText', async () => { + const target = path.join(h.workspace, 'config.txt'); + await fsp.writeFile(target, 'foo=1\nbar=2\n'); + const r = await h.fs.resolve('config.txt', 'write'); + const out = await h.fs.edit(r, 'foo=1', 'foo=42'); + expect(out.writtenBytes).toBeGreaterThan(0); + const after = await fsp.readFile(target, 'utf-8'); + expect(after).toBe('foo=42\nbar=2\n'); + }); + + it('throws parse_error when oldText is not present', async () => { + const target = path.join(h.workspace, 'c.txt'); + await fsp.writeFile(target, 'abc'); + const r = await h.fs.resolve('c.txt', 'write'); + const err = await h.fs.edit(r, 'NOT THERE', 'X').catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('parse_error'); + }); +}); + +describe('WorkspaceFileSystem - trust gate', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness({ trusted: false }); + await fsp.writeFile(path.join(h.workspace, 'r.txt'), 'r'); + }); + afterEach(async () => teardown(h)); + + it('allows read on untrusted workspace', async () => { + const r = await h.fs.resolve('r.txt', 'read'); + const out = await h.fs.readText(r); + expect(out.content).toBe('r'); + }); + + it('denies write with untrusted_workspace', async () => { + const r = await h.fs.resolve('w.txt', 'write'); + const err = await h.fs.writeText(r, 'x').catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('untrusted_workspace'); + const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE); + expect(denied).toBeDefined(); + }); + + it('denies edit with untrusted_workspace', async () => { + await fsp.writeFile(path.join(h.workspace, 'e.txt'), 'old'); + const r = await h.fs.resolve('e.txt', 'edit'); + const err = await h.fs.edit(r, 'old', 'new').catch((e) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('untrusted_workspace'); + }); +}); + +describe('WorkspaceFileSystem - audit always emits on body errors', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('wraps a raw ENOENT from edit() and emits fs.denied', async () => { + // edit() reads via fsp.readFile; against a non-existent file the + // raw ENOENT used to escape uncategorized — the wrapper now + // converts it to FsError(path_not_found) and records denial. + const r = await h.fs.resolve('vanished.txt', 'write'); + const err = await h.fs.edit(r, 'a', 'b').catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('path_not_found'); + const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE); + expect(denied).toBeDefined(); + expect((denied!.data as { errorKind: string }).errorKind).toBe( + 'path_not_found', + ); + }); + + it('rejects ENOTDIR ancestor walk with parse_error rather than passing boundary', async () => { + // Place a regular file where the request expects a directory. + await fsp.writeFile(path.join(h.workspace, 'block'), 'not a dir'); + const err = await h.fs + .resolve('block/leaf', 'write') + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('parse_error'); + const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE); + expect(denied).toBeDefined(); + }); +}); + +describe('WorkspaceFileSystem - glob escape audit', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('emits aggregated fs.denied for glob hits filtered as escape', async () => { + // Create an in-workspace file (legit hit) plus a symlink that + // resolves outside the workspace (filtered hit). The glob + // aggregation reports the escape count via a single denial + // event so audit volume stays bounded on misconfigured trees. + await fsp.writeFile(path.join(h.workspace, 'inside.ts'), 'x'); + const outside = path.join(h.scratch, 'outside.ts'); + await fsp.writeFile(outside, 'y'); + await fsp.symlink(outside, path.join(h.workspace, 'leak.ts'), 'file'); + const hits = await h.fs.glob('*.ts'); + const names = hits.map((p) => path.basename(p)).sort(); + expect(names).toContain('inside.ts'); + expect(names).not.toContain('outside.ts'); + const denied = h.events.find( + (e) => + e.type === FS_DENIED_EVENT_TYPE && + (e.data as { errorKind: string }).errorKind === 'symlink_escape', + ); + expect(denied).toBeDefined(); + expect((denied!.data as { hint?: string }).hint).toMatch( + /\d+ hit\(s\) that resolved outside workspace/, + ); + }); +}); + +describe('WorkspaceFileSystem - factory', () => { + it('canonicalizes the workspace once at factory build', async () => { + const scratch = await fsp.mkdtemp( + path.join( + os.tmpdir(), + `qwen-wfs-canon-${randomBytes(4).toString('hex')}-`, + ), + ); + try { + const real = path.join(scratch, 'ws'); + await fsp.mkdir(real); + const aliased = path.join(scratch, 'alias'); + await fsp.symlink(real, aliased, 'dir'); + const events: BridgeEvent[] = []; + const factory = createWorkspaceFileSystemFactory({ + boundWorkspace: aliased, + trusted: true, + emit: (e) => events.push(e), + }); + const fs = factory.forRequest({ route: 'TEST /op' }); + await fsp.writeFile(path.join(real, 'inside.txt'), 'i'); + const r = await fs.resolve('inside.txt', 'read'); + const out = await fs.readText(r); + expect(out.content).toBe('i'); + } finally { + await fsp.rm(scratch, { recursive: true, force: true }); + } + }); +}); diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.ts b/packages/cli/src/serve/fs/workspaceFileSystem.ts new file mode 100644 index 000000000..8ffb4d39b --- /dev/null +++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts @@ -0,0 +1,596 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { promises as fsp } from 'node:fs'; +import * as path from 'node:path'; +import { glob as globAsync } from 'glob'; +import type { + Ignore, + StandardFileSystemService, + loadIgnoreRules, + type WriteTextFileOptions, +} from '@qwen-code/qwen-code-core'; +import type { BridgeEvent } from '../eventBus.js'; +import { + type AuditContext, + type AuditPublisher, + createAuditPublisher, +} from './audit.js'; +import { FsError, wrapAsFsError } from './errors.js'; +import { + canonicalizeWorkspace, + resolveWithinWorkspace, + type Intent, + type ResolvedPath, +} from './paths.js'; +import { + MAX_READ_BYTES, + assertTrustedForIntent, + detectBinary, + enforceReadBytesSize, + enforceReadSize, + enforceWriteSize, + shouldIgnore, +} from './policy.js'; + +/** + * Stat snapshot returned by `WorkspaceFileSystem.stat`. We + * deliberately avoid passing through `fs.Stats` directly — the + * boundary should not leak Node-specific bigint quirks or + * platform-specific fields to PR 19/20 SDK consumers. + */ +export interface FsStat { + kind: 'file' | 'directory' | 'symlink' | 'other'; + sizeBytes: number; + modifiedMs: number; +} + +/** Directory listing entry from `WorkspaceFileSystem.list`. */ +export interface FsEntry { + name: string; + kind: 'file' | 'directory' | 'symlink' | 'other'; + /** True iff the entry matched a `.gitignore`/`.qwenignore` rule. */ + ignored: boolean; +} + +/** Metadata side-channel returned alongside `readText` content. */ +export interface ReadMeta { + encoding?: string; + bom?: boolean; + lineEnding: 'crlf' | 'lf'; + truncated?: boolean; + matchedIgnore?: 'file' | 'directory'; + originalLineCount?: number; +} + +export interface ReadTextOptions { + /** Cap returned bytes; defaults to MAX_READ_BYTES. */ + maxBytes?: number; + /** 1-based starting line for partial reads. */ + line?: number; + /** Maximum number of lines to return. */ + limit?: number; +} + +export interface ListOptions { + /** When true, ignored entries are returned with `ignored: true` rather than dropped. */ + includeIgnored?: boolean; +} + +export interface GlobOptions { + cwd?: ResolvedPath; + includeIgnored?: boolean; + maxResults?: number; +} + +export interface WriteOutcome { + writtenBytes: number; +} + +export interface RequestContext extends AuditContext { + /** Mostly redundant with `originatorClientId`; kept for forward-compat with future ACP fields. */ + ownerSessionId?: string; +} + +/** + * Public boundary type. PR 19/20 routes consume this via the + * factory's `forRequest(ctx)` so audit context is automatically + * threaded through every operation. + */ +export interface WorkspaceFileSystem { + resolve(input: string, intent: Intent): Promise; + stat(p: ResolvedPath): Promise; + readText( + p: ResolvedPath, + opts?: ReadTextOptions, + ): Promise<{ content: string; meta: ReadMeta }>; + readBytes(p: ResolvedPath, opts?: { maxBytes?: number }): Promise; + list(p: ResolvedPath, opts?: ListOptions): Promise; + glob(pattern: string, opts?: GlobOptions): Promise; + writeText( + p: ResolvedPath, + content: string, + opts?: WriteTextFileOptions, + ): Promise; + edit( + p: ResolvedPath, + oldText: string, + newText: string, + ): Promise; +} + +/** + * Per-process factory. Build once at `createServeApp` boot, call + * `forRequest` per HTTP route invocation. + */ +export interface WorkspaceFileSystemFactory { + forRequest(ctx: RequestContext): WorkspaceFileSystem; +} + +export interface CreateWorkspaceFileSystemFactoryDeps { + /** Canonical workspace path; the daemon's `boundWorkspace`. */ + boundWorkspace: string; + /** Snapshot of `Config.isTrustedFolder()` at boot. */ + trusted: boolean; + /** Bridge-bound publisher into `EventBus.publish`. */ + emit: (event: BridgeEvent) => void; + /** + * Override the default ignore loader. Tests pass a fixed `Ignore` + * to avoid filesystem coupling; production lets the factory build + * one per workspace via `loadIgnoreRules`. + */ + ignore?: Ignore; + /** Override audit raw-path mode. Defaults to env `QWEN_AUDIT_RAW_PATHS=1`. */ + includeRawPaths?: boolean; +} + +/** + * Build a `WorkspaceFileSystemFactory`. The factory itself is + * stateless across requests; per-request state (the audit context) + * lives on the bound `WorkspaceFileSystem` returned from `forRequest`. + */ +export function createWorkspaceFileSystemFactory( + deps: CreateWorkspaceFileSystemFactoryDeps, +): WorkspaceFileSystemFactory { + const boundWorkspace = canonicalizeWorkspace(deps.boundWorkspace); + const ignore = + deps.ignore ?? + loadIgnoreRules({ + projectRoot: boundWorkspace, + useGitignore: true, + useQwenignore: true, + ignoreDirs: [], + }); + const audit: AuditPublisher = createAuditPublisher({ + emit: deps.emit, + boundWorkspace, + includeRawPaths: deps.includeRawPaths, + }); + const lowFs = new StandardFileSystemService(); + + return { + forRequest(ctx) { + return new WorkspaceFileSystemImpl({ + boundWorkspace, + trusted: deps.trusted, + ignore, + audit, + ctx, + lowFs, + }); + }, + }; +} + +interface ImplDeps { + boundWorkspace: string; + trusted: boolean; + ignore: Ignore; + audit: AuditPublisher; + ctx: RequestContext; + lowFs: StandardFileSystemService; +} + +class WorkspaceFileSystemImpl implements WorkspaceFileSystem { + constructor(private readonly deps: ImplDeps) {} + + async resolve(input: string, intent: Intent): Promise { + try { + return await resolveWithinWorkspace( + input, + this.deps.boundWorkspace, + intent, + ); + } catch (err) { + throw this.recordAndWrap(err, intent, input); + } + } + + async stat(p: ResolvedPath): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'stat'); + const st = await fsp.lstat(p as string); + const out: FsStat = { + kind: kindFromStats(st), + sizeBytes: st.size, + modifiedMs: st.mtimeMs, + }; + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'stat', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: st.size, + }); + return out; + } catch (err) { + throw this.recordAndWrap(err, 'stat', p as string); + } + } + + async readText( + p: ResolvedPath, + opts: ReadTextOptions = {}, + ): Promise<{ content: string; meta: ReadMeta }> { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'read'); + const st = await fsp.stat(p as string); + // Hard size gate before we delegate to lowFs.readTextFile — + // that helper's underlying `readFileWithLineAndLimit` slurps + // the whole file into memory before slicing lines, so an + // unbounded request against a 5 GB text file would OOM the + // daemon (or, on a healthy host, flood the SSE replay ring + // with a 5 GB string). `MAX_READ_BYTES` is the hard cap and + // is independent of the caller's `opts.maxBytes` (which is a + // *softer* post-read truncation target — the boundary still + // honors it via `enforceReadSize` below). A future streaming + // read path can lift this hard cap by reading only the first + // N bytes; for now files above the cap throw and the SDK + // consumer can fall back to `readBytes` with an explicit + // length window. + if (st.size > MAX_READ_BYTES) { + throw new FsError( + 'file_too_large', + `file of ${st.size} bytes exceeds read cap of ${MAX_READ_BYTES} bytes`, + { + hint: 'use readBytes for explicit byte-windowed access on large files', + }, + ); + } + if (await detectBinary(p)) { + throw new FsError('binary_file', `binary file: ${p}`, { + hint: 'use readBytes for binary content', + }); + } + const sizeOutcome = enforceReadSize(st.size, opts.maxBytes); + // Delegate encoding-aware read to the existing core service so + // BOM, CRLF, and iconv-supported codepages remain consistent + // with what the tools layer already does. + const result = await this.deps.lowFs.readTextFile({ + path: p as string, + limit: opts.limit ?? Number.POSITIVE_INFINITY, + line: opts.line ?? 0, + }); + const ignoreVerdict = shouldIgnore( + p, + this.deps.boundWorkspace, + this.deps.ignore, + 'file', + ); + const meta: ReadMeta = { + encoding: result._meta?.encoding, + bom: result._meta?.bom, + lineEnding: (result._meta?.lineEnding ?? 'lf') as 'crlf' | 'lf', + originalLineCount: result._meta?.originalLineCount, + }; + let truncatedContent = result.content; + if (sizeOutcome.truncated) { + const buf = Buffer.from(result.content, 'utf-8'); + if (buf.length > sizeOutcome.bytesToRead) { + truncatedContent = buf + .subarray(0, sizeOutcome.bytesToRead) + .toString('utf-8'); + } + meta.truncated = true; + } + // Surface truncation whenever lowFs's own `limit` clipped the + // content too — without this the audit row + meta.truncated + // would silently disagree on whether the SDK consumer received + // the full file. + if ( + opts.limit !== undefined && + Number.isFinite(opts.limit) && + result._meta?.originalLineCount !== undefined && + result._meta.originalLineCount > opts.limit + (opts.line ?? 0) + ) { + meta.truncated = true; + } + if (ignoreVerdict.ignored) meta.matchedIgnore = ignoreVerdict.category; + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'read', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: st.size, + truncated: meta.truncated, + matchedIgnore: meta.matchedIgnore, + }); + return { content: truncatedContent, meta }; + } catch (err) { + throw this.recordAndWrap(err, 'read', p as string); + } + } + + async readBytes( + p: ResolvedPath, + opts: { maxBytes?: number } = {}, + ): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'read'); + const st = await fsp.stat(p as string); + enforceReadBytesSize(st.size, opts.maxBytes); + const buf = await fsp.readFile(p as string); + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'read', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: buf.length, + }); + return buf; + } catch (err) { + throw this.recordAndWrap(err, 'read', p as string); + } + } + + async list(p: ResolvedPath, opts: ListOptions = {}): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'list'); + const dirents = await fsp.readdir(p as string, { withFileTypes: true }); + const entries: FsEntry[] = []; + for (const d of dirents) { + // `path.join(p, d.name)` is a shallow extension of an + // already-canonical workspace path. Symlinked dirents are + // tagged as `kind: 'symlink'` rather than auto-followed — + // PR 19/20 callers that want the target's containment can + // call `resolve()` separately. Treating each child as + // implicitly-resolved here would be a brand-cast bypass. + const childAbs = path.join(p as string, d.name); + const kind = kindFromDirent(d); + const verdict = shouldIgnore( + childAbs as ResolvedPath, + this.deps.boundWorkspace, + this.deps.ignore, + kind === 'directory' ? 'directory' : 'file', + ); + if (verdict.ignored && !opts.includeIgnored) continue; + entries.push({ name: d.name, kind, ignored: verdict.ignored }); + } + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'list', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: entries.length, + }); + return entries; + } catch (err) { + throw this.recordAndWrap(err, 'list', p as string); + } + } + + async glob(pattern: string, opts: GlobOptions = {}): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'glob'); + // Reject `..` segments in the pattern outright. A pattern like + // `../../**/*.js` would otherwise let a caller search outside + // `cwd`. Boundary-checking each individual hit catches escapes + // too, but rejecting at the pattern level is cheaper and gives + // a clearer error. + if (pattern.split(/[\\/]/).some((seg) => seg === '..')) { + throw new FsError( + 'parse_error', + `glob pattern may not contain '..' segments: ${pattern}`, + ); + } + const cwd = (opts.cwd as string | undefined) ?? this.deps.boundWorkspace; + const matches = await globAsync(pattern, { + cwd, + nodir: false, + absolute: true, + dot: true, + }); + const out: ResolvedPath[] = []; + const max = opts.maxResults ?? Number.POSITIVE_INFINITY; + let escapedCount = 0; + for (const hit of matches) { + if (out.length >= max) break; + const absolute = path.resolve(hit); + // Per-hit boundary check defends against a glob that + // matches a symlink whose target escapes the workspace. + // The literal path is in-workspace (the symlink itself + // sits there), but the realpath isn't — so we resolve + // each hit's symlink chain and compare the canonical to + // the canonical workspace root. Filtered hits are counted + // and reported via a single aggregated `fs.denied` after + // the loop so per-hit emit doesn't flood the bus when a + // misconfigured tree contains many escape symlinks. + let canonical: string; + try { + canonical = await fsp.realpath(absolute); + } catch { + // Dangling symlink or transient error: treat as escape + // so the hit isn't returned to the caller and the audit + // surfaces the count. + escapedCount += 1; + continue; + } + const rel = path.relative(this.deps.boundWorkspace, canonical); + if (rel.startsWith('..') || path.isAbsolute(rel)) { + escapedCount += 1; + continue; + } + const verdict = shouldIgnore( + canonical as ResolvedPath, + this.deps.boundWorkspace, + this.deps.ignore, + 'file', + ); + if (verdict.ignored && !opts.includeIgnored) continue; + out.push(canonical as ResolvedPath); + } + if (escapedCount > 0) { + this.deps.audit.recordDenied(this.deps.ctx, { + intent: 'glob', + input: pattern, + errorKind: 'symlink_escape', + hint: `glob filtered ${escapedCount} hit(s) that resolved outside workspace`, + }); + } + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'glob', + absolute: cwd as ResolvedPath, + durationMs: performance.now() - start, + sizeBytes: out.length, + }); + return out; + } catch (err) { + throw this.recordAndWrap(err, 'glob', pattern); + } + } + + async writeText( + p: ResolvedPath, + content: string, + opts?: WriteTextFileOptions, + ): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'write'); + const buf = Buffer.from(content, 'utf-8'); + enforceWriteSize(buf.length); + await this.deps.lowFs.writeTextFile({ + path: p as string, + content, + _meta: opts ? buildWriteMeta(opts) : undefined, + }); + const verdict = shouldIgnore( + p, + this.deps.boundWorkspace, + this.deps.ignore, + 'file', + ); + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'write', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: buf.length, + matchedIgnore: verdict.ignored ? verdict.category : undefined, + }); + } catch (err) { + throw this.recordAndWrap(err, 'write', p as string); + } + } + + async edit( + p: ResolvedPath, + oldText: string, + newText: string, + ): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'edit'); + const current = await fsp.readFile(p as string, 'utf-8'); + // Single replacement to preserve atomic write-once semantics. + // Multi-occurrence handling lives in PR 20's edit endpoint + // where the route can decide policy; the boundary stays + // mechanical. + const idx = current.indexOf(oldText); + if (idx === -1) { + throw new FsError('parse_error', `oldText not found in ${p}`, { + hint: 'edit() expects oldText to appear verbatim in the file', + }); + } + const next = + current.slice(0, idx) + newText + current.slice(idx + oldText.length); + const buf = Buffer.from(next, 'utf-8'); + enforceWriteSize(buf.length); + await this.deps.lowFs.writeTextFile({ + path: p as string, + content: next, + }); + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'edit', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: buf.length, + }); + return { writtenBytes: buf.length }; + } catch (err) { + throw this.recordAndWrap(err, 'edit', p as string); + } + } + + /** + * Coerce an arbitrary thrown value into an `FsError`, emit the + * matching `fs.denied` audit event, and return the typed error + * for the caller to rethrow. Body methods invoke this in their + * `catch` so: + * - raw fs errnos (`EACCES`, `ENOTDIR`, …) get categorized + * instead of escaping as opaque 5xx, + * - the audit log records every failure (the prior helper + * early-returned for non-`FsError`s and silently lost the + * event), and + * - PR 19/20 routes can still rely on `instanceof FsError` + * for their `sendFsError` serializer. + */ + private recordAndWrap(err: unknown, intent: Intent, input: string): FsError { + const fs = wrapAsFsError(err); + this.deps.audit.recordDenied(this.deps.ctx, { + intent, + input, + errorKind: fs.kind, + hint: fs.hint, + }); + return fs; + } +} + +function kindFromStats(st: { + isFile(): boolean; + isDirectory(): boolean; + isSymbolicLink(): boolean; +}): FsStat['kind'] { + if (st.isSymbolicLink()) return 'symlink'; + if (st.isDirectory()) return 'directory'; + if (st.isFile()) return 'file'; + return 'other'; +} + +function kindFromDirent(d: { + isFile(): boolean; + isDirectory(): boolean; + isSymbolicLink(): boolean; +}): FsEntry['kind'] { + if (d.isSymbolicLink()) return 'symlink'; + if (d.isDirectory()) return 'directory'; + if (d.isFile()) return 'file'; + return 'other'; +} + +function buildWriteMeta( + opts: WriteTextFileOptions, +): Record | undefined { + const meta: Record = {}; + if (opts.bom) meta['bom'] = true; + if (opts.encoding) meta['encoding'] = opts.encoding; + return Object.keys(meta).length > 0 ? meta : undefined; +} + +// Re-export so PR 19/20 routes can access the orchestrator surface +// from a single `serve/fs/index.js` import. +export { MAX_READ_BYTES }; diff --git a/packages/cli/src/serve/httpAcpBridge.ts b/packages/cli/src/serve/httpAcpBridge.ts index 4b255cd6e..553e07d4a 100644 --- a/packages/cli/src/serve/httpAcpBridge.ts +++ b/packages/cli/src/serve/httpAcpBridge.ts @@ -6,7 +6,7 @@ import { spawn, type ChildProcess } from 'node:child_process'; import { randomUUID } from 'node:crypto'; -import { promises as fs, realpathSync } from 'node:fs'; +import { promises as fs } from 'node:fs'; import * as path from 'node:path'; import { Readable, Writable } from 'node:stream'; import { @@ -15,6 +15,7 @@ import { ndJsonStream, } from '@agentclientprotocol/sdk'; import { writeStderrLine } from '../utils/stdioHelpers.js'; +import { canonicalizeWorkspace } from './fs/paths.js'; import { EventBus, DEFAULT_RING_SIZE, @@ -3580,74 +3581,14 @@ function sliceLineRange( } /** - * Canonicalize a workspace path so the boot-time bound path and every - * request's `workspaceCwd` collapse to the same key. `path.resolve` - * alone normalizes `..` and `.` segments and absolutizes, but on - * case-insensitive filesystems (macOS APFS, Windows NTFS) `/Work/A` - * and `/work/a` are the same directory yet `resolve` returns them - * verbatim — without normalization the `boundWorkspace` check would - * reject every request that spelled the path with different casing - * and `sessionScope: 'single'` re-attach would silently degrade to - * "one per spelling". - * - * `realpathSync.native` (when the path exists) walks symlinks and returns - * the on-disk casing; this matches what `config.ts` / `settings.ts` / - * `sandbox.ts` use for their own workspace resolution. When the path - * doesn't exist (test fixtures, ahead-of-mkdir flows) we fall back to - * the resolved-but-uncanonicalized form rather than throwing — the - * downstream `spawn({cwd})` will fail with a useful ENOENT if the - * workspace truly doesn't exist. - * - * NOTE: This is a **cross-module contract** (BX9_q) — `config.ts`, - * `settings.ts`, `sandbox.ts`, and this file all need to canonicalize - * the same way for the bound-workspace check + `sessionScope: - * 'single'` re-attach to work correctly across paths. The contract: - * use `realpathSync.native` on the resolved absolute path; fall back - * to `path.resolve` only when the path doesn't exist yet. If a future - * change breaks this alignment (e.g. one module starts lowercasing on - * Windows but this one doesn't), the canonicalized request path - * won't match the canonicalized bound path → every request returns - * `workspace_mismatch` even though the human-readable paths look - * equivalent. There's no test that pins the alignment; the - * integration suite would catch a divergence only if it tested the - * specific casing / symlink path the affected module changed. - * - * Stage 2 in-process (#3803 §10) collapses the bridge into core, - * removing the bridge-side path resolution entirely. Stage 1.5 - * `@qwen-code/acp-bridge` lift (chiga0 finding 1) is the natural - * place to extract a shared `canonicalizeWorkspace` primitive that - * all four modules consume — the lowest-common-denominator - * extraction is fine THERE because the package boundary forces the - * call sites to converge. Until then, *any* change to how those - * modules resolve workspace paths needs a matching change here. + * Re-export of the workspace canonicalizer for callers that historically + * imported it from `httpAcpBridge.ts`. The implementation was extracted + * to `./fs/paths.ts` in #4175 PR 18 (commit 1) so the forthcoming + * `WorkspaceFileSystem` boundary can reuse the same primitive without + * pulling in the 3.6k-line bridge module. See `./fs/paths.ts` for the + * cross-module contract that governs this function. */ -export function canonicalizeWorkspace(p: string): string { - const resolved = path.resolve(p); - try { - // FIXME(stage-2): switch to `fs.promises.realpath` once the - // bridge call sites become async-friendly. This sync syscall - // runs on the hot `spawnOrAttach` path and blocks the event - // loop for one filesystem stat per call. Single-user loopback - // (Stage 1's design target) doesn't notice; high-concurrency - // deployments will. Stage 2 in-process refactor removes the - // entire bridge-side path resolution anyway, but if Stage 2 - // ever lands without that change, switch to the async version. - return realpathSync.native(resolved); - } catch (err) { - // Only fall back to path.resolve for ENOENT (path doesn't exist - // yet). Other filesystem errors (EACCES, EIO, ELOOP) should - // propagate — swallowing them would hide transient I/O failures - // behind misleading workspace_mismatch rejections. - if ( - err && - typeof err === 'object' && - (err as { code?: unknown }).code === 'ENOENT' - ) { - return resolved; - } - throw err; - } -} +export { canonicalizeWorkspace }; /** * Race `p` against a timeout. The timeout REJECTS the returned diff --git a/packages/cli/src/serve/server.test.ts b/packages/cli/src/serve/server.test.ts index 1dad374d5..0aaea56fc 100644 --- a/packages/cli/src/serve/server.test.ts +++ b/packages/cli/src/serve/server.test.ts @@ -3282,3 +3282,86 @@ describe('runQwenServe SIGINT handler', () => { void vi.fn(); // silence unused-import lint if vitest tree-shakes }); }); + +describe('createServeApp ServeAppDeps.fsFactory wiring (#4175 PR 18)', () => { + it('parks a default WorkspaceFileSystemFactory on app.locals when none is injected', async () => { + const { createServeApp } = await import('./server.js'); + const app = createServeApp( + { + port: 0, + hostname: '127.0.0.1', + workspace: '/work/bound', + } as Parameters[0], + () => 0, + ); + const fsFactory = ( + app.locals as { + fsFactory?: { forRequest: (ctx: { route: string }) => unknown }; + } + ).fsFactory; + expect(fsFactory).toBeDefined(); + expect(typeof fsFactory!.forRequest).toBe('function'); + // The factory is functional — it can build a per-request boundary. + const fs = fsFactory!.forRequest({ route: 'TEST /op' }); + expect(fs).toBeDefined(); + }); + + it('uses the injected fsFactory verbatim when supplied', async () => { + const { createServeApp } = await import('./server.js'); + const sentinel = { forRequest: vi.fn(() => ({ marker: 'injected' })) }; + const app = createServeApp( + { + port: 0, + hostname: '127.0.0.1', + workspace: '/work/bound', + } as Parameters[0], + () => 0, + // eslint-disable-next-line @typescript-eslint/no-explicit-any + { fsFactory: sentinel as any }, + ); + expect((app.locals as { fsFactory?: unknown }).fsFactory).toBe(sentinel); + }); + + it('default fsFactory is built with trusted=false (writes refused)', async () => { + const { createServeApp } = await import('./server.js'); + const { isFsError } = await import('./fs/index.js'); + const os = await import('node:os'); + const tmp = await import('node:fs').then((m) => + m.promises.mkdtemp(path.join(os.tmpdir(), 'qwen-serve-default-trust-')), + ); + try { + const app = createServeApp( + { + port: 0, + hostname: '127.0.0.1', + workspace: tmp, + } as Parameters[0], + () => 0, + ); + type FsCtx = { route: string }; + type WfsLite = { + resolve: (input: string, intent: 'write') => Promise; + writeText: (p: string, content: string) => Promise; + }; + const fsFactory = ( + app.locals as { + fsFactory?: { forRequest: (ctx: FsCtx) => WfsLite }; + } + ).fsFactory; + expect(fsFactory).toBeDefined(); + const fs = fsFactory!.forRequest({ route: 'TEST /op' }); + // Resolve a write target inside the workspace; the resolve + // succeeds but writeText must throw `untrusted_workspace` — + // that's the safe-default behavior the strict-default factory + // exists to enforce. + const resolved = await fs.resolve('child.txt', 'write'); + const err = await fs.writeText(resolved, 'x').catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('untrusted_workspace'); + } finally { + await import('node:fs').then((m) => + m.promises.rm(tmp, { recursive: true, force: true }), + ); + } + }); +}); diff --git a/packages/cli/src/serve/server.ts b/packages/cli/src/serve/server.ts index 1b7d045b9..8226f3cd5 100644 --- a/packages/cli/src/serve/server.ts +++ b/packages/cli/src/serve/server.ts @@ -39,6 +39,32 @@ import { type CapabilitiesEnvelope, type ServeOptions, } from './types.js'; +import { + createWorkspaceFileSystemFactory, + type WorkspaceFileSystemFactory, +} from './fs/index.js'; + +/** + * Build a no-op fs-audit emitter that logs a single warning the + * first time it's invoked. The default factory uses this so a + * regression that silently strips audit events shows up in + * operator logs instead of disappearing — `() => {}` is the + * "obvious" no-op but offers no failure signal. PR 19/20's + * `runQwenServe` injection replaces this with a real per-session + * emit, so legitimate production traffic never hits the warning. + */ +function createDefaultFsAuditEmit(): (event: BridgeEvent) => void { + let warned = false; + return (event: BridgeEvent) => { + if (!warned) { + warned = true; + writeStderrLine( + `qwen serve: fs audit emit is the default no-op (event ${event.type} dropped). ` + + `Inject deps.fsFactory in createServeApp to wire audit into the EventBus.`, + ); + } + }; +} export interface ServeAppDeps { /** Bridge instance; tests inject a fake. Defaults to a fresh real one. */ @@ -56,6 +82,18 @@ export interface ServeAppDeps { * process.cwd()` itself. */ boundWorkspace?: string; + /** + * Workspace filesystem boundary factory (#4175 PR 18). When + * supplied, PR 19/20 routes will pull a per-request + * `WorkspaceFileSystem` off it; when omitted, `createServeApp` + * builds a permissive default (trusted=true, no-op audit emit). + * No PR 18 routes consume the factory yet — the slot is wired so + * PR 19 read-only file routes can drop in without re-shaping + * `ServeAppDeps`. Once PR 19 lands, `runQwenServe` will inject a + * factory whose `trusted` flag mirrors `Config.isTrustedFolder()` + * and whose `emit` plumbs into the per-session EventBus. + */ + fsFactory?: WorkspaceFileSystemFactory; } /** @@ -150,6 +188,33 @@ export function createServeApp( boundWorkspace, }); + // Strict-default factory: `trusted: false` so an upstream refactor + // that forgets to inject `deps.fsFactory` never silently allows + // writes against an untrusted workspace. Read-shaped intents still + // succeed (so tests / direct embeds can exercise the read path + // without a config), but every mutating intent throws + // `untrusted_workspace`. The default `emit` is a no-op that warns + // once on first call so a future regression that silently swallows + // audit events surfaces in operator logs the first time it bites. + // Callers passing `deps.fsFactory` get full control of trust + + // audit destination — `runQwenServe` will inject one whose + // `trusted` mirrors `Config.isTrustedFolder()` and whose `emit` + // plumbs into the per-session EventBus once PR 19/20 lands. + const fsFactory: WorkspaceFileSystemFactory = + deps.fsFactory ?? + createWorkspaceFileSystemFactory({ + boundWorkspace, + trusted: false, + emit: createDefaultFsAuditEmit(), + }); + // Park the factory on `app.locals` so PR 19/20 route handlers can + // pick it up via `req.app.locals.fsFactory` without re-threading + // the value through every handler signature, and so PR 18 tests + // can assert the factory is reachable. Express types `locals` as + // a generic record; we cast to keep a precise property name. + (app.locals as { fsFactory?: WorkspaceFileSystemFactory }).fsFactory = + fsFactory; + // Order matters: rejection guards (CORS / Host allowlist / bearer auth) // run BEFORE the JSON body parser. Otherwise an unauthenticated POST // gets a full 10MB `JSON.parse` before the 401 fires — a trivially diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index bae6ca877..03fd48671 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -279,6 +279,11 @@ export * from './utils/errorParsing.js'; export * from './utils/errors.js'; export * from './utils/fileUtils.js'; export * from './utils/filesearch/fileSearch.js'; +export { + Ignore, + loadIgnoreRules, + type LoadIgnoreRulesOptions, +} from './utils/filesearch/ignore.js'; export * from './utils/formatters.js'; export * from './utils/generateContentResponseUtilities.js'; export * from './utils/getFolderStructure.js';