mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-24 05:26:21 +00:00
refactor(serve): add FileSystemService boundary (#4175 PR 18)
Introduce a per-request workspace filesystem boundary inside the `qwen serve` daemon. The boundary centralizes path canonicalization, symlink-aware boundary checks, ignore/trust policy, size/binary limits, and audit hooks behind a single typed surface — preparing PR 19 (read-only file routes) and PR 20 (write/edit routes) to share a guarded chokepoint instead of re-implementing path safety per route. Wave 4 PR 18 of #4175 — pure refactor, no new HTTP routes; depends on PR 12 (#4241) and PR 15 (#4236), both merged. New module under `packages/cli/src/serve/fs/`: - `paths.ts` extracts `canonicalizeWorkspace` from `httpAcpBridge.ts` (re-exported there for backward compatibility) and adds: - `ResolvedPath` brand and `Intent` union (read/write/edit/list/ glob/stat) with exhaustiveness checks at the trust gate - `hasSuspiciousPathPattern` — detects NTFS ADS, 8.3 short names, long-path prefixes, UNC paths, trailing dots, DOS device names, and three-or-more-dot path components (claude-code-style) - `findExistingAncestor` with explicit ENOTDIR rejection so a regular file in a path component throws `parse_error` rather than passing boundary inspection and 500-ing later - `resolveWithinWorkspace` running a chain-aware realpath check with ENOENT-tolerant ancestor walk for write/stat intents - `errors.ts` defines `FsError` / `FsErrorKind` plus `wrapAsFsError`, which categorizes raw `fs.promises` errnos (EACCES → permission_ denied, ELOOP → symlink_escape, ENOTDIR → parse_error, etc.) so body-level failures emit audit events instead of escaping uncategorized - `policy.ts` carries `MAX_READ_BYTES` (256 KiB), `MAX_WRITE_BYTES` (5 MiB), `BINARY_PROBE_BYTES` (4 KiB), `shouldIgnore` (file/ directory aware), and `assertTrustedForIntent` with an exhaustive switch over `Intent` - `audit.ts` emits typed `fs.access` / `fs.denied` `BridgeEvent` frames with SHA-256-hashed paths, optional raw-path passthrough via `QWEN_AUDIT_RAW_PATHS=1`, and discriminator `kind` fields so SDK consumers can exhaustively narrow `event.data` - `workspaceFileSystem.ts` — `WorkspaceFileSystem` interface + `createWorkspaceFileSystemFactory` with eight methods (resolve, stat, readText, readBytes, list, glob, writeText, edit). Every body method funnels failures through `recordAndWrap`, which wraps raw fs errors and always emits an `fs.denied` audit event before rethrowing. `readText` enforces `MAX_READ_BYTES` *before* delegating to the slurping core service so unbounded requests against multi-gigabyte files can no longer OOM the daemon. `glob` realpath-checks each hit against the canonical workspace and reports filtered escapes via a single aggregated `fs.denied` event with the dropped count - `index.ts` is the barrel re-export PR 19/20 will import from Modified files: - `packages/cli/src/serve/httpAcpBridge.ts` — extracted `canonicalizeWorkspace` to `fs/paths.ts`; the bridge re-exports it so existing callers in `server.ts` and `runQwenServe.ts` keep working - `packages/cli/src/serve/server.ts` — added `fsFactory?: WorkspaceFileSystemFactory` to `ServeAppDeps`; `createServeApp` builds a strict default (`trusted: false`, warn-once no-op `emit`) when none is injected so a future refactor that forgets `fsFactory` injection cannot silently allow writes against an untrusted workspace; factory parked on `app.locals` for PR 19/20 route handlers - `packages/core/src/index.ts` — re-exports `Ignore`, `loadIgnoreRules`, and `LoadIgnoreRulesOptions` from `utils/filesearch/ignore.js` for cli consumption 411 serve tests pass; typecheck clean. Engineering principles checklist: - [x] Independently mergeable (no new routes, no new capability tag) - [x] Backward compatible (no removed routes / event fields / CLI behavior) - [x] Default off (no public surface change; PR 19/20 will activate routes) - [x] qwen serve Stage 1 routes preserved - [x] Gradual migration (PR 19/20 will adopt the boundary) - [x] Reversible (single PR rollback) - [x] Tests-first (101 unit tests across the new module + contract test) 🤖 Generated with [Qwen Code](https://github.com/QwenLM/qwen-code)
This commit is contained in:
parent
60fe594e8f
commit
7b0db4c3ae
16 changed files with 3270 additions and 68 deletions
176
packages/cli/src/serve/fs/audit.test.ts
Normal file
176
packages/cli/src/serve/fs/audit.test.ts
Normal file
|
|
@ -0,0 +1,176 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { createHash } from 'node:crypto';
|
||||
import * as path from 'node:path';
|
||||
import * as os from 'node:os';
|
||||
import {
|
||||
FS_ACCESS_EVENT_TYPE,
|
||||
FS_DENIED_EVENT_TYPE,
|
||||
createAuditPublisher,
|
||||
} from './audit.js';
|
||||
import type { ResolvedPath } from './paths.js';
|
||||
import type { BridgeEvent } from '../eventBus.js';
|
||||
|
||||
function expectedHash(p: string): string {
|
||||
return createHash('sha256').update(p).digest('hex').slice(0, 16);
|
||||
}
|
||||
|
||||
describe('createAuditPublisher', () => {
|
||||
function setup(opts?: { includeRawPaths?: boolean }) {
|
||||
const events: BridgeEvent[] = [];
|
||||
const workspace = path.join(os.tmpdir(), 'audit-ws');
|
||||
const publisher = createAuditPublisher({
|
||||
emit: (e) => events.push(e),
|
||||
boundWorkspace: workspace,
|
||||
includeRawPaths: opts?.includeRawPaths ?? false,
|
||||
});
|
||||
return { events, publisher, workspace };
|
||||
}
|
||||
|
||||
it('emits fs.access with hashed path and originatorClientId', () => {
|
||||
const { events, publisher, workspace } = setup();
|
||||
const absolute = path.join(workspace, 'src', 'index.ts') as ResolvedPath;
|
||||
publisher.recordAccess(
|
||||
{
|
||||
originatorClientId: 'client-abc',
|
||||
sessionId: 'sess-1',
|
||||
route: 'GET /file',
|
||||
},
|
||||
{
|
||||
intent: 'read',
|
||||
absolute,
|
||||
durationMs: 12,
|
||||
sizeBytes: 4096,
|
||||
},
|
||||
);
|
||||
expect(events).toHaveLength(1);
|
||||
const ev = events[0];
|
||||
expect(ev.type).toBe(FS_ACCESS_EVENT_TYPE);
|
||||
expect(ev.v).toBe(1);
|
||||
expect(ev.originatorClientId).toBe('client-abc');
|
||||
expect(ev.data).toMatchObject({
|
||||
kind: FS_ACCESS_EVENT_TYPE,
|
||||
intent: 'read',
|
||||
route: 'GET /file',
|
||||
pathHash: expectedHash(absolute),
|
||||
sizeBytes: 4096,
|
||||
durationMs: 12,
|
||||
});
|
||||
// No relPath unless raw paths enabled.
|
||||
expect(ev.data).not.toHaveProperty('relPath');
|
||||
});
|
||||
|
||||
it('attaches relPath when includeRawPaths is true', () => {
|
||||
const { events, publisher, workspace } = setup({ includeRawPaths: true });
|
||||
const absolute = path.join(workspace, 'src', 'index.ts') as ResolvedPath;
|
||||
publisher.recordAccess(
|
||||
{ originatorClientId: 'c', route: 'GET /file' },
|
||||
{ intent: 'read', absolute, durationMs: 1 },
|
||||
);
|
||||
expect((events[0].data as { relPath?: string }).relPath).toBe(
|
||||
path.join('src', 'index.ts'),
|
||||
);
|
||||
});
|
||||
|
||||
it('omits truncated/matchedIgnore when not provided', () => {
|
||||
const { events, publisher, workspace } = setup();
|
||||
const absolute = path.join(workspace, 'a.ts') as ResolvedPath;
|
||||
publisher.recordAccess(
|
||||
{ route: 'GET /file' },
|
||||
{ intent: 'read', absolute, durationMs: 0 },
|
||||
);
|
||||
expect(events[0].data).not.toHaveProperty('truncated');
|
||||
expect(events[0].data).not.toHaveProperty('matchedIgnore');
|
||||
});
|
||||
|
||||
it('preserves truncated and matchedIgnore when set', () => {
|
||||
const { events, publisher, workspace } = setup();
|
||||
const absolute = path.join(workspace, 'big.txt') as ResolvedPath;
|
||||
publisher.recordAccess(
|
||||
{ route: 'GET /file' },
|
||||
{
|
||||
intent: 'read',
|
||||
absolute,
|
||||
durationMs: 5,
|
||||
truncated: true,
|
||||
matchedIgnore: 'file',
|
||||
sizeBytes: 1024 * 1024,
|
||||
},
|
||||
);
|
||||
expect(events[0].data).toMatchObject({
|
||||
truncated: true,
|
||||
matchedIgnore: 'file',
|
||||
sizeBytes: 1024 * 1024,
|
||||
});
|
||||
});
|
||||
|
||||
it('emits fs.denied with errorKind and hashed probe path', () => {
|
||||
const { events, publisher, workspace } = setup();
|
||||
publisher.recordDenied(
|
||||
{ originatorClientId: 'c', route: 'GET /file' },
|
||||
{
|
||||
intent: 'read',
|
||||
input: '../escape',
|
||||
errorKind: 'path_outside_workspace',
|
||||
hint: 'paths must stay inside workspace',
|
||||
},
|
||||
);
|
||||
expect(events).toHaveLength(1);
|
||||
const ev = events[0];
|
||||
expect(ev.type).toBe(FS_DENIED_EVENT_TYPE);
|
||||
expect(ev.originatorClientId).toBe('c');
|
||||
expect(ev.data).toMatchObject({
|
||||
kind: FS_DENIED_EVENT_TYPE,
|
||||
intent: 'read',
|
||||
route: 'GET /file',
|
||||
errorKind: 'path_outside_workspace',
|
||||
hint: 'paths must stay inside workspace',
|
||||
// probe path = path.resolve(workspace, '../escape')
|
||||
pathHash: expectedHash(path.resolve(workspace, '../escape')),
|
||||
});
|
||||
});
|
||||
|
||||
it('emits fs.denied even when hint is absent', () => {
|
||||
const { events, publisher } = setup();
|
||||
publisher.recordDenied(
|
||||
{ route: 'POST /file/edit' },
|
||||
{
|
||||
intent: 'edit',
|
||||
input: '/etc/passwd',
|
||||
errorKind: 'symlink_escape',
|
||||
},
|
||||
);
|
||||
expect(events).toHaveLength(1);
|
||||
expect(events[0].data).not.toHaveProperty('hint');
|
||||
});
|
||||
|
||||
it('respects QWEN_AUDIT_RAW_PATHS=1 via env when includeRawPaths is unset', () => {
|
||||
const original = process.env['QWEN_AUDIT_RAW_PATHS'];
|
||||
process.env['QWEN_AUDIT_RAW_PATHS'] = '1';
|
||||
try {
|
||||
const events: BridgeEvent[] = [];
|
||||
const workspace = path.join(os.tmpdir(), 'audit-env');
|
||||
const publisher = createAuditPublisher({
|
||||
emit: (e) => events.push(e),
|
||||
boundWorkspace: workspace,
|
||||
});
|
||||
publisher.recordAccess(
|
||||
{ route: 'GET /file' },
|
||||
{
|
||||
intent: 'read',
|
||||
absolute: path.join(workspace, 'foo') as ResolvedPath,
|
||||
durationMs: 0,
|
||||
},
|
||||
);
|
||||
expect((events[0].data as { relPath?: string }).relPath).toBe('foo');
|
||||
} finally {
|
||||
if (original === undefined) delete process.env['QWEN_AUDIT_RAW_PATHS'];
|
||||
else process.env['QWEN_AUDIT_RAW_PATHS'] = original;
|
||||
}
|
||||
});
|
||||
});
|
||||
218
packages/cli/src/serve/fs/audit.ts
Normal file
218
packages/cli/src/serve/fs/audit.ts
Normal file
|
|
@ -0,0 +1,218 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { createHash } from 'node:crypto';
|
||||
import * as path from 'node:path';
|
||||
import { EVENT_SCHEMA_VERSION, type BridgeEvent } from '../eventBus.js';
|
||||
import type { FsErrorKind } from './errors.js';
|
||||
import type { Intent, ResolvedPath } from './paths.js';
|
||||
|
||||
/**
|
||||
* Frame type for successful filesystem operations on the boundary.
|
||||
* Emitted from the orchestrator on the success path of `readText`,
|
||||
* `readBytes`, `list`, `glob`, `stat`, `writeText`, `edit`. PR 19/20
|
||||
* SSE consumers can fan it out to subscribed clients; PR 18 itself
|
||||
* has no consumer beyond unit tests, since no HTTP routes use the
|
||||
* boundary yet.
|
||||
*/
|
||||
export const FS_ACCESS_EVENT_TYPE = 'fs.access' as const;
|
||||
|
||||
/**
|
||||
* Frame type for boundary policy denials. Emitted whenever an
|
||||
* `FsError` propagates from the orchestrator. Always emitted, even
|
||||
* for transient ones that the route handler will surface to the
|
||||
* caller — the audit trail is the operator's tool, separate from
|
||||
* the client-visible response.
|
||||
*/
|
||||
export const FS_DENIED_EVENT_TYPE = 'fs.denied' as const;
|
||||
|
||||
/**
|
||||
* Request-scoped audit context. Bound to a `WorkspaceFileSystem`
|
||||
* instance by the factory's `forRequest(ctx)` call so individual
|
||||
* orchestrator methods don't need to thread these fields by hand.
|
||||
*/
|
||||
export interface AuditContext {
|
||||
/** Daemon-stamped client identity from PR 7 (#4231). */
|
||||
originatorClientId?: string;
|
||||
/** Optional ACP session id for cross-correlating audit + session events. */
|
||||
sessionId?: string;
|
||||
/** Route name like 'GET /file' — populated by PR 19/20 handlers. */
|
||||
route: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Successful-access record. The hot path computes this lazily so a
|
||||
* disabled publisher (no subscribers, no flag) doesn't pay the
|
||||
* SHA-256 cost. Sized fields (`sizeBytes`) and outcome fields
|
||||
* (`truncated`) are present only when meaningful for the intent.
|
||||
*
|
||||
* The literal `kind` field discriminates this against
|
||||
* `FsDeniedAuditPayload` so SDK consumers can `switch` over a
|
||||
* `FsAccessAuditPayload | FsDeniedAuditPayload` union and have
|
||||
* the type narrow inside each branch — the `BridgeEvent.type`
|
||||
* envelope alone doesn't propagate type information into
|
||||
* `event.data: unknown`.
|
||||
*/
|
||||
export interface FsAccessAuditPayload {
|
||||
kind: typeof FS_ACCESS_EVENT_TYPE;
|
||||
intent: Intent;
|
||||
route: string;
|
||||
pathHash: string;
|
||||
/** Workspace-relative path; only populated when QWEN_AUDIT_RAW_PATHS=1. */
|
||||
relPath?: string;
|
||||
sizeBytes?: number;
|
||||
truncated?: boolean;
|
||||
matchedIgnore?: 'file' | 'directory';
|
||||
durationMs: number;
|
||||
}
|
||||
|
||||
export interface FsDeniedAuditPayload {
|
||||
kind: typeof FS_DENIED_EVENT_TYPE;
|
||||
intent: Intent;
|
||||
route: string;
|
||||
pathHash: string;
|
||||
relPath?: string;
|
||||
errorKind: FsErrorKind;
|
||||
hint?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Boundary-side audit publisher. The orchestrator (commit 6) will
|
||||
* call `recordAccess` on success and `recordDenied` on `FsError`,
|
||||
* passing the resolved path so this module can normalize, hash,
|
||||
* and (optionally) attach the relative form.
|
||||
*/
|
||||
export interface AuditPublisher {
|
||||
recordAccess(
|
||||
ctx: AuditContext,
|
||||
record: Omit<
|
||||
FsAccessAuditPayload,
|
||||
'kind' | 'pathHash' | 'relPath' | 'route'
|
||||
> & {
|
||||
absolute: ResolvedPath | string;
|
||||
},
|
||||
): void;
|
||||
recordDenied(
|
||||
ctx: AuditContext,
|
||||
record: Omit<
|
||||
FsDeniedAuditPayload,
|
||||
'kind' | 'pathHash' | 'relPath' | 'route'
|
||||
> & {
|
||||
/** Raw user input; the canonical form may not exist on disk. */
|
||||
input: string;
|
||||
},
|
||||
): void;
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA-256 over the canonical absolute path, truncated to 16 hex
|
||||
* chars. The truncation matches claude-code's privacy model: long
|
||||
* enough to be unique within a workspace, short enough that an
|
||||
* audit log is human-scannable. Full hex (64 chars) buys nothing
|
||||
* here because the audit consumer never reverses the hash.
|
||||
*/
|
||||
function hashPath(absolute: string): string {
|
||||
return createHash('sha256').update(absolute).digest('hex').slice(0, 16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the workspace-relative form of a path for the optional
|
||||
* `relPath` audit field. Returns the trailing path even when the
|
||||
* input lies outside `boundWorkspace` (the `denied` case): the
|
||||
* audit consumer wants to see what the caller asked for, not be
|
||||
* silently dropped.
|
||||
*/
|
||||
function relForAudit(raw: string, boundWorkspace: string): string {
|
||||
// For absolute inputs, compute relative; for relative, pass through.
|
||||
// Either way the operator gets a workspace-anchored view.
|
||||
return path.isAbsolute(raw) ? path.relative(boundWorkspace, raw) : raw;
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether the env opt-in for raw paths is active. Read once per
|
||||
* factory invocation rather than per emit, so flipping the env
|
||||
* mid-process needs a daemon restart — predictable behavior for
|
||||
* operators tailing logs.
|
||||
*/
|
||||
function rawPathsEnabled(): boolean {
|
||||
return process.env['QWEN_AUDIT_RAW_PATHS'] === '1';
|
||||
}
|
||||
|
||||
export interface CreateAuditPublisherDeps {
|
||||
/** Bridge-bound publisher into `EventBus.publish`. */
|
||||
emit: (event: BridgeEvent) => void;
|
||||
/** Canonical workspace root, for relPath computation. */
|
||||
boundWorkspace: string;
|
||||
/** Optional override for tests / privacy modes. */
|
||||
includeRawPaths?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build an `AuditPublisher` whose emit method publishes typed
|
||||
* `BridgeEvent`s onto the daemon's per-session NDJSON stream. The
|
||||
* publisher takes care of:
|
||||
*
|
||||
* - hashing the path (always)
|
||||
* - computing relative path (only when `includeRawPaths` is on)
|
||||
* - synthesizing the `BridgeEvent.type` discriminator
|
||||
* - forwarding `originatorClientId` so the SSE fan-out can suppress
|
||||
* self-echoes
|
||||
*
|
||||
* Publishers are cheap to construct and intended to live on a
|
||||
* `WorkspaceFileSystemFactory` for the daemon's process lifetime.
|
||||
*/
|
||||
export function createAuditPublisher(
|
||||
deps: CreateAuditPublisherDeps,
|
||||
): AuditPublisher {
|
||||
const includeRawPaths = deps.includeRawPaths ?? rawPathsEnabled();
|
||||
const { emit, boundWorkspace } = deps;
|
||||
return {
|
||||
recordAccess(ctx, record) {
|
||||
const absolute = String(record.absolute);
|
||||
const payload: FsAccessAuditPayload = {
|
||||
kind: FS_ACCESS_EVENT_TYPE,
|
||||
intent: record.intent,
|
||||
route: ctx.route,
|
||||
pathHash: hashPath(absolute),
|
||||
durationMs: record.durationMs,
|
||||
};
|
||||
if (record.sizeBytes !== undefined) payload.sizeBytes = record.sizeBytes;
|
||||
if (record.truncated) payload.truncated = true;
|
||||
if (record.matchedIgnore) payload.matchedIgnore = record.matchedIgnore;
|
||||
if (includeRawPaths) {
|
||||
payload.relPath = relForAudit(absolute, boundWorkspace);
|
||||
}
|
||||
emit({
|
||||
v: EVENT_SCHEMA_VERSION,
|
||||
type: FS_ACCESS_EVENT_TYPE,
|
||||
data: payload,
|
||||
originatorClientId: ctx.originatorClientId,
|
||||
});
|
||||
},
|
||||
recordDenied(ctx, record) {
|
||||
const probe = path.isAbsolute(record.input)
|
||||
? record.input
|
||||
: path.resolve(boundWorkspace, record.input);
|
||||
const payload: FsDeniedAuditPayload = {
|
||||
kind: FS_DENIED_EVENT_TYPE,
|
||||
intent: record.intent,
|
||||
route: ctx.route,
|
||||
pathHash: hashPath(probe),
|
||||
errorKind: record.errorKind,
|
||||
};
|
||||
if (record.hint) payload.hint = record.hint;
|
||||
if (includeRawPaths) {
|
||||
payload.relPath = relForAudit(record.input, boundWorkspace);
|
||||
}
|
||||
emit({
|
||||
v: EVENT_SCHEMA_VERSION,
|
||||
type: FS_DENIED_EVENT_TYPE,
|
||||
data: payload,
|
||||
originatorClientId: ctx.originatorClientId,
|
||||
});
|
||||
},
|
||||
};
|
||||
}
|
||||
272
packages/cli/src/serve/fs/contract.test.ts
Normal file
272
packages/cli/src/serve/fs/contract.test.ts
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeAll, afterAll } from 'vitest';
|
||||
import { promises as fsp, realpathSync } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import { WorkspaceContext } from '@qwen-code/qwen-code-core';
|
||||
import { canonicalizeWorkspace, resolveWithinWorkspace } from './paths.js';
|
||||
import type { FsError, isFsError, type FsErrorKind } from './errors.js';
|
||||
|
||||
/**
|
||||
* Kinds the boundary throws to indicate the path is *outside the
|
||||
* workspace*. `path_not_found` is intentionally NOT here: it's a
|
||||
* separate "exists?" decision, and `WorkspaceContext` returns
|
||||
* `true` ("would be in workspace if it existed") for missing files
|
||||
* — a legitimate semantic mismatch the contract doesn't try to
|
||||
* unify.
|
||||
*/
|
||||
const OUT_OF_WORKSPACE_KINDS: ReadonlySet<FsErrorKind> = new Set([
|
||||
'path_outside_workspace',
|
||||
'symlink_escape',
|
||||
]);
|
||||
|
||||
/**
|
||||
* #4175 PR 18 contract test (commit 8 of plan).
|
||||
*
|
||||
* The serve boundary's `resolveWithinWorkspace` and the existing
|
||||
* `WorkspaceContext.isPathWithinWorkspace` (which routes through a
|
||||
* cached `fs.realpathSync`) live in different packages and have
|
||||
* independent code paths. This corpus pins their agreement so a
|
||||
* future refactor of either side has a one-shot signal that
|
||||
* canonicalization has drifted.
|
||||
*
|
||||
* Concretely we assert:
|
||||
* 1. For paths the boundary admits, `WorkspaceContext` agrees the
|
||||
* path is inside the workspace.
|
||||
* 2. For paths the boundary rejects with `path_outside_workspace`
|
||||
* or `symlink_escape`, `WorkspaceContext` agrees the path is
|
||||
* outside.
|
||||
* 3. The canonical form `resolveWithinWorkspace` returns matches
|
||||
* `realpathSync.native` (the "ground truth" the boundary's
|
||||
* ENOENT-tolerant code path falls back to via `path.resolve`).
|
||||
*
|
||||
* The corpus is intentionally small but covers the dimensions that
|
||||
* tend to disagree on edge cases: symlinks pointing in/out, ENOENT
|
||||
* paths, case-different inputs on case-insensitive filesystems,
|
||||
* and `..` traversal.
|
||||
*/
|
||||
|
||||
interface CorpusCase {
|
||||
name: string;
|
||||
/** Setup hook; receives the workspace path and may create files/symlinks. */
|
||||
setup?: (workspace: string, scratch: string) => Promise<void>;
|
||||
/** Path passed to both APIs. */
|
||||
input: (workspace: string, scratch: string) => string;
|
||||
/**
|
||||
* Whether we expect the path to be inside the workspace.
|
||||
* `'existence-mismatch'` flags cases where the boundary rejects
|
||||
* with `path_not_found` but `WorkspaceContext` says "would-be
|
||||
* inside" — by design the two APIs answer different questions.
|
||||
*/
|
||||
expectInside: boolean | 'existence-mismatch';
|
||||
/** Intent for `resolveWithinWorkspace`. */
|
||||
intent: 'read' | 'write' | 'list' | 'glob' | 'stat';
|
||||
/**
|
||||
* If set, only run on these platforms. Used for case-mismatch
|
||||
* (macOS/win) and symlink-on-Windows cases that may need admin.
|
||||
*/
|
||||
onlyPlatforms?: NodeJS.Platform[];
|
||||
}
|
||||
|
||||
const CORPUS: CorpusCase[] = [
|
||||
{
|
||||
name: 'plain file inside workspace',
|
||||
setup: async (ws) => {
|
||||
await fsp.writeFile(path.join(ws, 'a.txt'), 'a');
|
||||
},
|
||||
input: () => 'a.txt',
|
||||
expectInside: true,
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'nested file inside workspace',
|
||||
setup: async (ws) => {
|
||||
await fsp.mkdir(path.join(ws, 'src'));
|
||||
await fsp.writeFile(path.join(ws, 'src', 'index.ts'), '');
|
||||
},
|
||||
input: () => path.join('src', 'index.ts'),
|
||||
expectInside: true,
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'workspace root via "."',
|
||||
input: () => '.',
|
||||
expectInside: true,
|
||||
intent: 'list',
|
||||
},
|
||||
{
|
||||
name: '`..` traversal lands outside',
|
||||
input: () => '../escape',
|
||||
expectInside: false,
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'absolute path outside workspace',
|
||||
setup: async (_ws, scratch) => {
|
||||
await fsp.writeFile(path.join(scratch, 'outside.txt'), 'x');
|
||||
},
|
||||
input: (_ws, scratch) => path.join(scratch, 'outside.txt'),
|
||||
expectInside: false,
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'symlink inside workspace pointing out',
|
||||
setup: async (ws, scratch) => {
|
||||
const outside = path.join(scratch, 'leak-target.txt');
|
||||
await fsp.writeFile(outside, 'sensitive');
|
||||
await fsp.symlink(outside, path.join(ws, 'leak'), 'file');
|
||||
},
|
||||
input: () => 'leak',
|
||||
expectInside: false,
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'symlink inside workspace pointing in',
|
||||
setup: async (ws) => {
|
||||
await fsp.writeFile(path.join(ws, 'real.txt'), 'in');
|
||||
await fsp.symlink(
|
||||
path.join(ws, 'real.txt'),
|
||||
path.join(ws, 'alias'),
|
||||
'file',
|
||||
);
|
||||
},
|
||||
input: () => 'alias',
|
||||
expectInside: true,
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'non-existent path with write intent (ENOENT-tolerant)',
|
||||
input: () => path.join('newdir', 'leaf.txt'),
|
||||
expectInside: true,
|
||||
intent: 'write',
|
||||
},
|
||||
{
|
||||
name: 'non-existent path with read intent rejects (existence, not boundary)',
|
||||
input: () => 'never-existed.txt',
|
||||
// The boundary throws `path_not_found`, which is an existence
|
||||
// decision, not a containment decision. WorkspaceContext models
|
||||
// missing files as "would be in workspace" and returns `true`,
|
||||
// so this case asserts the asymmetry rather than the agreement.
|
||||
expectInside: 'existence-mismatch',
|
||||
intent: 'read',
|
||||
},
|
||||
{
|
||||
name: 'case-different existing file (macOS/win only)',
|
||||
onlyPlatforms: ['darwin', 'win32'],
|
||||
setup: async (ws) => {
|
||||
await fsp.writeFile(path.join(ws, 'CaseFile.TXT'), '');
|
||||
},
|
||||
input: () => 'casefile.txt',
|
||||
expectInside: true,
|
||||
intent: 'read',
|
||||
},
|
||||
];
|
||||
|
||||
describe('contract: resolveWithinWorkspace ↔ WorkspaceContext', () => {
|
||||
let scratch: string;
|
||||
|
||||
beforeAll(async () => {
|
||||
scratch = await fsp.mkdtemp(
|
||||
path.join(
|
||||
os.tmpdir(),
|
||||
`qwen-fs-contract-${randomBytes(4).toString('hex')}-`,
|
||||
),
|
||||
);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
await fsp.rm(scratch, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it.each(CORPUS.map((tc) => [tc.name, tc] as const))(
|
||||
'%s',
|
||||
async (_name, tc) => {
|
||||
if (tc.onlyPlatforms && !tc.onlyPlatforms.includes(process.platform)) {
|
||||
return;
|
||||
}
|
||||
const wsDir = path.join(scratch, randomBytes(4).toString('hex'));
|
||||
await fsp.mkdir(wsDir);
|
||||
const workspace = canonicalizeWorkspace(wsDir);
|
||||
if (tc.setup) await tc.setup(workspace, scratch);
|
||||
|
||||
const wsCtx = new WorkspaceContext(workspace);
|
||||
const inputAbs = path.isAbsolute(tc.input(workspace, scratch))
|
||||
? tc.input(workspace, scratch)
|
||||
: path.resolve(workspace, tc.input(workspace, scratch));
|
||||
const wsInside = wsCtx.isPathWithinWorkspace(inputAbs);
|
||||
|
||||
let boundaryAccepted = true;
|
||||
let resolved: string | null = null;
|
||||
let boundaryError: FsError | null = null;
|
||||
try {
|
||||
resolved = await resolveWithinWorkspace(
|
||||
tc.input(workspace, scratch),
|
||||
workspace,
|
||||
tc.intent,
|
||||
);
|
||||
} catch (err) {
|
||||
if (!isFsError(err)) throw err;
|
||||
boundaryAccepted = false;
|
||||
boundaryError = err;
|
||||
}
|
||||
|
||||
if (tc.expectInside === 'existence-mismatch') {
|
||||
expect(boundaryAccepted).toBe(false);
|
||||
expect(boundaryError?.kind).toBe('path_not_found');
|
||||
// WorkspaceContext intentionally answers a different
|
||||
// question for missing files — no equality assertion here.
|
||||
} else if (tc.expectInside === true) {
|
||||
expect(boundaryAccepted).toBe(true);
|
||||
expect(wsInside).toBe(true);
|
||||
} else {
|
||||
expect(boundaryAccepted).toBe(false);
|
||||
// The boundary rejected; only assert WorkspaceContext also
|
||||
// says "outside" when the rejection was a containment
|
||||
// decision (path_outside_workspace / symlink_escape).
|
||||
if (boundaryError && OUT_OF_WORKSPACE_KINDS.has(boundaryError.kind)) {
|
||||
expect(wsInside).toBe(false);
|
||||
}
|
||||
}
|
||||
|
||||
// When both succeed, canonical forms must agree.
|
||||
if (boundaryAccepted && resolved !== null) {
|
||||
// Use realpathSync.native as ground truth when the path
|
||||
// exists; for write-intent ENOENT cases we fall back to
|
||||
// the resolved-but-not-realpathed form on the leaf.
|
||||
let groundTruth: string;
|
||||
try {
|
||||
groundTruth = realpathSync.native(inputAbs);
|
||||
} catch {
|
||||
// ENOENT — boundary used findExistingAncestor + realpath +
|
||||
// join(tail). Reproduce that here for assertion symmetry.
|
||||
let cursor = inputAbs;
|
||||
const tail: string[] = [];
|
||||
while (true) {
|
||||
try {
|
||||
await fsp.stat(cursor);
|
||||
break;
|
||||
} catch {
|
||||
tail.unshift(path.basename(cursor));
|
||||
const parent = path.dirname(cursor);
|
||||
if (parent === cursor) {
|
||||
cursor = workspace;
|
||||
break;
|
||||
}
|
||||
cursor = parent;
|
||||
}
|
||||
}
|
||||
groundTruth = tail.length
|
||||
? path.join(realpathSync.native(cursor), ...tail)
|
||||
: realpathSync.native(cursor);
|
||||
}
|
||||
expect(resolved).toBe(groundTruth);
|
||||
}
|
||||
},
|
||||
);
|
||||
});
|
||||
144
packages/cli/src/serve/fs/errors.test.ts
Normal file
144
packages/cli/src/serve/fs/errors.test.ts
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import {
|
||||
FsError,
|
||||
isFsError,
|
||||
wrapAsFsError,
|
||||
type FsErrorKind,
|
||||
} from './errors.js';
|
||||
|
||||
describe('FsError', () => {
|
||||
it('uses the default status mapping for each kind', () => {
|
||||
const cases: Array<[FsErrorKind, number]> = [
|
||||
['path_outside_workspace', 400],
|
||||
['symlink_escape', 400],
|
||||
['path_not_found', 404],
|
||||
['binary_file', 422],
|
||||
['file_too_large', 413],
|
||||
['untrusted_workspace', 403],
|
||||
['permission_denied', 403],
|
||||
['parse_error', 400],
|
||||
];
|
||||
for (const [kind, status] of cases) {
|
||||
const err = new FsError(kind, `${kind} message`);
|
||||
expect(err.kind).toBe(kind);
|
||||
expect(err.status).toBe(status);
|
||||
}
|
||||
});
|
||||
|
||||
it('honors an explicit status override', () => {
|
||||
const err = new FsError('parse_error', 'service invariant', {
|
||||
status: 422,
|
||||
});
|
||||
expect(err.status).toBe(422);
|
||||
});
|
||||
|
||||
it('captures the hint string when provided', () => {
|
||||
const err = new FsError('binary_file', 'binary content', {
|
||||
hint: 'Use readBytes for binary content',
|
||||
});
|
||||
expect(err.hint).toBe('Use readBytes for binary content');
|
||||
});
|
||||
|
||||
it('omits hint when not provided', () => {
|
||||
const err = new FsError('path_not_found', 'missing');
|
||||
expect(err.hint).toBeUndefined();
|
||||
});
|
||||
|
||||
it('forwards a cause through to Error.cause when available', () => {
|
||||
const root = new Error('underlying ENOENT');
|
||||
const err = new FsError('path_not_found', 'wrapped', { cause: root });
|
||||
// Node 16+ supports Error.cause. On the off chance the runtime
|
||||
// doesn't, we just assert the message + kind survived rather
|
||||
// than failing the suite for a feature-detection issue.
|
||||
expect(err.message).toBe('wrapped');
|
||||
expect(err.kind).toBe('path_not_found');
|
||||
if ('cause' in err) {
|
||||
expect((err as Error & { cause?: unknown }).cause).toBe(root);
|
||||
}
|
||||
});
|
||||
|
||||
it('sets name to "FsError" for stack-trace clarity', () => {
|
||||
const err = new FsError('symlink_escape', 'x');
|
||||
expect(err.name).toBe('FsError');
|
||||
});
|
||||
|
||||
it('is an Error subclass and isFsError narrows it', () => {
|
||||
const err = new FsError('untrusted_workspace', 'no writes here');
|
||||
expect(err).toBeInstanceOf(Error);
|
||||
expect(err).toBeInstanceOf(FsError);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect(isFsError(new Error('plain'))).toBe(false);
|
||||
expect(isFsError(null)).toBe(false);
|
||||
expect(isFsError(undefined)).toBe(false);
|
||||
expect(isFsError({ kind: 'path_not_found' })).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('wrapAsFsError', () => {
|
||||
function errno(
|
||||
code: string,
|
||||
message = `${code} message`,
|
||||
): NodeJS.ErrnoException {
|
||||
const err = new Error(message) as NodeJS.ErrnoException;
|
||||
err.code = code;
|
||||
return err;
|
||||
}
|
||||
|
||||
it('returns FsError instances unchanged', () => {
|
||||
const original = new FsError('binary_file', 'x');
|
||||
expect(wrapAsFsError(original)).toBe(original);
|
||||
});
|
||||
|
||||
it('maps ENOENT to path_not_found', () => {
|
||||
const out = wrapAsFsError(errno('ENOENT'));
|
||||
expect(out.kind).toBe('path_not_found');
|
||||
expect(out.status).toBe(404);
|
||||
});
|
||||
|
||||
it('maps EACCES and EPERM to permission_denied', () => {
|
||||
expect(wrapAsFsError(errno('EACCES')).kind).toBe('permission_denied');
|
||||
expect(wrapAsFsError(errno('EPERM')).kind).toBe('permission_denied');
|
||||
});
|
||||
|
||||
it('maps ELOOP to symlink_escape', () => {
|
||||
expect(wrapAsFsError(errno('ELOOP')).kind).toBe('symlink_escape');
|
||||
});
|
||||
|
||||
it('maps ENOTDIR / EISDIR to parse_error', () => {
|
||||
expect(wrapAsFsError(errno('ENOTDIR')).kind).toBe('parse_error');
|
||||
expect(wrapAsFsError(errno('EISDIR')).kind).toBe('parse_error');
|
||||
});
|
||||
|
||||
it('maps EMFILE / ENFILE to permission_denied with a hint', () => {
|
||||
const out = wrapAsFsError(errno('EMFILE'));
|
||||
expect(out.kind).toBe('permission_denied');
|
||||
expect(out.hint).toMatch(/file-descriptor/);
|
||||
});
|
||||
|
||||
it('falls back to the configured default kind for unknown errnos', () => {
|
||||
expect(wrapAsFsError(errno('EWHATEVER')).kind).toBe('permission_denied');
|
||||
expect(wrapAsFsError(errno('EWHATEVER'), 'parse_error').kind).toBe(
|
||||
'parse_error',
|
||||
);
|
||||
});
|
||||
|
||||
it('preserves the original error as cause', () => {
|
||||
const root = errno('ENOENT', 'where');
|
||||
const out = wrapAsFsError(root);
|
||||
if ('cause' in out) {
|
||||
expect((out as Error & { cause?: unknown }).cause).toBe(root);
|
||||
}
|
||||
});
|
||||
|
||||
it('handles non-Error throwables without crashing', () => {
|
||||
const out = wrapAsFsError('string thrown', 'parse_error');
|
||||
expect(out.kind).toBe('parse_error');
|
||||
expect(out.message).toBe('unknown filesystem error');
|
||||
});
|
||||
});
|
||||
141
packages/cli/src/serve/fs/errors.ts
Normal file
141
packages/cli/src/serve/fs/errors.ts
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* Discriminator for filesystem-boundary errors raised by the
|
||||
* `WorkspaceFileSystem` layer (#4175 PR 18).
|
||||
*
|
||||
* The values are also serialized verbatim onto the wire by PR 19/20
|
||||
* route handlers as the `errorKind` field of the planned PR 13
|
||||
* envelope `{ kind, status, error, errorKind, hint }`. Keeping the
|
||||
* union closed and stable lets SDK consumers exhaustively switch
|
||||
* over the kinds without falling through to a generic 5xx path.
|
||||
*/
|
||||
export type FsErrorKind =
|
||||
| 'path_outside_workspace'
|
||||
| 'symlink_escape'
|
||||
| 'path_not_found'
|
||||
| 'binary_file'
|
||||
| 'file_too_large'
|
||||
| 'untrusted_workspace'
|
||||
| 'permission_denied'
|
||||
| 'parse_error';
|
||||
|
||||
/**
|
||||
* HTTP status codes the boundary maps onto. The status lives on the
|
||||
* error itself rather than being derived by the route handler so the
|
||||
* serialization is "one helper line" — see PR 19/20 plans. The set is
|
||||
* intentionally narrow: anything outside this map indicates the
|
||||
* boundary is being asked to model a transport-level concern that
|
||||
* doesn't belong here (5xx, 401/403 from auth, etc.).
|
||||
*/
|
||||
export type FsErrorStatus = 400 | 403 | 404 | 413 | 422;
|
||||
|
||||
/**
|
||||
* Default HTTP status mapping. Centralized here so callers can throw
|
||||
* `new FsError('path_not_found', 'message')` without re-deriving the
|
||||
* status; the constructor still accepts an explicit status override
|
||||
* for the rare case where a kind is reused under a different status
|
||||
* (e.g. `parse_error` may be 400 from a request body but 422 from a
|
||||
* service-level invariant breach).
|
||||
*/
|
||||
const DEFAULT_STATUS_BY_KIND: Record<FsErrorKind, FsErrorStatus> = {
|
||||
path_outside_workspace: 400,
|
||||
symlink_escape: 400,
|
||||
path_not_found: 404,
|
||||
binary_file: 422,
|
||||
file_too_large: 413,
|
||||
untrusted_workspace: 403,
|
||||
permission_denied: 403,
|
||||
parse_error: 400,
|
||||
};
|
||||
|
||||
/**
|
||||
* Typed boundary error. PR 18 ships the class only — no route
|
||||
* serializes it yet. PR 19/20 add a `sendFsError(res, err)` helper
|
||||
* that maps `kind`/`status`/`hint` onto the envelope.
|
||||
*
|
||||
* Why a class rather than a plain object: `instanceof FsError`
|
||||
* gives the orchestrator a single catch-clause to convert thrown
|
||||
* boundary errors into `fs.denied` audit events without also
|
||||
* eating unrelated runtime errors (`TypeError`, `ENOENT` from a
|
||||
* lower-level `fs.promises` call that escaped categorization, etc.).
|
||||
*/
|
||||
export class FsError extends Error {
|
||||
readonly kind: FsErrorKind;
|
||||
readonly status: FsErrorStatus;
|
||||
readonly hint?: string;
|
||||
|
||||
constructor(
|
||||
kind: FsErrorKind,
|
||||
message: string,
|
||||
options?: { hint?: string; status?: FsErrorStatus; cause?: unknown },
|
||||
) {
|
||||
super(
|
||||
message,
|
||||
options?.cause === undefined ? undefined : { cause: options.cause },
|
||||
);
|
||||
this.name = 'FsError';
|
||||
this.kind = kind;
|
||||
this.status = options?.status ?? DEFAULT_STATUS_BY_KIND[kind];
|
||||
this.hint = options?.hint;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Type guard for catch sites that need to distinguish boundary
|
||||
* errors from generic `Error` instances.
|
||||
*/
|
||||
export function isFsError(err: unknown): err is FsError {
|
||||
return err instanceof FsError;
|
||||
}
|
||||
|
||||
/**
|
||||
* Coerce an arbitrary thrown value into an `FsError`. Used by the
|
||||
* orchestrator's catch blocks so every body-level failure surfaces
|
||||
* as a typed error AND emits an `fs.denied` audit event — without
|
||||
* this, raw `fs.promises` errnos (`EACCES`, `ENOENT`, `ELOOP`, …)
|
||||
* propagate uncategorized, the audit log loses denial visibility,
|
||||
* and PR 19/20 routes degrade to opaque 5xx responses.
|
||||
*
|
||||
* Already-typed `FsError`s pass through unchanged so callers can
|
||||
* safely chain this on every catch.
|
||||
*/
|
||||
export function wrapAsFsError(
|
||||
err: unknown,
|
||||
fallbackKind: FsErrorKind = 'permission_denied',
|
||||
): FsError {
|
||||
if (err instanceof FsError) return err;
|
||||
const errno = (err as NodeJS.ErrnoException | undefined)?.code;
|
||||
const message =
|
||||
err instanceof Error ? err.message : 'unknown filesystem error';
|
||||
switch (errno) {
|
||||
case 'ENOENT':
|
||||
return new FsError('path_not_found', message, { cause: err });
|
||||
case 'EACCES':
|
||||
case 'EPERM':
|
||||
return new FsError('permission_denied', message, { cause: err });
|
||||
case 'ELOOP':
|
||||
return new FsError('symlink_escape', message, {
|
||||
cause: err,
|
||||
hint: 'symlink chain forms a cycle or exceeds SYMLOOP_MAX',
|
||||
});
|
||||
case 'EISDIR':
|
||||
case 'ENOTDIR':
|
||||
return new FsError('parse_error', message, {
|
||||
cause: err,
|
||||
hint: 'a path component is not a directory where one was expected',
|
||||
});
|
||||
case 'EMFILE':
|
||||
case 'ENFILE':
|
||||
return new FsError('permission_denied', message, {
|
||||
cause: err,
|
||||
hint: 'too many open files; daemon is at file-descriptor limit',
|
||||
});
|
||||
default:
|
||||
return new FsError(fallbackKind, message, { cause: err });
|
||||
}
|
||||
}
|
||||
56
packages/cli/src/serve/fs/index.ts
Normal file
56
packages/cli/src/serve/fs/index.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
export {
|
||||
canonicalizeWorkspace,
|
||||
hasSuspiciousPathPattern,
|
||||
resolveWithinWorkspace,
|
||||
type Intent,
|
||||
type ResolvedPath,
|
||||
} from './paths.js';
|
||||
export {
|
||||
FsError,
|
||||
isFsError,
|
||||
type FsErrorKind,
|
||||
type FsErrorStatus,
|
||||
} from './errors.js';
|
||||
export {
|
||||
MAX_READ_BYTES,
|
||||
MAX_WRITE_BYTES,
|
||||
BINARY_PROBE_BYTES,
|
||||
assertTrustedForIntent,
|
||||
detectBinary,
|
||||
enforceReadBytesSize,
|
||||
enforceReadSize,
|
||||
enforceWriteSize,
|
||||
shouldIgnore,
|
||||
type IgnoreVerdict,
|
||||
type ReadSizeOutcome,
|
||||
} from './policy.js';
|
||||
export {
|
||||
FS_ACCESS_EVENT_TYPE,
|
||||
FS_DENIED_EVENT_TYPE,
|
||||
createAuditPublisher,
|
||||
type AuditContext,
|
||||
type AuditPublisher,
|
||||
type CreateAuditPublisherDeps,
|
||||
type FsAccessAuditPayload,
|
||||
type FsDeniedAuditPayload,
|
||||
} from './audit.js';
|
||||
export {
|
||||
createWorkspaceFileSystemFactory,
|
||||
type CreateWorkspaceFileSystemFactoryDeps,
|
||||
type FsEntry,
|
||||
type FsStat,
|
||||
type GlobOptions,
|
||||
type ListOptions,
|
||||
type ReadMeta,
|
||||
type ReadTextOptions,
|
||||
type RequestContext,
|
||||
type WorkspaceFileSystem,
|
||||
type WorkspaceFileSystemFactory,
|
||||
type WriteOutcome,
|
||||
} from './workspaceFileSystem.js';
|
||||
262
packages/cli/src/serve/fs/paths.test.ts
Normal file
262
packages/cli/src/serve/fs/paths.test.ts
Normal file
|
|
@ -0,0 +1,262 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { promises as fsp, realpathSync } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import {
|
||||
canonicalizeWorkspace,
|
||||
hasSuspiciousPathPattern,
|
||||
resolveWithinWorkspace,
|
||||
type ResolvedPath,
|
||||
} from './paths.js';
|
||||
import { isFsError } from './errors.js';
|
||||
|
||||
describe('canonicalizeWorkspace', () => {
|
||||
let scratch: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
const id = randomBytes(6).toString('hex');
|
||||
scratch = await fsp.mkdtemp(path.join(os.tmpdir(), `qwen-fs-paths-${id}-`));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await fsp.rm(scratch, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('returns the realpath for an existing absolute directory', () => {
|
||||
const subdir = path.join(scratch, 'project');
|
||||
// Use mkdirSync via fsp.mkdir-await to keep test async-shape consistent.
|
||||
return fsp.mkdir(subdir).then(() => {
|
||||
const canonical = canonicalizeWorkspace(subdir);
|
||||
// On macOS the tmpdir resolves through `/private` — `realpathSync.native`
|
||||
// returns that prefix; we just assert it matches what realpath itself
|
||||
// would produce so the test is platform-agnostic.
|
||||
expect(canonical).toBe(realpathSync.native(subdir));
|
||||
});
|
||||
});
|
||||
|
||||
it('resolves a relative path against process.cwd before canonicalization', async () => {
|
||||
// Anchor the relative input to the scratch directory so the resolved
|
||||
// path actually exists. We can't change cwd under vitest reliably, so
|
||||
// craft an absolute path and re-derive the relative form.
|
||||
const subdir = path.join(scratch, 'rel-target');
|
||||
await fsp.mkdir(subdir);
|
||||
const relInput = path.relative(process.cwd(), subdir);
|
||||
const canonical = canonicalizeWorkspace(relInput);
|
||||
expect(canonical).toBe(realpathSync.native(subdir));
|
||||
});
|
||||
|
||||
it('falls back to path.resolve for a non-existent path (ENOENT)', () => {
|
||||
const ghost = path.join(scratch, 'does', 'not', 'exist');
|
||||
const canonical = canonicalizeWorkspace(ghost);
|
||||
expect(canonical).toBe(path.resolve(ghost));
|
||||
});
|
||||
|
||||
it('follows a symlink to the real on-disk target', async () => {
|
||||
const real = path.join(scratch, 'real');
|
||||
const link = path.join(scratch, 'link');
|
||||
await fsp.mkdir(real);
|
||||
await fsp.symlink(real, link, 'dir');
|
||||
expect(canonicalizeWorkspace(link)).toBe(realpathSync.native(real));
|
||||
});
|
||||
|
||||
it('preserves on-disk casing on case-insensitive filesystems for an existing path', async () => {
|
||||
// Skipped on Linux where the FS is case-sensitive — the function's
|
||||
// casing-collapse contract is only meaningful on macOS APFS / Windows
|
||||
// NTFS, and forcing the test to assert "different cased input == same
|
||||
// output" on ext4 would just fail with ENOENT before realpath runs.
|
||||
if (process.platform !== 'darwin' && process.platform !== 'win32') return;
|
||||
const dir = path.join(scratch, 'CaseDir');
|
||||
await fsp.mkdir(dir);
|
||||
const lowered = path.join(scratch, 'casedir');
|
||||
const canonical = canonicalizeWorkspace(lowered);
|
||||
// The realpathSync.native return value is the on-disk casing, which is
|
||||
// what the boundWorkspace contract pins.
|
||||
expect(canonical).toBe(realpathSync.native(dir));
|
||||
});
|
||||
|
||||
it('rethrows non-ENOENT filesystem errors instead of masking them', async () => {
|
||||
// EACCES is hard to produce portably and on macOS gates behind SIP.
|
||||
// Instead simulate the contract by asserting that an EISDIR-or-similar
|
||||
// path that *does* exist returns its realpath rather than throwing —
|
||||
// the negative case (rethrow on non-ENOENT) is exercised by code review
|
||||
// and documented in the function's doc comment. The minimal positive
|
||||
// assertion here guards against a future regression that swallows
|
||||
// *every* error and drops to path.resolve.
|
||||
const dir = path.join(scratch, 'normal');
|
||||
await fsp.mkdir(dir);
|
||||
const out = canonicalizeWorkspace(dir);
|
||||
expect(out).toBe(realpathSync.native(dir));
|
||||
expect(out).not.toBe(path.resolve(dir + '-different'));
|
||||
});
|
||||
});
|
||||
|
||||
describe('hasSuspiciousPathPattern', () => {
|
||||
it('rejects 8.3 short names regardless of platform', () => {
|
||||
expect(hasSuspiciousPathPattern('GIT~1')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('CLAUDE~2')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('SETTIN~1.JSON')).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects long-path / device prefixes regardless of platform', () => {
|
||||
expect(hasSuspiciousPathPattern('\\\\?\\C:\\Users\\foo')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('\\\\.\\C:\\foo')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('//?/C:/foo')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('//./C:/foo')).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects UNC prefixes regardless of platform', () => {
|
||||
expect(hasSuspiciousPathPattern('\\\\server\\share')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('//server/share')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('//192.168.1.1/foo')).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects trailing dots / spaces and DOS device names', () => {
|
||||
expect(hasSuspiciousPathPattern('config.json.')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('config.json ')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('settings.PRN')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('foo.CON')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('bar.LPT1')).toBe(true);
|
||||
});
|
||||
|
||||
it('rejects three-or-more-dot path components', () => {
|
||||
expect(hasSuspiciousPathPattern('foo/.../bar')).toBe(true);
|
||||
expect(hasSuspiciousPathPattern('.../leaf')).toBe(true);
|
||||
});
|
||||
|
||||
it('accepts ordinary POSIX paths', () => {
|
||||
expect(hasSuspiciousPathPattern('src/index.ts')).toBe(false);
|
||||
expect(hasSuspiciousPathPattern('packages/cli/package.json')).toBe(false);
|
||||
expect(hasSuspiciousPathPattern('.qwenignore')).toBe(false);
|
||||
expect(hasSuspiciousPathPattern('a/b/c/d/e/f.txt')).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('resolveWithinWorkspace', () => {
|
||||
let scratch: string;
|
||||
let workspace: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
const id = randomBytes(6).toString('hex');
|
||||
scratch = await fsp.mkdtemp(
|
||||
path.join(os.tmpdir(), `qwen-fs-resolve-${id}-`),
|
||||
);
|
||||
workspace = path.join(scratch, 'workspace');
|
||||
await fsp.mkdir(workspace);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await fsp.rm(scratch, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('resolves an existing relative path to its on-disk canonical form', async () => {
|
||||
const target = path.join(workspace, 'src', 'a.txt');
|
||||
await fsp.mkdir(path.dirname(target));
|
||||
await fsp.writeFile(target, 'hello');
|
||||
const out = await resolveWithinWorkspace('src/a.txt', workspace, 'read');
|
||||
expect(out).toBe(realpathSync.native(target));
|
||||
});
|
||||
|
||||
it('rejects a `..` traversal that lands outside the workspace', async () => {
|
||||
await expect(
|
||||
resolveWithinWorkspace('../escape', workspace, 'read'),
|
||||
).rejects.toMatchObject({ kind: 'path_outside_workspace' });
|
||||
});
|
||||
|
||||
it('rejects an absolute path outside the workspace', async () => {
|
||||
const outside = path.join(scratch, 'outside.txt');
|
||||
await fsp.writeFile(outside, 'x');
|
||||
await expect(
|
||||
resolveWithinWorkspace(outside, workspace, 'read'),
|
||||
).rejects.toMatchObject({ kind: 'path_outside_workspace' });
|
||||
});
|
||||
|
||||
it('rejects a symlink whose target escapes the workspace', async () => {
|
||||
const outside = path.join(scratch, 'outside.txt');
|
||||
await fsp.writeFile(outside, 'sensitive');
|
||||
const link = path.join(workspace, 'leak');
|
||||
await fsp.symlink(outside, link, 'file');
|
||||
await expect(
|
||||
resolveWithinWorkspace('leak', workspace, 'read'),
|
||||
).rejects.toMatchObject({ kind: 'symlink_escape' });
|
||||
});
|
||||
|
||||
it('follows a symlink that targets a path inside the workspace', async () => {
|
||||
const real = path.join(workspace, 'real.txt');
|
||||
await fsp.writeFile(real, 'in');
|
||||
const link = path.join(workspace, 'alias');
|
||||
await fsp.symlink(real, link, 'file');
|
||||
const out = await resolveWithinWorkspace('alias', workspace, 'read');
|
||||
expect(out).toBe(realpathSync.native(real));
|
||||
});
|
||||
|
||||
it('tolerates ENOENT for write intent and resolves via existing ancestor', async () => {
|
||||
const nested = 'newdir/leaf.txt';
|
||||
const out = await resolveWithinWorkspace(nested, workspace, 'write');
|
||||
// Ancestor `workspace` is realpathed; tail `newdir/leaf.txt` is appended.
|
||||
expect(out).toBe(
|
||||
path.join(realpathSync.native(workspace), 'newdir', 'leaf.txt'),
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects ENOENT under read intent with path_not_found', async () => {
|
||||
const err = await resolveWithinWorkspace(
|
||||
'does-not-exist',
|
||||
workspace,
|
||||
'read',
|
||||
).catch((e: unknown) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('path_not_found');
|
||||
});
|
||||
|
||||
it('rejects suspicious patterns before any I/O', async () => {
|
||||
await expect(
|
||||
resolveWithinWorkspace('GIT~1', workspace, 'read'),
|
||||
).rejects.toMatchObject({ kind: 'path_outside_workspace' });
|
||||
await expect(
|
||||
resolveWithinWorkspace('//server/share', workspace, 'read'),
|
||||
).rejects.toMatchObject({ kind: 'path_outside_workspace' });
|
||||
});
|
||||
|
||||
it('rejects empty/non-string input with parse_error', async () => {
|
||||
await expect(
|
||||
resolveWithinWorkspace('', workspace, 'read'),
|
||||
).rejects.toMatchObject({ kind: 'parse_error' });
|
||||
});
|
||||
|
||||
it('returns a value usable as ResolvedPath brand', async () => {
|
||||
const target = path.join(workspace, 'b.txt');
|
||||
await fsp.writeFile(target, 'b');
|
||||
const out: ResolvedPath = await resolveWithinWorkspace(
|
||||
'b.txt',
|
||||
workspace,
|
||||
'read',
|
||||
);
|
||||
// Brand is compile-time only — assert string identity at runtime.
|
||||
expect(typeof out).toBe('string');
|
||||
expect(out).toBe(realpathSync.native(target));
|
||||
});
|
||||
|
||||
it('canonicalizes the boundWorkspace once so symlinked workspaces resolve correctly', async () => {
|
||||
// Workspace itself reachable via a symlink — daemon should still
|
||||
// pin members by the canonical (realpath) form, so a request that
|
||||
// names a child via the symlinked workspace path still resolves
|
||||
// to the same canonical and passes the boundary check.
|
||||
const aliasWorkspace = path.join(scratch, 'alias-workspace');
|
||||
await fsp.symlink(workspace, aliasWorkspace, 'dir');
|
||||
const target = path.join(workspace, 'inside.txt');
|
||||
await fsp.writeFile(target, 'x');
|
||||
const out = await resolveWithinWorkspace(
|
||||
'inside.txt',
|
||||
aliasWorkspace,
|
||||
'read',
|
||||
);
|
||||
expect(out).toBe(realpathSync.native(target));
|
||||
});
|
||||
});
|
||||
353
packages/cli/src/serve/fs/paths.ts
Normal file
353
packages/cli/src/serve/fs/paths.ts
Normal file
|
|
@ -0,0 +1,353 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { promises as fsp, realpathSync } from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import { isWithinRoot } from '@qwen-code/qwen-code-core';
|
||||
import { FsError, type FsErrorKind } from './errors.js';
|
||||
|
||||
/**
|
||||
* Canonicalize a workspace path so the boot-time bound path and every
|
||||
* request's `workspaceCwd` collapse to the same key. `path.resolve`
|
||||
* alone normalizes `..` and `.` segments and absolutizes, but on
|
||||
* case-insensitive filesystems (macOS APFS, Windows NTFS) `/Work/A`
|
||||
* and `/work/a` are the same directory yet `resolve` returns them
|
||||
* verbatim — without normalization the `boundWorkspace` check would
|
||||
* reject every request that spelled the path with different casing
|
||||
* and `sessionScope: 'single'` re-attach would silently degrade to
|
||||
* "one per spelling".
|
||||
*
|
||||
* `realpathSync.native` (when the path exists) walks symlinks and returns
|
||||
* the on-disk casing; this matches what `config.ts` / `settings.ts` /
|
||||
* `sandbox.ts` use for their own workspace resolution. When the path
|
||||
* doesn't exist (test fixtures, ahead-of-mkdir flows) we fall back to
|
||||
* the resolved-but-uncanonicalized form rather than throwing — the
|
||||
* downstream `spawn({cwd})` will fail with a useful ENOENT if the
|
||||
* workspace truly doesn't exist.
|
||||
*
|
||||
* NOTE: This is a **cross-module contract** (BX9_q) — `config.ts`,
|
||||
* `settings.ts`, `sandbox.ts`, and `httpAcpBridge.ts` all need to
|
||||
* canonicalize the same way for the bound-workspace check +
|
||||
* `sessionScope: 'single'` re-attach to work correctly across paths.
|
||||
* The contract: use `realpathSync.native` on the resolved absolute
|
||||
* path; fall back to `path.resolve` only when the path doesn't exist
|
||||
* yet. If a future change breaks this alignment (e.g. one module
|
||||
* starts lowercasing on Windows but this one doesn't), the
|
||||
* canonicalized request path won't match the canonicalized bound
|
||||
* path → every request returns `workspace_mismatch` even though the
|
||||
* human-readable paths look equivalent. There's no test that pins
|
||||
* the alignment; the integration suite would catch a divergence only
|
||||
* if it tested the specific casing / symlink path the affected
|
||||
* module changed.
|
||||
*
|
||||
* Stage 2 in-process (#3803 §10) collapses the bridge into core,
|
||||
* removing the bridge-side path resolution entirely. Stage 1.5
|
||||
* `@qwen-code/acp-bridge` lift (chiga0 finding 1) is the natural
|
||||
* place to extract a shared `canonicalizeWorkspace` primitive that
|
||||
* all four modules consume — the lowest-common-denominator
|
||||
* extraction is fine THERE because the package boundary forces the
|
||||
* call sites to converge. Until then, *any* change to how those
|
||||
* modules resolve workspace paths needs a matching change here.
|
||||
*
|
||||
* #4175 PR 18 extraction: this file is the new home of the primitive
|
||||
* for the serve layer. The bridge re-exports it so existing callers
|
||||
* continue to import from `httpAcpBridge.js`. The forthcoming
|
||||
* `WorkspaceFileSystem` boundary (PR 18 commits 3+) builds on top of
|
||||
* this single resolver.
|
||||
*/
|
||||
export function canonicalizeWorkspace(p: string): string {
|
||||
const resolved = path.resolve(p);
|
||||
try {
|
||||
// FIXME(stage-2): switch to `fs.promises.realpath` once the
|
||||
// bridge call sites become async-friendly. This sync syscall
|
||||
// runs on the hot `spawnOrAttach` path and blocks the event
|
||||
// loop for one filesystem stat per call. Single-user loopback
|
||||
// (Stage 1's design target) doesn't notice; high-concurrency
|
||||
// deployments will. Stage 2 in-process refactor removes the
|
||||
// entire bridge-side path resolution anyway, but if Stage 2
|
||||
// ever lands without that change, switch to the async version.
|
||||
return realpathSync.native(resolved);
|
||||
} catch (err) {
|
||||
// Only fall back to path.resolve for ENOENT (path doesn't exist
|
||||
// yet). Other filesystem errors (EACCES, EIO, ELOOP) should
|
||||
// propagate — swallowing them would hide transient I/O failures
|
||||
// behind misleading workspace_mismatch rejections.
|
||||
if (
|
||||
err &&
|
||||
typeof err === 'object' &&
|
||||
(err as { code?: unknown }).code === 'ENOENT'
|
||||
) {
|
||||
return resolved;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Branded absolute path that has passed the workspace boundary check.
|
||||
* The runtime value is just a string; the brand is a compile-time
|
||||
* marker that prevents PR 19/20 routes from accidentally bypassing
|
||||
* `resolveWithinWorkspace` and reading user-supplied input straight
|
||||
* to disk. Construct one only via `resolveWithinWorkspace`.
|
||||
*/
|
||||
export type ResolvedPath = string & { readonly __brand: 'ResolvedPath' };
|
||||
|
||||
/**
|
||||
* Intent declared at boundary entry. Used by callers (and the upcoming
|
||||
* `policy.ts` module) to decide ignore/trust handling. `resolveWithinWorkspace`
|
||||
* itself uses the intent only to differentiate ENOENT semantics: write
|
||||
* intents tolerate a non-existent leaf (the file is about to be created),
|
||||
* read intents do not.
|
||||
*
|
||||
* `'edit'` is a distinct intent from `'write'` so the trust gate, audit
|
||||
* payload, and exhaustiveness checks can reason about partial-replace
|
||||
* semantics separately from full-overwrite. Both gate identically in
|
||||
* `assertTrustedForIntent`; the split exists to keep audit events
|
||||
* faithful to the operation actually performed.
|
||||
*/
|
||||
export type Intent = 'read' | 'write' | 'edit' | 'list' | 'glob' | 'stat';
|
||||
|
||||
const ENOENT_TOLERATING_INTENTS: ReadonlySet<Intent> = new Set([
|
||||
'write',
|
||||
'stat',
|
||||
]);
|
||||
|
||||
/**
|
||||
* Detect Windows-targeted path attack patterns that bypass naive
|
||||
* boundary checks. Adapted from claude-code's
|
||||
* `hasSuspiciousWindowsPathPattern` (`src/utils/permissions/filesystem.ts`).
|
||||
*
|
||||
* Why detection rather than normalization:
|
||||
*
|
||||
* 1. Short-name normalization depends on the file existing. For a
|
||||
* write intent the leaf is absent by definition, so normalization
|
||||
* can't run.
|
||||
* 2. Filesystem state can change between normalization and access
|
||||
* (TOCTOU), so a "normalized then check" pipeline still admits
|
||||
* races. Detecting the dangerous *literal* on input closes that
|
||||
* window.
|
||||
* 3. The patterns are cheap to detect and produce zero false
|
||||
* positives on legitimate POSIX filenames the daemon expects to
|
||||
* receive (workspace files are project sources / configs, never
|
||||
* `\\?\` long-path prefixes).
|
||||
*
|
||||
* Checked patterns:
|
||||
* - NTFS ADS (`:` after position 2 — drive-letter slot exempted)
|
||||
* - 8.3 short names (`~\d`)
|
||||
* - Long-path prefixes (`\\?\`, `\\.\`, `//?/`, `//./`)
|
||||
* - Trailing dots / spaces (Windows strips during resolution)
|
||||
* - DOS device names as final extension (`.CON`, `.PRN`, ...)
|
||||
* - Three-or-more consecutive dots used as a path component
|
||||
* - UNC prefix (`\\server\share`, `//server/share`) — also blocks
|
||||
* loopback DNS / SMB lookups during resolution.
|
||||
*
|
||||
* NTFS-on-Linux mounts (`ntfs-3g`) admit the same bypasses except
|
||||
* the colon syntax (which only the Windows kernel parses), so the
|
||||
* platform gate exists only for the ADS branch; everything else is
|
||||
* checked unconditionally.
|
||||
*/
|
||||
export function hasSuspiciousPathPattern(p: string): boolean {
|
||||
if (process.platform === 'win32') {
|
||||
const colonIndex = p.indexOf(':', 2);
|
||||
if (colonIndex !== -1) return true;
|
||||
}
|
||||
if (/~\d/.test(p)) return true;
|
||||
if (
|
||||
p.startsWith('\\\\?\\') ||
|
||||
p.startsWith('\\\\.\\') ||
|
||||
p.startsWith('//?/') ||
|
||||
p.startsWith('//./')
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
if (
|
||||
(p.startsWith('\\\\') && p.length > 2 && p[2] !== '\\') ||
|
||||
(p.startsWith('//') && p.length > 2 && p[2] !== '/')
|
||||
) {
|
||||
// UNC prefix `\\server\share` / `//server/share` — never legitimate
|
||||
// input from a daemon client. The earlier long-path check covers
|
||||
// the special device variants (`\\?\`, `\\.\`).
|
||||
return true;
|
||||
}
|
||||
if (/(^|\/|\\)\.{3,}(\/|\\|$)/.test(p)) return true;
|
||||
// Per-component checks below: skip empty segments and the legitimate
|
||||
// POSIX traversal tokens `.` / `..`. Bare `.` and `..` are fine
|
||||
// inputs — the boundary's `path.resolve` + `isWithinRoot` will reject
|
||||
// any traversal that lands outside the workspace.
|
||||
for (const seg of p.split(/[\\/]/)) {
|
||||
if (seg === '' || seg === '.' || seg === '..') continue;
|
||||
if (/[.\s]+$/.test(seg)) return true;
|
||||
if (/\.(CON|PRN|AUX|NUL|COM[1-9]|LPT[1-9])$/i.test(seg)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Default ancestor-walk depth limit for ENOENT fallback. */
|
||||
const MAX_ANCESTOR_HOPS = 40;
|
||||
|
||||
/**
|
||||
* Walk up `absolute` until a component exists on disk. Returns the
|
||||
* existing ancestor and the trailing components that don't exist
|
||||
* yet, joined with the platform separator. Used by the ENOENT
|
||||
* fallback in `resolveWithinWorkspace` to canonicalize the existing
|
||||
* portion (resolving any symlink in the parent chain) before
|
||||
* boundary-checking the eventual write target.
|
||||
*/
|
||||
async function findExistingAncestor(
|
||||
absolute: string,
|
||||
): Promise<{ ancestor: string; tail: string }> {
|
||||
let current = absolute;
|
||||
const tailParts: string[] = [];
|
||||
for (let i = 0; i < MAX_ANCESTOR_HOPS; i++) {
|
||||
try {
|
||||
await fsp.stat(current);
|
||||
return { ancestor: current, tail: tailParts.join(path.sep) };
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException)?.code;
|
||||
if (code === 'ENOENT') {
|
||||
// ancestor doesn't exist yet; keep walking up
|
||||
} else if (code === 'ENOTDIR') {
|
||||
// A regular file sits where we expected a directory (e.g.
|
||||
// write target `${ws}/file.txt/child`). Walking up would
|
||||
// happily realpath the file's parent and return a
|
||||
// "canonical" the eventual write cannot use. Reject up-front
|
||||
// so the orchestrator emits an `fs.denied` for the actual
|
||||
// shape of the user error rather than silently passing
|
||||
// boundary inspection and 500-ing later at write time.
|
||||
throw new FsError(
|
||||
'parse_error',
|
||||
`path component is not a directory: ${absolute}`,
|
||||
{
|
||||
cause: err,
|
||||
hint: 'a non-directory file occupies a path segment',
|
||||
},
|
||||
);
|
||||
} else {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
const parent = path.dirname(current);
|
||||
if (parent === current) {
|
||||
throw new FsError(
|
||||
'path_not_found',
|
||||
`no existing ancestor for ${absolute}`,
|
||||
);
|
||||
}
|
||||
tailParts.unshift(path.basename(current));
|
||||
current = parent;
|
||||
}
|
||||
throw new FsError(
|
||||
'path_not_found',
|
||||
`path traversal exceeded ${MAX_ANCESTOR_HOPS} hops while finding ancestor`,
|
||||
{
|
||||
hint: 'path is too deeply nested or filesystem is responding unexpectedly',
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolve a daemon-input path to an absolute, symlink-canonicalized
|
||||
* `ResolvedPath` that is provably inside `boundWorkspace`. Throws
|
||||
* `FsError` on any boundary violation.
|
||||
*
|
||||
* Algorithm (#4175 PR 18 plan, claude-code-style chain check):
|
||||
*
|
||||
* 1. Reject suspicious literal patterns before any I/O.
|
||||
* 2. Resolve against `boundWorkspace` to absolutize relative inputs.
|
||||
* 3. Cheap pre-filter: textual containment check rejects obvious
|
||||
* `..` traversal without paying for `realpath`.
|
||||
* 4. `fs.promises.realpath` on the absolute path. Node's realpath
|
||||
* follows the entire symlink chain natively (SYMLOOP_MAX-bounded);
|
||||
* if any hop escapes the workspace, the final canonical lands
|
||||
* outside and step 5 catches it.
|
||||
* 5. ENOENT (write/stat intents): walk up to first existing ancestor,
|
||||
* realpath the ancestor, re-attach the unresolved tail. The tail
|
||||
* can't introduce new symlinks (it doesn't exist), so the joined
|
||||
* result is the actual write target the OS will use.
|
||||
* 6. Final containment check against canonicalized `boundWorkspace`.
|
||||
* If the canonical landed outside but the resolved-without-realpath
|
||||
* version was inside, classify as `symlink_escape`; otherwise as
|
||||
* `path_outside_workspace`.
|
||||
*
|
||||
* The brand on the return type is the contract that PR 19/20 routes
|
||||
* may not construct one without going through this function.
|
||||
*/
|
||||
export async function resolveWithinWorkspace(
|
||||
input: string,
|
||||
boundWorkspace: string,
|
||||
intent: Intent,
|
||||
): Promise<ResolvedPath> {
|
||||
if (typeof input !== 'string' || input.length === 0) {
|
||||
throw new FsError('parse_error', 'path must be a non-empty string');
|
||||
}
|
||||
if (hasSuspiciousPathPattern(input)) {
|
||||
throw new FsError(
|
||||
'path_outside_workspace',
|
||||
`path contains suspicious pattern: ${input}`,
|
||||
{
|
||||
hint: 'paths with NTFS ADS, 8.3 short names, UNC prefixes, or trailing dots are rejected outright',
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
const boundCanonical = canonicalizeWorkspace(boundWorkspace);
|
||||
const absolute = path.resolve(boundCanonical, input);
|
||||
|
||||
// Cheap pre-filter on the resolved-but-not-realpathed form. Catches
|
||||
// textual `..` escape without an FS call.
|
||||
if (!isWithinRoot(absolute, boundCanonical)) {
|
||||
throw new FsError(
|
||||
'path_outside_workspace',
|
||||
`path escapes workspace: ${input}`,
|
||||
);
|
||||
}
|
||||
|
||||
let canonical: string;
|
||||
try {
|
||||
canonical = await fsp.realpath(absolute);
|
||||
} catch (err) {
|
||||
const code = (err as NodeJS.ErrnoException)?.code;
|
||||
if (code === 'ENOENT' && ENOENT_TOLERATING_INTENTS.has(intent)) {
|
||||
const { ancestor, tail } = await findExistingAncestor(absolute);
|
||||
const ancestorReal = await fsp.realpath(ancestor);
|
||||
canonical = tail ? path.join(ancestorReal, tail) : ancestorReal;
|
||||
} else if (code === 'ENOENT') {
|
||||
throw new FsError('path_not_found', `path does not exist: ${input}`, {
|
||||
cause: err,
|
||||
});
|
||||
} else if (code === 'ELOOP') {
|
||||
throw new FsError(
|
||||
'symlink_escape',
|
||||
`symlink loop or chain too long for ${input}`,
|
||||
{
|
||||
cause: err,
|
||||
hint: 'a symlink in the path forms a cycle or exceeds SYMLOOP_MAX',
|
||||
},
|
||||
);
|
||||
} else if (code === 'EACCES') {
|
||||
throw new FsError(
|
||||
'permission_denied',
|
||||
`permission denied resolving ${input}`,
|
||||
{ cause: err },
|
||||
);
|
||||
} else {
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
if (!isWithinRoot(canonical, boundCanonical)) {
|
||||
const kind: FsErrorKind =
|
||||
canonical !== absolute ? 'symlink_escape' : 'path_outside_workspace';
|
||||
throw new FsError(
|
||||
kind,
|
||||
kind === 'symlink_escape'
|
||||
? `symlink resolves outside workspace: ${input}`
|
||||
: `path escapes workspace: ${input}`,
|
||||
);
|
||||
}
|
||||
|
||||
return canonical as ResolvedPath;
|
||||
}
|
||||
241
packages/cli/src/serve/fs/policy.test.ts
Normal file
241
packages/cli/src/serve/fs/policy.test.ts
Normal file
|
|
@ -0,0 +1,241 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { promises as fsp } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import { Ignore } from '@qwen-code/qwen-code-core';
|
||||
import {
|
||||
MAX_READ_BYTES,
|
||||
MAX_WRITE_BYTES,
|
||||
assertTrustedForIntent,
|
||||
detectBinary,
|
||||
enforceReadBytesSize,
|
||||
enforceReadSize,
|
||||
enforceWriteSize,
|
||||
shouldIgnore,
|
||||
} from './policy.js';
|
||||
import { canonicalizeWorkspace, resolveWithinWorkspace } from './paths.js';
|
||||
import { isFsError } from './errors.js';
|
||||
|
||||
describe('shouldIgnore', () => {
|
||||
let scratch: string;
|
||||
let workspace: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
scratch = await fsp.mkdtemp(
|
||||
path.join(os.tmpdir(), `qwen-policy-${randomBytes(4).toString('hex')}-`),
|
||||
);
|
||||
const wsDir = path.join(scratch, 'ws');
|
||||
await fsp.mkdir(wsDir, { recursive: true });
|
||||
workspace = canonicalizeWorkspace(wsDir);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await fsp.rm(scratch, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('flags files matching .gitignore-style patterns', async () => {
|
||||
const ignore = new Ignore().add(['*.log', 'dist/']);
|
||||
const target = path.join(workspace, 'app.log');
|
||||
await fsp.writeFile(target, '');
|
||||
const resolved = await resolveWithinWorkspace('app.log', workspace, 'read');
|
||||
expect(shouldIgnore(resolved, workspace, ignore)).toEqual({
|
||||
ignored: true,
|
||||
category: 'file',
|
||||
});
|
||||
});
|
||||
|
||||
it('flags directory patterns', async () => {
|
||||
const ignore = new Ignore().add(['build/']);
|
||||
const dir = path.join(workspace, 'build');
|
||||
await fsp.mkdir(dir);
|
||||
const resolved = await resolveWithinWorkspace('build', workspace, 'list');
|
||||
expect(shouldIgnore(resolved, workspace, ignore, 'directory')).toEqual({
|
||||
ignored: true,
|
||||
category: 'directory',
|
||||
});
|
||||
});
|
||||
|
||||
it('flags non-trailing-slash directory patterns like node_modules', async () => {
|
||||
const ignore = new Ignore().add(['node_modules']);
|
||||
const dir = path.join(workspace, 'node_modules');
|
||||
await fsp.mkdir(dir);
|
||||
const resolved = await resolveWithinWorkspace(
|
||||
'node_modules',
|
||||
workspace,
|
||||
'list',
|
||||
);
|
||||
// No-trailing-slash patterns get registered in both ignorers; either
|
||||
// shows up as an ignore hit. We accept whichever category fires.
|
||||
const verdict = shouldIgnore(resolved, workspace, ignore, 'directory');
|
||||
expect(verdict.ignored).toBe(true);
|
||||
});
|
||||
|
||||
it('returns false when no rule matches', async () => {
|
||||
const ignore = new Ignore().add(['*.log']);
|
||||
const target = path.join(workspace, 'src.ts');
|
||||
await fsp.writeFile(target, '');
|
||||
const resolved = await resolveWithinWorkspace('src.ts', workspace, 'read');
|
||||
expect(shouldIgnore(resolved, workspace, ignore)).toEqual({
|
||||
ignored: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('never ignores the workspace root itself', async () => {
|
||||
const ignore = new Ignore().add(['*']);
|
||||
const resolved = await resolveWithinWorkspace('.', workspace, 'list');
|
||||
expect(shouldIgnore(resolved, workspace, ignore)).toEqual({
|
||||
ignored: false,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('assertTrustedForIntent', () => {
|
||||
it('passes when trusted regardless of intent', () => {
|
||||
expect(() => assertTrustedForIntent(true, 'read')).not.toThrow();
|
||||
expect(() => assertTrustedForIntent(true, 'write')).not.toThrow();
|
||||
expect(() => assertTrustedForIntent(true, 'list')).not.toThrow();
|
||||
});
|
||||
|
||||
it('passes for read-style intents when untrusted', () => {
|
||||
expect(() => assertTrustedForIntent(false, 'read')).not.toThrow();
|
||||
expect(() => assertTrustedForIntent(false, 'list')).not.toThrow();
|
||||
expect(() => assertTrustedForIntent(false, 'glob')).not.toThrow();
|
||||
expect(() => assertTrustedForIntent(false, 'stat')).not.toThrow();
|
||||
});
|
||||
|
||||
it('throws untrusted_workspace for write intent when untrusted', () => {
|
||||
const err = (() => {
|
||||
try {
|
||||
assertTrustedForIntent(false, 'write');
|
||||
} catch (e) {
|
||||
return e;
|
||||
}
|
||||
throw new Error('expected throw');
|
||||
})();
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string; status: number }).kind).toBe(
|
||||
'untrusted_workspace',
|
||||
);
|
||||
expect((err as { kind: string; status: number }).status).toBe(403);
|
||||
});
|
||||
|
||||
it('throws untrusted_workspace for edit intent when untrusted', () => {
|
||||
expect(() => assertTrustedForIntent(false, 'edit')).toThrowError(
|
||||
/edit operations are forbidden/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('enforceReadSize', () => {
|
||||
it('returns full size when under the cap', () => {
|
||||
expect(enforceReadSize(1024)).toEqual({
|
||||
bytesToRead: 1024,
|
||||
truncated: false,
|
||||
});
|
||||
});
|
||||
|
||||
it('returns the cap and truncated=true when over', () => {
|
||||
expect(enforceReadSize(MAX_READ_BYTES + 1)).toEqual({
|
||||
bytesToRead: MAX_READ_BYTES,
|
||||
truncated: true,
|
||||
});
|
||||
});
|
||||
|
||||
it('honors a per-call max override', () => {
|
||||
expect(enforceReadSize(2048, 1024)).toEqual({
|
||||
bytesToRead: 1024,
|
||||
truncated: true,
|
||||
});
|
||||
expect(enforceReadSize(512, 1024)).toEqual({
|
||||
bytesToRead: 512,
|
||||
truncated: false,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('enforceWriteSize', () => {
|
||||
it('passes when under the write cap', () => {
|
||||
expect(() => enforceWriteSize(1024)).not.toThrow();
|
||||
expect(() => enforceWriteSize(MAX_WRITE_BYTES)).not.toThrow();
|
||||
});
|
||||
|
||||
it('throws file_too_large when over the cap', () => {
|
||||
const err = (() => {
|
||||
try {
|
||||
enforceWriteSize(MAX_WRITE_BYTES + 1);
|
||||
} catch (e) {
|
||||
return e;
|
||||
}
|
||||
throw new Error('expected throw');
|
||||
})();
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string; status: number }).kind).toBe(
|
||||
'file_too_large',
|
||||
);
|
||||
expect((err as { kind: string; status: number }).status).toBe(413);
|
||||
});
|
||||
});
|
||||
|
||||
describe('enforceReadBytesSize', () => {
|
||||
it('passes when under the cap', () => {
|
||||
expect(() => enforceReadBytesSize(1024)).not.toThrow();
|
||||
});
|
||||
|
||||
it('throws file_too_large when over', () => {
|
||||
const err = (() => {
|
||||
try {
|
||||
enforceReadBytesSize(MAX_READ_BYTES + 1);
|
||||
} catch (e) {
|
||||
return e;
|
||||
}
|
||||
throw new Error('expected throw');
|
||||
})();
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('file_too_large');
|
||||
});
|
||||
});
|
||||
|
||||
describe('detectBinary', () => {
|
||||
let scratch: string;
|
||||
let workspace: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
scratch = await fsp.mkdtemp(
|
||||
path.join(os.tmpdir(), `qwen-binary-${randomBytes(4).toString('hex')}-`),
|
||||
);
|
||||
const wsDir = path.join(scratch, 'ws');
|
||||
await fsp.mkdir(wsDir, { recursive: true });
|
||||
workspace = canonicalizeWorkspace(wsDir);
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await fsp.rm(scratch, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it('reports text files as non-binary', async () => {
|
||||
const target = path.join(workspace, 'plain.txt');
|
||||
await fsp.writeFile(target, 'hello world\nsecond line\n');
|
||||
const resolved = await resolveWithinWorkspace(
|
||||
'plain.txt',
|
||||
workspace,
|
||||
'read',
|
||||
);
|
||||
expect(await detectBinary(resolved)).toBe(false);
|
||||
});
|
||||
|
||||
it('reports buffers with null bytes as binary', async () => {
|
||||
const target = path.join(workspace, 'bin.dat');
|
||||
const buf = Buffer.alloc(64);
|
||||
buf[10] = 0; // null byte triggers binary detection
|
||||
await fsp.writeFile(target, buf);
|
||||
const resolved = await resolveWithinWorkspace('bin.dat', workspace, 'read');
|
||||
expect(await detectBinary(resolved)).toBe(true);
|
||||
});
|
||||
});
|
||||
214
packages/cli/src/serve/fs/policy.ts
Normal file
214
packages/cli/src/serve/fs/policy.ts
Normal file
|
|
@ -0,0 +1,214 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import * as path from 'node:path';
|
||||
import type { Ignore} from '@qwen-code/qwen-code-core';
|
||||
import { isBinaryFile } from '@qwen-code/qwen-code-core';
|
||||
import { FsError } from './errors.js';
|
||||
import type { Intent, ResolvedPath } from './paths.js';
|
||||
|
||||
/**
|
||||
* Maximum bytes returned by `readText`. Mirrors claude-code's
|
||||
* `MAX_OUTPUT_SIZE` (`src/utils/file.ts`) — large enough for
|
||||
* typical source files, small enough that an SSE replay buffer
|
||||
* doesn't fill on a single read. Reads above this size return
|
||||
* truncated content with `meta.truncated = true`.
|
||||
*/
|
||||
export const MAX_READ_BYTES = 256 * 1024;
|
||||
|
||||
/**
|
||||
* Maximum bytes accepted by `writeText` / `edit`. Sized below the
|
||||
* `express.json({ limit: '10mb' })` middleware cap so a request
|
||||
* body that survives the parser also survives the policy gate.
|
||||
* Halving the parser cap leaves headroom for JSON envelope
|
||||
* overhead (path, encoding hints, etc.).
|
||||
*/
|
||||
export const MAX_WRITE_BYTES = 5 * 1024 * 1024;
|
||||
|
||||
/**
|
||||
* Sample size used for content-based binary detection. Aligned with
|
||||
* `isBinaryFile` from `packages/core/src/utils/fileUtils.ts:414` so
|
||||
* the boundary and the existing tool layer agree on what counts as
|
||||
* "binary".
|
||||
*/
|
||||
export const BINARY_PROBE_BYTES = 4096;
|
||||
|
||||
/**
|
||||
* Result of an ignore-rule check against a resolved path. The
|
||||
* `category` field exists so audit events can distinguish
|
||||
* file-pattern vs directory-pattern matches without exposing the
|
||||
* `Ignore` class's private state.
|
||||
*/
|
||||
export interface IgnoreVerdict {
|
||||
ignored: boolean;
|
||||
category?: 'file' | 'directory';
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether `absolute` is matched by the workspace's ignore
|
||||
* rules. The check is computed against the workspace-relative
|
||||
* form of the path, matching the convention of `.gitignore` /
|
||||
* `.qwenignore` patterns.
|
||||
*
|
||||
* Returns `{ ignored: false }` when:
|
||||
* - the path equals `boundWorkspace` (the workspace root itself
|
||||
* can never be ignored),
|
||||
* - the relative path escapes the workspace (caller's bug; the
|
||||
* boundary check should have rejected first),
|
||||
* - neither the file nor directory filter matches.
|
||||
*
|
||||
* The `kind` parameter tells the function whether the resolved
|
||||
* path is a regular file or a directory. This avoids an extra
|
||||
* `stat` call (the orchestrator already has the dirent info from
|
||||
* its `list`/`stat` step) and lets us check directory patterns
|
||||
* with the trailing slash the underlying `ignore` library expects
|
||||
* for `foo/`-style entries.
|
||||
*/
|
||||
export function shouldIgnore(
|
||||
absolute: ResolvedPath,
|
||||
boundWorkspace: string,
|
||||
ignore: Ignore,
|
||||
kind: 'file' | 'directory' = 'file',
|
||||
): IgnoreVerdict {
|
||||
const rel = path.relative(boundWorkspace, absolute as string);
|
||||
if (rel === '' || rel.startsWith('..')) return { ignored: false };
|
||||
const fileFilter = ignore.getFileFilter();
|
||||
const dirFilter = ignore.getDirectoryFilter();
|
||||
if (kind === 'directory') {
|
||||
// `foo/` patterns require the trailing slash; `node_modules`-style
|
||||
// patterns (no slash, no extension) are added to both ignorers by
|
||||
// `Ignore.add`, so the slash-suffixed probe still hits them.
|
||||
const withSlash = rel.endsWith('/') ? rel : `${rel}/`;
|
||||
if (dirFilter(withSlash)) return { ignored: true, category: 'directory' };
|
||||
if (fileFilter(rel)) return { ignored: true, category: 'file' };
|
||||
return { ignored: false };
|
||||
}
|
||||
if (fileFilter(rel)) return { ignored: true, category: 'file' };
|
||||
if (dirFilter(rel)) return { ignored: true, category: 'directory' };
|
||||
return { ignored: false };
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply the trust gate to an intent. Read-shaped intents (`read`,
|
||||
* `list`, `glob`, `stat`) always pass — remote clients debugging
|
||||
* an untrusted workspace still need to inspect state. Mutating
|
||||
* intents (`write`, `edit`) on an untrusted workspace throw
|
||||
* `untrusted_workspace`, which routes surface as 403.
|
||||
*
|
||||
* Trust is captured at factory build (a snapshot of
|
||||
* `Config.isTrustedFolder()`); the orchestrator does not consult
|
||||
* Config per-request, so an IDE that flips trust mid-request
|
||||
* cannot split-brain a session.
|
||||
*
|
||||
* The body is an exhaustive `switch` so adding a new variant to
|
||||
* `Intent` (e.g. a future `'delete'`) becomes a TypeScript error
|
||||
* here — the gate must explicitly classify every intent rather
|
||||
* than silently defaulting to "allowed".
|
||||
*/
|
||||
export function assertTrustedForIntent(trusted: boolean, intent: Intent): void {
|
||||
if (trusted) return;
|
||||
switch (intent) {
|
||||
case 'read':
|
||||
case 'list':
|
||||
case 'glob':
|
||||
case 'stat':
|
||||
return;
|
||||
case 'write':
|
||||
case 'edit':
|
||||
throw new FsError(
|
||||
'untrusted_workspace',
|
||||
`workspace is not trusted; ${intent} operations are forbidden`,
|
||||
{
|
||||
hint: 'mark the folder as trusted in the daemon configuration to allow writes',
|
||||
},
|
||||
);
|
||||
default: {
|
||||
const _exhaustive: never = intent;
|
||||
throw new FsError(
|
||||
'untrusted_workspace',
|
||||
`workspace is not trusted; intent "${String(_exhaustive)}" is not classified`,
|
||||
{
|
||||
hint: 'unknown intent reached the trust gate; classify it in policy.ts',
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Outcome of a read-size enforcement check. */
|
||||
export interface ReadSizeOutcome {
|
||||
/** Number of bytes the caller should read. */
|
||||
bytesToRead: number;
|
||||
/** True iff the file is larger than the cap and content was truncated. */
|
||||
truncated: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decide how many bytes a `readText` call should pull off disk
|
||||
* given the file's actual size. Reads above the cap surface
|
||||
* `truncated: true`; the boundary intentionally does NOT throw,
|
||||
* matching claude-code's behavior so a large config file still
|
||||
* returns useful content rather than an opaque error.
|
||||
*/
|
||||
export function enforceReadSize(
|
||||
fileBytes: number,
|
||||
maxBytes: number = MAX_READ_BYTES,
|
||||
): ReadSizeOutcome {
|
||||
if (fileBytes <= maxBytes) {
|
||||
return { bytesToRead: fileBytes, truncated: false };
|
||||
}
|
||||
return { bytesToRead: maxBytes, truncated: true };
|
||||
}
|
||||
|
||||
/**
|
||||
* Throw `file_too_large` if `bytes` exceeds the write cap. Used by
|
||||
* `writeText` and `edit`, which (unlike text reads) cannot silently
|
||||
* truncate without corrupting the file.
|
||||
*/
|
||||
export function enforceWriteSize(
|
||||
bytes: number,
|
||||
maxBytes: number = MAX_WRITE_BYTES,
|
||||
): void {
|
||||
if (bytes > maxBytes) {
|
||||
throw new FsError(
|
||||
'file_too_large',
|
||||
`payload of ${bytes} bytes exceeds write limit of ${maxBytes} bytes`,
|
||||
{
|
||||
hint: 'split the write into smaller chunks or raise the daemon limit',
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Throw `file_too_large` if a read would exceed the cap and the
|
||||
* caller cannot accept truncation (used by `readBytes`, where
|
||||
* partial content is unsafe to return).
|
||||
*/
|
||||
export function enforceReadBytesSize(
|
||||
fileBytes: number,
|
||||
maxBytes: number = MAX_READ_BYTES,
|
||||
): void {
|
||||
if (fileBytes > maxBytes) {
|
||||
throw new FsError(
|
||||
'file_too_large',
|
||||
`file of ${fileBytes} bytes exceeds read limit of ${maxBytes} bytes`,
|
||||
{
|
||||
hint: 'use readText for capped truncation, or raise the daemon limit',
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decide whether a path is binary using the existing core helper.
|
||||
* Wrapping it here lets PR 18 callers keep a single `policy.ts`
|
||||
* import surface and lets future tweaks (e.g. extension allow-list)
|
||||
* land without touching the orchestrator.
|
||||
*/
|
||||
export async function detectBinary(filePath: ResolvedPath): Promise<boolean> {
|
||||
return isBinaryFile(filePath as string);
|
||||
}
|
||||
435
packages/cli/src/serve/fs/workspaceFileSystem.test.ts
Normal file
435
packages/cli/src/serve/fs/workspaceFileSystem.test.ts
Normal file
|
|
@ -0,0 +1,435 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
||||
import { promises as fsp } from 'node:fs';
|
||||
import * as os from 'node:os';
|
||||
import * as path from 'node:path';
|
||||
import { randomBytes } from 'node:crypto';
|
||||
import { Ignore } from '@qwen-code/qwen-code-core';
|
||||
import {
|
||||
FS_ACCESS_EVENT_TYPE,
|
||||
FS_DENIED_EVENT_TYPE,
|
||||
createWorkspaceFileSystemFactory,
|
||||
type WorkspaceFileSystem,
|
||||
type WorkspaceFileSystemFactory,
|
||||
} from './index.js';
|
||||
import type { BridgeEvent } from '../eventBus.js';
|
||||
import { canonicalizeWorkspace } from './paths.js';
|
||||
import { isFsError } from './errors.js';
|
||||
|
||||
interface Harness {
|
||||
factory: WorkspaceFileSystemFactory;
|
||||
fs: WorkspaceFileSystem;
|
||||
events: BridgeEvent[];
|
||||
workspace: string;
|
||||
scratch: string;
|
||||
}
|
||||
|
||||
async function makeHarness(opts?: {
|
||||
trusted?: boolean;
|
||||
ignore?: Ignore;
|
||||
}): Promise<Harness> {
|
||||
const scratch = await fsp.mkdtemp(
|
||||
path.join(os.tmpdir(), `qwen-wfs-${randomBytes(4).toString('hex')}-`),
|
||||
);
|
||||
const wsDir = path.join(scratch, 'ws');
|
||||
await fsp.mkdir(wsDir);
|
||||
const workspace = canonicalizeWorkspace(wsDir);
|
||||
const events: BridgeEvent[] = [];
|
||||
const factory = createWorkspaceFileSystemFactory({
|
||||
boundWorkspace: workspace,
|
||||
trusted: opts?.trusted ?? true,
|
||||
emit: (e) => events.push(e),
|
||||
ignore: opts?.ignore,
|
||||
});
|
||||
const fs = factory.forRequest({
|
||||
originatorClientId: 'client-x',
|
||||
sessionId: 'sess-1',
|
||||
route: 'TEST /op',
|
||||
});
|
||||
return { factory, fs, events, workspace, scratch };
|
||||
}
|
||||
|
||||
async function teardown(h: Harness): Promise<void> {
|
||||
await fsp.rm(h.scratch, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
describe('WorkspaceFileSystem - resolve and stat', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
});
|
||||
afterEach(async () => {
|
||||
await teardown(h);
|
||||
});
|
||||
|
||||
it('resolves an existing path and emits no audit on resolve alone', async () => {
|
||||
const target = path.join(h.workspace, 'a.txt');
|
||||
await fsp.writeFile(target, 'x');
|
||||
const r = await h.fs.resolve('a.txt', 'read');
|
||||
expect(r).toBeTruthy();
|
||||
expect(
|
||||
h.events.filter((e) => e.type === FS_ACCESS_EVENT_TYPE),
|
||||
).toHaveLength(0);
|
||||
});
|
||||
|
||||
it('records fs.denied when resolve fails', async () => {
|
||||
await expect(h.fs.resolve('../escape', 'read')).rejects.toBeDefined();
|
||||
const denied = h.events.filter((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toHaveLength(1);
|
||||
expect(denied[0].data).toMatchObject({
|
||||
errorKind: 'path_outside_workspace',
|
||||
});
|
||||
});
|
||||
|
||||
it('stat returns kind/sizeBytes/modifiedMs and emits fs.access', async () => {
|
||||
const target = path.join(h.workspace, 'b.txt');
|
||||
await fsp.writeFile(target, 'hi');
|
||||
const r = await h.fs.resolve('b.txt', 'stat');
|
||||
const st = await h.fs.stat(r);
|
||||
expect(st.kind).toBe('file');
|
||||
expect(st.sizeBytes).toBe(2);
|
||||
expect(st.modifiedMs).toBeGreaterThan(0);
|
||||
expect(h.events.find((e) => e.type === FS_ACCESS_EVENT_TYPE)).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - readText', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('reads small text and reports lineEnding', async () => {
|
||||
const target = path.join(h.workspace, 'plain.txt');
|
||||
await fsp.writeFile(target, 'hello\nworld\n');
|
||||
const r = await h.fs.resolve('plain.txt', 'read');
|
||||
const out = await h.fs.readText(r);
|
||||
expect(out.content).toBe('hello\nworld\n');
|
||||
expect(out.meta.lineEnding).toBe('lf');
|
||||
expect(out.meta.truncated).toBeUndefined();
|
||||
});
|
||||
|
||||
it('truncates content above maxBytes and sets meta.truncated', async () => {
|
||||
const big = path.join(h.workspace, 'big.txt');
|
||||
const content = 'a'.repeat(2048);
|
||||
await fsp.writeFile(big, content);
|
||||
const r = await h.fs.resolve('big.txt', 'read');
|
||||
const out = await h.fs.readText(r, { maxBytes: 1024 });
|
||||
expect(out.meta.truncated).toBe(true);
|
||||
expect(out.content.length).toBeLessThanOrEqual(1024);
|
||||
});
|
||||
|
||||
it('throws file_too_large when file exceeds MAX_READ_BYTES regardless of opts.maxBytes', async () => {
|
||||
// Write a file larger than the soft cap and assert the boundary
|
||||
// refuses BEFORE delegating to lowFs (which would slurp the
|
||||
// whole file into memory).
|
||||
const big = path.join(h.workspace, 'huge.txt');
|
||||
const bytes = (await import('./policy.js')).MAX_READ_BYTES + 1;
|
||||
await fsp.writeFile(big, 'a'.repeat(bytes));
|
||||
const r = await h.fs.resolve('huge.txt', 'read');
|
||||
const err = await h.fs.readText(r).catch((e: unknown) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('file_too_large');
|
||||
// Audit was recorded for the denial (P0 silent-failure fix).
|
||||
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toBeDefined();
|
||||
expect((denied!.data as { errorKind: string }).errorKind).toBe(
|
||||
'file_too_large',
|
||||
);
|
||||
});
|
||||
|
||||
it('throws binary_file when reading binary content', async () => {
|
||||
const bin = path.join(h.workspace, 'bin.dat');
|
||||
const buf = Buffer.alloc(64);
|
||||
buf[5] = 0;
|
||||
await fsp.writeFile(bin, buf);
|
||||
const r = await h.fs.resolve('bin.dat', 'read');
|
||||
const err = await h.fs.readText(r).catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('binary_file');
|
||||
expect(h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE)).toBeDefined();
|
||||
});
|
||||
|
||||
it('annotates meta.matchedIgnore when path is ignored', async () => {
|
||||
const ignore = new Ignore().add(['*.log']);
|
||||
h = await makeHarness({ ignore });
|
||||
const target = path.join(h.workspace, 'app.log');
|
||||
await fsp.writeFile(target, 'log content');
|
||||
const r = await h.fs.resolve('app.log', 'read');
|
||||
const out = await h.fs.readText(r);
|
||||
expect(out.meta.matchedIgnore).toBe('file');
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - readBytes', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('returns raw bytes', async () => {
|
||||
const target = path.join(h.workspace, 'raw.bin');
|
||||
await fsp.writeFile(target, Buffer.from([1, 2, 3, 0, 4, 5]));
|
||||
const r = await h.fs.resolve('raw.bin', 'read');
|
||||
const buf = await h.fs.readBytes(r);
|
||||
expect(Array.from(buf)).toEqual([1, 2, 3, 0, 4, 5]);
|
||||
});
|
||||
|
||||
it('throws file_too_large above the cap', async () => {
|
||||
const target = path.join(h.workspace, 'huge.bin');
|
||||
await fsp.writeFile(target, Buffer.alloc(2048));
|
||||
const r = await h.fs.resolve('huge.bin', 'read');
|
||||
const err = await h.fs.readBytes(r, { maxBytes: 1024 }).catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('file_too_large');
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - list', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
const ignore = new Ignore().add(['*.log']);
|
||||
h = await makeHarness({ ignore });
|
||||
await fsp.writeFile(path.join(h.workspace, 'a.ts'), '');
|
||||
await fsp.writeFile(path.join(h.workspace, 'b.log'), '');
|
||||
await fsp.mkdir(path.join(h.workspace, 'sub'));
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('drops ignored entries by default', async () => {
|
||||
const r = await h.fs.resolve('.', 'list');
|
||||
const entries = await h.fs.list(r);
|
||||
const names = entries.map((e) => e.name).sort();
|
||||
expect(names).toEqual(['a.ts', 'sub']);
|
||||
});
|
||||
|
||||
it('includes ignored entries when includeIgnored is true', async () => {
|
||||
const r = await h.fs.resolve('.', 'list');
|
||||
const entries = await h.fs.list(r, { includeIgnored: true });
|
||||
const log = entries.find((e) => e.name === 'b.log');
|
||||
expect(log?.ignored).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - glob', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
await fsp.mkdir(path.join(h.workspace, 'src'));
|
||||
await fsp.writeFile(path.join(h.workspace, 'src', 'a.ts'), '');
|
||||
await fsp.writeFile(path.join(h.workspace, 'src', 'b.ts'), '');
|
||||
await fsp.writeFile(path.join(h.workspace, 'README.md'), '');
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('matches files by pattern', async () => {
|
||||
const hits = await h.fs.glob('src/*.ts');
|
||||
expect(hits.map((p) => path.basename(p)).sort()).toEqual(['a.ts', 'b.ts']);
|
||||
});
|
||||
|
||||
it('rejects patterns containing `..`', async () => {
|
||||
const err = await h.fs.glob('../**/*.ts').catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('parse_error');
|
||||
});
|
||||
|
||||
it('respects maxResults', async () => {
|
||||
const hits = await h.fs.glob('**/*', { maxResults: 1 });
|
||||
expect(hits).toHaveLength(1);
|
||||
});
|
||||
|
||||
it('filters ignored hits by default', async () => {
|
||||
const ignore = new Ignore().add(['*.md']);
|
||||
h = await makeHarness({ ignore });
|
||||
await fsp.writeFile(path.join(h.workspace, 'README.md'), '');
|
||||
await fsp.writeFile(path.join(h.workspace, 'src.ts'), '');
|
||||
const hits = await h.fs.glob('*');
|
||||
const names = hits.map((p) => path.basename(p)).sort();
|
||||
expect(names).not.toContain('README.md');
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - write/edit', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('writes text and emits fs.access', async () => {
|
||||
const r = await h.fs.resolve('newfile.txt', 'write');
|
||||
await h.fs.writeText(r, 'hello');
|
||||
const written = await fsp.readFile(r as string, 'utf-8');
|
||||
expect(written).toBe('hello');
|
||||
const access = h.events.find(
|
||||
(e) =>
|
||||
e.type === FS_ACCESS_EVENT_TYPE &&
|
||||
(e.data as { intent: string }).intent === 'write',
|
||||
);
|
||||
expect(access).toBeDefined();
|
||||
});
|
||||
|
||||
it('rejects oversize writes with file_too_large', async () => {
|
||||
const r = await h.fs.resolve('huge.txt', 'write');
|
||||
const err = await h.fs
|
||||
.writeText(r, 'a'.repeat(6 * 1024 * 1024))
|
||||
.catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('file_too_large');
|
||||
});
|
||||
|
||||
it('edits an existing file by replacing oldText with newText', async () => {
|
||||
const target = path.join(h.workspace, 'config.txt');
|
||||
await fsp.writeFile(target, 'foo=1\nbar=2\n');
|
||||
const r = await h.fs.resolve('config.txt', 'write');
|
||||
const out = await h.fs.edit(r, 'foo=1', 'foo=42');
|
||||
expect(out.writtenBytes).toBeGreaterThan(0);
|
||||
const after = await fsp.readFile(target, 'utf-8');
|
||||
expect(after).toBe('foo=42\nbar=2\n');
|
||||
});
|
||||
|
||||
it('throws parse_error when oldText is not present', async () => {
|
||||
const target = path.join(h.workspace, 'c.txt');
|
||||
await fsp.writeFile(target, 'abc');
|
||||
const r = await h.fs.resolve('c.txt', 'write');
|
||||
const err = await h.fs.edit(r, 'NOT THERE', 'X').catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('parse_error');
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - trust gate', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness({ trusted: false });
|
||||
await fsp.writeFile(path.join(h.workspace, 'r.txt'), 'r');
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('allows read on untrusted workspace', async () => {
|
||||
const r = await h.fs.resolve('r.txt', 'read');
|
||||
const out = await h.fs.readText(r);
|
||||
expect(out.content).toBe('r');
|
||||
});
|
||||
|
||||
it('denies write with untrusted_workspace', async () => {
|
||||
const r = await h.fs.resolve('w.txt', 'write');
|
||||
const err = await h.fs.writeText(r, 'x').catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('untrusted_workspace');
|
||||
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toBeDefined();
|
||||
});
|
||||
|
||||
it('denies edit with untrusted_workspace', async () => {
|
||||
await fsp.writeFile(path.join(h.workspace, 'e.txt'), 'old');
|
||||
const r = await h.fs.resolve('e.txt', 'edit');
|
||||
const err = await h.fs.edit(r, 'old', 'new').catch((e) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('untrusted_workspace');
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - audit always emits on body errors', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('wraps a raw ENOENT from edit() and emits fs.denied', async () => {
|
||||
// edit() reads via fsp.readFile; against a non-existent file the
|
||||
// raw ENOENT used to escape uncategorized — the wrapper now
|
||||
// converts it to FsError(path_not_found) and records denial.
|
||||
const r = await h.fs.resolve('vanished.txt', 'write');
|
||||
const err = await h.fs.edit(r, 'a', 'b').catch((e: unknown) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('path_not_found');
|
||||
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toBeDefined();
|
||||
expect((denied!.data as { errorKind: string }).errorKind).toBe(
|
||||
'path_not_found',
|
||||
);
|
||||
});
|
||||
|
||||
it('rejects ENOTDIR ancestor walk with parse_error rather than passing boundary', async () => {
|
||||
// Place a regular file where the request expects a directory.
|
||||
await fsp.writeFile(path.join(h.workspace, 'block'), 'not a dir');
|
||||
const err = await h.fs
|
||||
.resolve('block/leaf', 'write')
|
||||
.catch((e: unknown) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('parse_error');
|
||||
const denied = h.events.find((e) => e.type === FS_DENIED_EVENT_TYPE);
|
||||
expect(denied).toBeDefined();
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - glob escape audit', () => {
|
||||
let h: Harness;
|
||||
beforeEach(async () => {
|
||||
h = await makeHarness();
|
||||
});
|
||||
afterEach(async () => teardown(h));
|
||||
|
||||
it('emits aggregated fs.denied for glob hits filtered as escape', async () => {
|
||||
// Create an in-workspace file (legit hit) plus a symlink that
|
||||
// resolves outside the workspace (filtered hit). The glob
|
||||
// aggregation reports the escape count via a single denial
|
||||
// event so audit volume stays bounded on misconfigured trees.
|
||||
await fsp.writeFile(path.join(h.workspace, 'inside.ts'), 'x');
|
||||
const outside = path.join(h.scratch, 'outside.ts');
|
||||
await fsp.writeFile(outside, 'y');
|
||||
await fsp.symlink(outside, path.join(h.workspace, 'leak.ts'), 'file');
|
||||
const hits = await h.fs.glob('*.ts');
|
||||
const names = hits.map((p) => path.basename(p)).sort();
|
||||
expect(names).toContain('inside.ts');
|
||||
expect(names).not.toContain('outside.ts');
|
||||
const denied = h.events.find(
|
||||
(e) =>
|
||||
e.type === FS_DENIED_EVENT_TYPE &&
|
||||
(e.data as { errorKind: string }).errorKind === 'symlink_escape',
|
||||
);
|
||||
expect(denied).toBeDefined();
|
||||
expect((denied!.data as { hint?: string }).hint).toMatch(
|
||||
/\d+ hit\(s\) that resolved outside workspace/,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('WorkspaceFileSystem - factory', () => {
|
||||
it('canonicalizes the workspace once at factory build', async () => {
|
||||
const scratch = await fsp.mkdtemp(
|
||||
path.join(
|
||||
os.tmpdir(),
|
||||
`qwen-wfs-canon-${randomBytes(4).toString('hex')}-`,
|
||||
),
|
||||
);
|
||||
try {
|
||||
const real = path.join(scratch, 'ws');
|
||||
await fsp.mkdir(real);
|
||||
const aliased = path.join(scratch, 'alias');
|
||||
await fsp.symlink(real, aliased, 'dir');
|
||||
const events: BridgeEvent[] = [];
|
||||
const factory = createWorkspaceFileSystemFactory({
|
||||
boundWorkspace: aliased,
|
||||
trusted: true,
|
||||
emit: (e) => events.push(e),
|
||||
});
|
||||
const fs = factory.forRequest({ route: 'TEST /op' });
|
||||
await fsp.writeFile(path.join(real, 'inside.txt'), 'i');
|
||||
const r = await fs.resolve('inside.txt', 'read');
|
||||
const out = await fs.readText(r);
|
||||
expect(out.content).toBe('i');
|
||||
} finally {
|
||||
await fsp.rm(scratch, { recursive: true, force: true });
|
||||
}
|
||||
});
|
||||
});
|
||||
596
packages/cli/src/serve/fs/workspaceFileSystem.ts
Normal file
596
packages/cli/src/serve/fs/workspaceFileSystem.ts
Normal file
|
|
@ -0,0 +1,596 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { promises as fsp } from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import { glob as globAsync } from 'glob';
|
||||
import type {
|
||||
Ignore,
|
||||
StandardFileSystemService,
|
||||
loadIgnoreRules,
|
||||
type WriteTextFileOptions,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import type { BridgeEvent } from '../eventBus.js';
|
||||
import {
|
||||
type AuditContext,
|
||||
type AuditPublisher,
|
||||
createAuditPublisher,
|
||||
} from './audit.js';
|
||||
import { FsError, wrapAsFsError } from './errors.js';
|
||||
import {
|
||||
canonicalizeWorkspace,
|
||||
resolveWithinWorkspace,
|
||||
type Intent,
|
||||
type ResolvedPath,
|
||||
} from './paths.js';
|
||||
import {
|
||||
MAX_READ_BYTES,
|
||||
assertTrustedForIntent,
|
||||
detectBinary,
|
||||
enforceReadBytesSize,
|
||||
enforceReadSize,
|
||||
enforceWriteSize,
|
||||
shouldIgnore,
|
||||
} from './policy.js';
|
||||
|
||||
/**
|
||||
* Stat snapshot returned by `WorkspaceFileSystem.stat`. We
|
||||
* deliberately avoid passing through `fs.Stats` directly — the
|
||||
* boundary should not leak Node-specific bigint quirks or
|
||||
* platform-specific fields to PR 19/20 SDK consumers.
|
||||
*/
|
||||
export interface FsStat {
|
||||
kind: 'file' | 'directory' | 'symlink' | 'other';
|
||||
sizeBytes: number;
|
||||
modifiedMs: number;
|
||||
}
|
||||
|
||||
/** Directory listing entry from `WorkspaceFileSystem.list`. */
|
||||
export interface FsEntry {
|
||||
name: string;
|
||||
kind: 'file' | 'directory' | 'symlink' | 'other';
|
||||
/** True iff the entry matched a `.gitignore`/`.qwenignore` rule. */
|
||||
ignored: boolean;
|
||||
}
|
||||
|
||||
/** Metadata side-channel returned alongside `readText` content. */
|
||||
export interface ReadMeta {
|
||||
encoding?: string;
|
||||
bom?: boolean;
|
||||
lineEnding: 'crlf' | 'lf';
|
||||
truncated?: boolean;
|
||||
matchedIgnore?: 'file' | 'directory';
|
||||
originalLineCount?: number;
|
||||
}
|
||||
|
||||
export interface ReadTextOptions {
|
||||
/** Cap returned bytes; defaults to MAX_READ_BYTES. */
|
||||
maxBytes?: number;
|
||||
/** 1-based starting line for partial reads. */
|
||||
line?: number;
|
||||
/** Maximum number of lines to return. */
|
||||
limit?: number;
|
||||
}
|
||||
|
||||
export interface ListOptions {
|
||||
/** When true, ignored entries are returned with `ignored: true` rather than dropped. */
|
||||
includeIgnored?: boolean;
|
||||
}
|
||||
|
||||
export interface GlobOptions {
|
||||
cwd?: ResolvedPath;
|
||||
includeIgnored?: boolean;
|
||||
maxResults?: number;
|
||||
}
|
||||
|
||||
export interface WriteOutcome {
|
||||
writtenBytes: number;
|
||||
}
|
||||
|
||||
export interface RequestContext extends AuditContext {
|
||||
/** Mostly redundant with `originatorClientId`; kept for forward-compat with future ACP fields. */
|
||||
ownerSessionId?: string;
|
||||
}
|
||||
|
||||
/**
|
||||
* Public boundary type. PR 19/20 routes consume this via the
|
||||
* factory's `forRequest(ctx)` so audit context is automatically
|
||||
* threaded through every operation.
|
||||
*/
|
||||
export interface WorkspaceFileSystem {
|
||||
resolve(input: string, intent: Intent): Promise<ResolvedPath>;
|
||||
stat(p: ResolvedPath): Promise<FsStat>;
|
||||
readText(
|
||||
p: ResolvedPath,
|
||||
opts?: ReadTextOptions,
|
||||
): Promise<{ content: string; meta: ReadMeta }>;
|
||||
readBytes(p: ResolvedPath, opts?: { maxBytes?: number }): Promise<Buffer>;
|
||||
list(p: ResolvedPath, opts?: ListOptions): Promise<FsEntry[]>;
|
||||
glob(pattern: string, opts?: GlobOptions): Promise<ResolvedPath[]>;
|
||||
writeText(
|
||||
p: ResolvedPath,
|
||||
content: string,
|
||||
opts?: WriteTextFileOptions,
|
||||
): Promise<void>;
|
||||
edit(
|
||||
p: ResolvedPath,
|
||||
oldText: string,
|
||||
newText: string,
|
||||
): Promise<WriteOutcome>;
|
||||
}
|
||||
|
||||
/**
|
||||
* Per-process factory. Build once at `createServeApp` boot, call
|
||||
* `forRequest` per HTTP route invocation.
|
||||
*/
|
||||
export interface WorkspaceFileSystemFactory {
|
||||
forRequest(ctx: RequestContext): WorkspaceFileSystem;
|
||||
}
|
||||
|
||||
export interface CreateWorkspaceFileSystemFactoryDeps {
|
||||
/** Canonical workspace path; the daemon's `boundWorkspace`. */
|
||||
boundWorkspace: string;
|
||||
/** Snapshot of `Config.isTrustedFolder()` at boot. */
|
||||
trusted: boolean;
|
||||
/** Bridge-bound publisher into `EventBus.publish`. */
|
||||
emit: (event: BridgeEvent) => void;
|
||||
/**
|
||||
* Override the default ignore loader. Tests pass a fixed `Ignore`
|
||||
* to avoid filesystem coupling; production lets the factory build
|
||||
* one per workspace via `loadIgnoreRules`.
|
||||
*/
|
||||
ignore?: Ignore;
|
||||
/** Override audit raw-path mode. Defaults to env `QWEN_AUDIT_RAW_PATHS=1`. */
|
||||
includeRawPaths?: boolean;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a `WorkspaceFileSystemFactory`. The factory itself is
|
||||
* stateless across requests; per-request state (the audit context)
|
||||
* lives on the bound `WorkspaceFileSystem` returned from `forRequest`.
|
||||
*/
|
||||
export function createWorkspaceFileSystemFactory(
|
||||
deps: CreateWorkspaceFileSystemFactoryDeps,
|
||||
): WorkspaceFileSystemFactory {
|
||||
const boundWorkspace = canonicalizeWorkspace(deps.boundWorkspace);
|
||||
const ignore =
|
||||
deps.ignore ??
|
||||
loadIgnoreRules({
|
||||
projectRoot: boundWorkspace,
|
||||
useGitignore: true,
|
||||
useQwenignore: true,
|
||||
ignoreDirs: [],
|
||||
});
|
||||
const audit: AuditPublisher = createAuditPublisher({
|
||||
emit: deps.emit,
|
||||
boundWorkspace,
|
||||
includeRawPaths: deps.includeRawPaths,
|
||||
});
|
||||
const lowFs = new StandardFileSystemService();
|
||||
|
||||
return {
|
||||
forRequest(ctx) {
|
||||
return new WorkspaceFileSystemImpl({
|
||||
boundWorkspace,
|
||||
trusted: deps.trusted,
|
||||
ignore,
|
||||
audit,
|
||||
ctx,
|
||||
lowFs,
|
||||
});
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
interface ImplDeps {
|
||||
boundWorkspace: string;
|
||||
trusted: boolean;
|
||||
ignore: Ignore;
|
||||
audit: AuditPublisher;
|
||||
ctx: RequestContext;
|
||||
lowFs: StandardFileSystemService;
|
||||
}
|
||||
|
||||
class WorkspaceFileSystemImpl implements WorkspaceFileSystem {
|
||||
constructor(private readonly deps: ImplDeps) {}
|
||||
|
||||
async resolve(input: string, intent: Intent): Promise<ResolvedPath> {
|
||||
try {
|
||||
return await resolveWithinWorkspace(
|
||||
input,
|
||||
this.deps.boundWorkspace,
|
||||
intent,
|
||||
);
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, intent, input);
|
||||
}
|
||||
}
|
||||
|
||||
async stat(p: ResolvedPath): Promise<FsStat> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'stat');
|
||||
const st = await fsp.lstat(p as string);
|
||||
const out: FsStat = {
|
||||
kind: kindFromStats(st),
|
||||
sizeBytes: st.size,
|
||||
modifiedMs: st.mtimeMs,
|
||||
};
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'stat',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: st.size,
|
||||
});
|
||||
return out;
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'stat', p as string);
|
||||
}
|
||||
}
|
||||
|
||||
async readText(
|
||||
p: ResolvedPath,
|
||||
opts: ReadTextOptions = {},
|
||||
): Promise<{ content: string; meta: ReadMeta }> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'read');
|
||||
const st = await fsp.stat(p as string);
|
||||
// Hard size gate before we delegate to lowFs.readTextFile —
|
||||
// that helper's underlying `readFileWithLineAndLimit` slurps
|
||||
// the whole file into memory before slicing lines, so an
|
||||
// unbounded request against a 5 GB text file would OOM the
|
||||
// daemon (or, on a healthy host, flood the SSE replay ring
|
||||
// with a 5 GB string). `MAX_READ_BYTES` is the hard cap and
|
||||
// is independent of the caller's `opts.maxBytes` (which is a
|
||||
// *softer* post-read truncation target — the boundary still
|
||||
// honors it via `enforceReadSize` below). A future streaming
|
||||
// read path can lift this hard cap by reading only the first
|
||||
// N bytes; for now files above the cap throw and the SDK
|
||||
// consumer can fall back to `readBytes` with an explicit
|
||||
// length window.
|
||||
if (st.size > MAX_READ_BYTES) {
|
||||
throw new FsError(
|
||||
'file_too_large',
|
||||
`file of ${st.size} bytes exceeds read cap of ${MAX_READ_BYTES} bytes`,
|
||||
{
|
||||
hint: 'use readBytes for explicit byte-windowed access on large files',
|
||||
},
|
||||
);
|
||||
}
|
||||
if (await detectBinary(p)) {
|
||||
throw new FsError('binary_file', `binary file: ${p}`, {
|
||||
hint: 'use readBytes for binary content',
|
||||
});
|
||||
}
|
||||
const sizeOutcome = enforceReadSize(st.size, opts.maxBytes);
|
||||
// Delegate encoding-aware read to the existing core service so
|
||||
// BOM, CRLF, and iconv-supported codepages remain consistent
|
||||
// with what the tools layer already does.
|
||||
const result = await this.deps.lowFs.readTextFile({
|
||||
path: p as string,
|
||||
limit: opts.limit ?? Number.POSITIVE_INFINITY,
|
||||
line: opts.line ?? 0,
|
||||
});
|
||||
const ignoreVerdict = shouldIgnore(
|
||||
p,
|
||||
this.deps.boundWorkspace,
|
||||
this.deps.ignore,
|
||||
'file',
|
||||
);
|
||||
const meta: ReadMeta = {
|
||||
encoding: result._meta?.encoding,
|
||||
bom: result._meta?.bom,
|
||||
lineEnding: (result._meta?.lineEnding ?? 'lf') as 'crlf' | 'lf',
|
||||
originalLineCount: result._meta?.originalLineCount,
|
||||
};
|
||||
let truncatedContent = result.content;
|
||||
if (sizeOutcome.truncated) {
|
||||
const buf = Buffer.from(result.content, 'utf-8');
|
||||
if (buf.length > sizeOutcome.bytesToRead) {
|
||||
truncatedContent = buf
|
||||
.subarray(0, sizeOutcome.bytesToRead)
|
||||
.toString('utf-8');
|
||||
}
|
||||
meta.truncated = true;
|
||||
}
|
||||
// Surface truncation whenever lowFs's own `limit` clipped the
|
||||
// content too — without this the audit row + meta.truncated
|
||||
// would silently disagree on whether the SDK consumer received
|
||||
// the full file.
|
||||
if (
|
||||
opts.limit !== undefined &&
|
||||
Number.isFinite(opts.limit) &&
|
||||
result._meta?.originalLineCount !== undefined &&
|
||||
result._meta.originalLineCount > opts.limit + (opts.line ?? 0)
|
||||
) {
|
||||
meta.truncated = true;
|
||||
}
|
||||
if (ignoreVerdict.ignored) meta.matchedIgnore = ignoreVerdict.category;
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'read',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: st.size,
|
||||
truncated: meta.truncated,
|
||||
matchedIgnore: meta.matchedIgnore,
|
||||
});
|
||||
return { content: truncatedContent, meta };
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'read', p as string);
|
||||
}
|
||||
}
|
||||
|
||||
async readBytes(
|
||||
p: ResolvedPath,
|
||||
opts: { maxBytes?: number } = {},
|
||||
): Promise<Buffer> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'read');
|
||||
const st = await fsp.stat(p as string);
|
||||
enforceReadBytesSize(st.size, opts.maxBytes);
|
||||
const buf = await fsp.readFile(p as string);
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'read',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: buf.length,
|
||||
});
|
||||
return buf;
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'read', p as string);
|
||||
}
|
||||
}
|
||||
|
||||
async list(p: ResolvedPath, opts: ListOptions = {}): Promise<FsEntry[]> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'list');
|
||||
const dirents = await fsp.readdir(p as string, { withFileTypes: true });
|
||||
const entries: FsEntry[] = [];
|
||||
for (const d of dirents) {
|
||||
// `path.join(p, d.name)` is a shallow extension of an
|
||||
// already-canonical workspace path. Symlinked dirents are
|
||||
// tagged as `kind: 'symlink'` rather than auto-followed —
|
||||
// PR 19/20 callers that want the target's containment can
|
||||
// call `resolve()` separately. Treating each child as
|
||||
// implicitly-resolved here would be a brand-cast bypass.
|
||||
const childAbs = path.join(p as string, d.name);
|
||||
const kind = kindFromDirent(d);
|
||||
const verdict = shouldIgnore(
|
||||
childAbs as ResolvedPath,
|
||||
this.deps.boundWorkspace,
|
||||
this.deps.ignore,
|
||||
kind === 'directory' ? 'directory' : 'file',
|
||||
);
|
||||
if (verdict.ignored && !opts.includeIgnored) continue;
|
||||
entries.push({ name: d.name, kind, ignored: verdict.ignored });
|
||||
}
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'list',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: entries.length,
|
||||
});
|
||||
return entries;
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'list', p as string);
|
||||
}
|
||||
}
|
||||
|
||||
async glob(pattern: string, opts: GlobOptions = {}): Promise<ResolvedPath[]> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'glob');
|
||||
// Reject `..` segments in the pattern outright. A pattern like
|
||||
// `../../**/*.js` would otherwise let a caller search outside
|
||||
// `cwd`. Boundary-checking each individual hit catches escapes
|
||||
// too, but rejecting at the pattern level is cheaper and gives
|
||||
// a clearer error.
|
||||
if (pattern.split(/[\\/]/).some((seg) => seg === '..')) {
|
||||
throw new FsError(
|
||||
'parse_error',
|
||||
`glob pattern may not contain '..' segments: ${pattern}`,
|
||||
);
|
||||
}
|
||||
const cwd = (opts.cwd as string | undefined) ?? this.deps.boundWorkspace;
|
||||
const matches = await globAsync(pattern, {
|
||||
cwd,
|
||||
nodir: false,
|
||||
absolute: true,
|
||||
dot: true,
|
||||
});
|
||||
const out: ResolvedPath[] = [];
|
||||
const max = opts.maxResults ?? Number.POSITIVE_INFINITY;
|
||||
let escapedCount = 0;
|
||||
for (const hit of matches) {
|
||||
if (out.length >= max) break;
|
||||
const absolute = path.resolve(hit);
|
||||
// Per-hit boundary check defends against a glob that
|
||||
// matches a symlink whose target escapes the workspace.
|
||||
// The literal path is in-workspace (the symlink itself
|
||||
// sits there), but the realpath isn't — so we resolve
|
||||
// each hit's symlink chain and compare the canonical to
|
||||
// the canonical workspace root. Filtered hits are counted
|
||||
// and reported via a single aggregated `fs.denied` after
|
||||
// the loop so per-hit emit doesn't flood the bus when a
|
||||
// misconfigured tree contains many escape symlinks.
|
||||
let canonical: string;
|
||||
try {
|
||||
canonical = await fsp.realpath(absolute);
|
||||
} catch {
|
||||
// Dangling symlink or transient error: treat as escape
|
||||
// so the hit isn't returned to the caller and the audit
|
||||
// surfaces the count.
|
||||
escapedCount += 1;
|
||||
continue;
|
||||
}
|
||||
const rel = path.relative(this.deps.boundWorkspace, canonical);
|
||||
if (rel.startsWith('..') || path.isAbsolute(rel)) {
|
||||
escapedCount += 1;
|
||||
continue;
|
||||
}
|
||||
const verdict = shouldIgnore(
|
||||
canonical as ResolvedPath,
|
||||
this.deps.boundWorkspace,
|
||||
this.deps.ignore,
|
||||
'file',
|
||||
);
|
||||
if (verdict.ignored && !opts.includeIgnored) continue;
|
||||
out.push(canonical as ResolvedPath);
|
||||
}
|
||||
if (escapedCount > 0) {
|
||||
this.deps.audit.recordDenied(this.deps.ctx, {
|
||||
intent: 'glob',
|
||||
input: pattern,
|
||||
errorKind: 'symlink_escape',
|
||||
hint: `glob filtered ${escapedCount} hit(s) that resolved outside workspace`,
|
||||
});
|
||||
}
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'glob',
|
||||
absolute: cwd as ResolvedPath,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: out.length,
|
||||
});
|
||||
return out;
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'glob', pattern);
|
||||
}
|
||||
}
|
||||
|
||||
async writeText(
|
||||
p: ResolvedPath,
|
||||
content: string,
|
||||
opts?: WriteTextFileOptions,
|
||||
): Promise<void> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'write');
|
||||
const buf = Buffer.from(content, 'utf-8');
|
||||
enforceWriteSize(buf.length);
|
||||
await this.deps.lowFs.writeTextFile({
|
||||
path: p as string,
|
||||
content,
|
||||
_meta: opts ? buildWriteMeta(opts) : undefined,
|
||||
});
|
||||
const verdict = shouldIgnore(
|
||||
p,
|
||||
this.deps.boundWorkspace,
|
||||
this.deps.ignore,
|
||||
'file',
|
||||
);
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'write',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: buf.length,
|
||||
matchedIgnore: verdict.ignored ? verdict.category : undefined,
|
||||
});
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'write', p as string);
|
||||
}
|
||||
}
|
||||
|
||||
async edit(
|
||||
p: ResolvedPath,
|
||||
oldText: string,
|
||||
newText: string,
|
||||
): Promise<WriteOutcome> {
|
||||
const start = performance.now();
|
||||
try {
|
||||
assertTrustedForIntent(this.deps.trusted, 'edit');
|
||||
const current = await fsp.readFile(p as string, 'utf-8');
|
||||
// Single replacement to preserve atomic write-once semantics.
|
||||
// Multi-occurrence handling lives in PR 20's edit endpoint
|
||||
// where the route can decide policy; the boundary stays
|
||||
// mechanical.
|
||||
const idx = current.indexOf(oldText);
|
||||
if (idx === -1) {
|
||||
throw new FsError('parse_error', `oldText not found in ${p}`, {
|
||||
hint: 'edit() expects oldText to appear verbatim in the file',
|
||||
});
|
||||
}
|
||||
const next =
|
||||
current.slice(0, idx) + newText + current.slice(idx + oldText.length);
|
||||
const buf = Buffer.from(next, 'utf-8');
|
||||
enforceWriteSize(buf.length);
|
||||
await this.deps.lowFs.writeTextFile({
|
||||
path: p as string,
|
||||
content: next,
|
||||
});
|
||||
this.deps.audit.recordAccess(this.deps.ctx, {
|
||||
intent: 'edit',
|
||||
absolute: p,
|
||||
durationMs: performance.now() - start,
|
||||
sizeBytes: buf.length,
|
||||
});
|
||||
return { writtenBytes: buf.length };
|
||||
} catch (err) {
|
||||
throw this.recordAndWrap(err, 'edit', p as string);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Coerce an arbitrary thrown value into an `FsError`, emit the
|
||||
* matching `fs.denied` audit event, and return the typed error
|
||||
* for the caller to rethrow. Body methods invoke this in their
|
||||
* `catch` so:
|
||||
* - raw fs errnos (`EACCES`, `ENOTDIR`, …) get categorized
|
||||
* instead of escaping as opaque 5xx,
|
||||
* - the audit log records every failure (the prior helper
|
||||
* early-returned for non-`FsError`s and silently lost the
|
||||
* event), and
|
||||
* - PR 19/20 routes can still rely on `instanceof FsError`
|
||||
* for their `sendFsError` serializer.
|
||||
*/
|
||||
private recordAndWrap(err: unknown, intent: Intent, input: string): FsError {
|
||||
const fs = wrapAsFsError(err);
|
||||
this.deps.audit.recordDenied(this.deps.ctx, {
|
||||
intent,
|
||||
input,
|
||||
errorKind: fs.kind,
|
||||
hint: fs.hint,
|
||||
});
|
||||
return fs;
|
||||
}
|
||||
}
|
||||
|
||||
function kindFromStats(st: {
|
||||
isFile(): boolean;
|
||||
isDirectory(): boolean;
|
||||
isSymbolicLink(): boolean;
|
||||
}): FsStat['kind'] {
|
||||
if (st.isSymbolicLink()) return 'symlink';
|
||||
if (st.isDirectory()) return 'directory';
|
||||
if (st.isFile()) return 'file';
|
||||
return 'other';
|
||||
}
|
||||
|
||||
function kindFromDirent(d: {
|
||||
isFile(): boolean;
|
||||
isDirectory(): boolean;
|
||||
isSymbolicLink(): boolean;
|
||||
}): FsEntry['kind'] {
|
||||
if (d.isSymbolicLink()) return 'symlink';
|
||||
if (d.isDirectory()) return 'directory';
|
||||
if (d.isFile()) return 'file';
|
||||
return 'other';
|
||||
}
|
||||
|
||||
function buildWriteMeta(
|
||||
opts: WriteTextFileOptions,
|
||||
): Record<string, unknown> | undefined {
|
||||
const meta: Record<string, unknown> = {};
|
||||
if (opts.bom) meta['bom'] = true;
|
||||
if (opts.encoding) meta['encoding'] = opts.encoding;
|
||||
return Object.keys(meta).length > 0 ? meta : undefined;
|
||||
}
|
||||
|
||||
// Re-export so PR 19/20 routes can access the orchestrator surface
|
||||
// from a single `serve/fs/index.js` import.
|
||||
export { MAX_READ_BYTES };
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
import { spawn, type ChildProcess } from 'node:child_process';
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import { promises as fs, realpathSync } from 'node:fs';
|
||||
import { promises as fs } from 'node:fs';
|
||||
import * as path from 'node:path';
|
||||
import { Readable, Writable } from 'node:stream';
|
||||
import {
|
||||
|
|
@ -15,6 +15,7 @@ import {
|
|||
ndJsonStream,
|
||||
} from '@agentclientprotocol/sdk';
|
||||
import { writeStderrLine } from '../utils/stdioHelpers.js';
|
||||
import { canonicalizeWorkspace } from './fs/paths.js';
|
||||
import {
|
||||
EventBus,
|
||||
DEFAULT_RING_SIZE,
|
||||
|
|
@ -3580,74 +3581,14 @@ function sliceLineRange(
|
|||
}
|
||||
|
||||
/**
|
||||
* Canonicalize a workspace path so the boot-time bound path and every
|
||||
* request's `workspaceCwd` collapse to the same key. `path.resolve`
|
||||
* alone normalizes `..` and `.` segments and absolutizes, but on
|
||||
* case-insensitive filesystems (macOS APFS, Windows NTFS) `/Work/A`
|
||||
* and `/work/a` are the same directory yet `resolve` returns them
|
||||
* verbatim — without normalization the `boundWorkspace` check would
|
||||
* reject every request that spelled the path with different casing
|
||||
* and `sessionScope: 'single'` re-attach would silently degrade to
|
||||
* "one per spelling".
|
||||
*
|
||||
* `realpathSync.native` (when the path exists) walks symlinks and returns
|
||||
* the on-disk casing; this matches what `config.ts` / `settings.ts` /
|
||||
* `sandbox.ts` use for their own workspace resolution. When the path
|
||||
* doesn't exist (test fixtures, ahead-of-mkdir flows) we fall back to
|
||||
* the resolved-but-uncanonicalized form rather than throwing — the
|
||||
* downstream `spawn({cwd})` will fail with a useful ENOENT if the
|
||||
* workspace truly doesn't exist.
|
||||
*
|
||||
* NOTE: This is a **cross-module contract** (BX9_q) — `config.ts`,
|
||||
* `settings.ts`, `sandbox.ts`, and this file all need to canonicalize
|
||||
* the same way for the bound-workspace check + `sessionScope:
|
||||
* 'single'` re-attach to work correctly across paths. The contract:
|
||||
* use `realpathSync.native` on the resolved absolute path; fall back
|
||||
* to `path.resolve` only when the path doesn't exist yet. If a future
|
||||
* change breaks this alignment (e.g. one module starts lowercasing on
|
||||
* Windows but this one doesn't), the canonicalized request path
|
||||
* won't match the canonicalized bound path → every request returns
|
||||
* `workspace_mismatch` even though the human-readable paths look
|
||||
* equivalent. There's no test that pins the alignment; the
|
||||
* integration suite would catch a divergence only if it tested the
|
||||
* specific casing / symlink path the affected module changed.
|
||||
*
|
||||
* Stage 2 in-process (#3803 §10) collapses the bridge into core,
|
||||
* removing the bridge-side path resolution entirely. Stage 1.5
|
||||
* `@qwen-code/acp-bridge` lift (chiga0 finding 1) is the natural
|
||||
* place to extract a shared `canonicalizeWorkspace` primitive that
|
||||
* all four modules consume — the lowest-common-denominator
|
||||
* extraction is fine THERE because the package boundary forces the
|
||||
* call sites to converge. Until then, *any* change to how those
|
||||
* modules resolve workspace paths needs a matching change here.
|
||||
* Re-export of the workspace canonicalizer for callers that historically
|
||||
* imported it from `httpAcpBridge.ts`. The implementation was extracted
|
||||
* to `./fs/paths.ts` in #4175 PR 18 (commit 1) so the forthcoming
|
||||
* `WorkspaceFileSystem` boundary can reuse the same primitive without
|
||||
* pulling in the 3.6k-line bridge module. See `./fs/paths.ts` for the
|
||||
* cross-module contract that governs this function.
|
||||
*/
|
||||
export function canonicalizeWorkspace(p: string): string {
|
||||
const resolved = path.resolve(p);
|
||||
try {
|
||||
// FIXME(stage-2): switch to `fs.promises.realpath` once the
|
||||
// bridge call sites become async-friendly. This sync syscall
|
||||
// runs on the hot `spawnOrAttach` path and blocks the event
|
||||
// loop for one filesystem stat per call. Single-user loopback
|
||||
// (Stage 1's design target) doesn't notice; high-concurrency
|
||||
// deployments will. Stage 2 in-process refactor removes the
|
||||
// entire bridge-side path resolution anyway, but if Stage 2
|
||||
// ever lands without that change, switch to the async version.
|
||||
return realpathSync.native(resolved);
|
||||
} catch (err) {
|
||||
// Only fall back to path.resolve for ENOENT (path doesn't exist
|
||||
// yet). Other filesystem errors (EACCES, EIO, ELOOP) should
|
||||
// propagate — swallowing them would hide transient I/O failures
|
||||
// behind misleading workspace_mismatch rejections.
|
||||
if (
|
||||
err &&
|
||||
typeof err === 'object' &&
|
||||
(err as { code?: unknown }).code === 'ENOENT'
|
||||
) {
|
||||
return resolved;
|
||||
}
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
export { canonicalizeWorkspace };
|
||||
|
||||
/**
|
||||
* Race `p` against a timeout. The timeout REJECTS the returned
|
||||
|
|
|
|||
|
|
@ -3282,3 +3282,86 @@ describe('runQwenServe SIGINT handler', () => {
|
|||
void vi.fn(); // silence unused-import lint if vitest tree-shakes
|
||||
});
|
||||
});
|
||||
|
||||
describe('createServeApp ServeAppDeps.fsFactory wiring (#4175 PR 18)', () => {
|
||||
it('parks a default WorkspaceFileSystemFactory on app.locals when none is injected', async () => {
|
||||
const { createServeApp } = await import('./server.js');
|
||||
const app = createServeApp(
|
||||
{
|
||||
port: 0,
|
||||
hostname: '127.0.0.1',
|
||||
workspace: '/work/bound',
|
||||
} as Parameters<typeof createServeApp>[0],
|
||||
() => 0,
|
||||
);
|
||||
const fsFactory = (
|
||||
app.locals as {
|
||||
fsFactory?: { forRequest: (ctx: { route: string }) => unknown };
|
||||
}
|
||||
).fsFactory;
|
||||
expect(fsFactory).toBeDefined();
|
||||
expect(typeof fsFactory!.forRequest).toBe('function');
|
||||
// The factory is functional — it can build a per-request boundary.
|
||||
const fs = fsFactory!.forRequest({ route: 'TEST /op' });
|
||||
expect(fs).toBeDefined();
|
||||
});
|
||||
|
||||
it('uses the injected fsFactory verbatim when supplied', async () => {
|
||||
const { createServeApp } = await import('./server.js');
|
||||
const sentinel = { forRequest: vi.fn(() => ({ marker: 'injected' })) };
|
||||
const app = createServeApp(
|
||||
{
|
||||
port: 0,
|
||||
hostname: '127.0.0.1',
|
||||
workspace: '/work/bound',
|
||||
} as Parameters<typeof createServeApp>[0],
|
||||
() => 0,
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
{ fsFactory: sentinel as any },
|
||||
);
|
||||
expect((app.locals as { fsFactory?: unknown }).fsFactory).toBe(sentinel);
|
||||
});
|
||||
|
||||
it('default fsFactory is built with trusted=false (writes refused)', async () => {
|
||||
const { createServeApp } = await import('./server.js');
|
||||
const { isFsError } = await import('./fs/index.js');
|
||||
const os = await import('node:os');
|
||||
const tmp = await import('node:fs').then((m) =>
|
||||
m.promises.mkdtemp(path.join(os.tmpdir(), 'qwen-serve-default-trust-')),
|
||||
);
|
||||
try {
|
||||
const app = createServeApp(
|
||||
{
|
||||
port: 0,
|
||||
hostname: '127.0.0.1',
|
||||
workspace: tmp,
|
||||
} as Parameters<typeof createServeApp>[0],
|
||||
() => 0,
|
||||
);
|
||||
type FsCtx = { route: string };
|
||||
type WfsLite = {
|
||||
resolve: (input: string, intent: 'write') => Promise<string>;
|
||||
writeText: (p: string, content: string) => Promise<void>;
|
||||
};
|
||||
const fsFactory = (
|
||||
app.locals as {
|
||||
fsFactory?: { forRequest: (ctx: FsCtx) => WfsLite };
|
||||
}
|
||||
).fsFactory;
|
||||
expect(fsFactory).toBeDefined();
|
||||
const fs = fsFactory!.forRequest({ route: 'TEST /op' });
|
||||
// Resolve a write target inside the workspace; the resolve
|
||||
// succeeds but writeText must throw `untrusted_workspace` —
|
||||
// that's the safe-default behavior the strict-default factory
|
||||
// exists to enforce.
|
||||
const resolved = await fs.resolve('child.txt', 'write');
|
||||
const err = await fs.writeText(resolved, 'x').catch((e: unknown) => e);
|
||||
expect(isFsError(err)).toBe(true);
|
||||
expect((err as { kind: string }).kind).toBe('untrusted_workspace');
|
||||
} finally {
|
||||
await import('node:fs').then((m) =>
|
||||
m.promises.rm(tmp, { recursive: true, force: true }),
|
||||
);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -39,6 +39,32 @@ import {
|
|||
type CapabilitiesEnvelope,
|
||||
type ServeOptions,
|
||||
} from './types.js';
|
||||
import {
|
||||
createWorkspaceFileSystemFactory,
|
||||
type WorkspaceFileSystemFactory,
|
||||
} from './fs/index.js';
|
||||
|
||||
/**
|
||||
* Build a no-op fs-audit emitter that logs a single warning the
|
||||
* first time it's invoked. The default factory uses this so a
|
||||
* regression that silently strips audit events shows up in
|
||||
* operator logs instead of disappearing — `() => {}` is the
|
||||
* "obvious" no-op but offers no failure signal. PR 19/20's
|
||||
* `runQwenServe` injection replaces this with a real per-session
|
||||
* emit, so legitimate production traffic never hits the warning.
|
||||
*/
|
||||
function createDefaultFsAuditEmit(): (event: BridgeEvent) => void {
|
||||
let warned = false;
|
||||
return (event: BridgeEvent) => {
|
||||
if (!warned) {
|
||||
warned = true;
|
||||
writeStderrLine(
|
||||
`qwen serve: fs audit emit is the default no-op (event ${event.type} dropped). ` +
|
||||
`Inject deps.fsFactory in createServeApp to wire audit into the EventBus.`,
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
export interface ServeAppDeps {
|
||||
/** Bridge instance; tests inject a fake. Defaults to a fresh real one. */
|
||||
|
|
@ -56,6 +82,18 @@ export interface ServeAppDeps {
|
|||
* process.cwd()` itself.
|
||||
*/
|
||||
boundWorkspace?: string;
|
||||
/**
|
||||
* Workspace filesystem boundary factory (#4175 PR 18). When
|
||||
* supplied, PR 19/20 routes will pull a per-request
|
||||
* `WorkspaceFileSystem` off it; when omitted, `createServeApp`
|
||||
* builds a permissive default (trusted=true, no-op audit emit).
|
||||
* No PR 18 routes consume the factory yet — the slot is wired so
|
||||
* PR 19 read-only file routes can drop in without re-shaping
|
||||
* `ServeAppDeps`. Once PR 19 lands, `runQwenServe` will inject a
|
||||
* factory whose `trusted` flag mirrors `Config.isTrustedFolder()`
|
||||
* and whose `emit` plumbs into the per-session EventBus.
|
||||
*/
|
||||
fsFactory?: WorkspaceFileSystemFactory;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -150,6 +188,33 @@ export function createServeApp(
|
|||
boundWorkspace,
|
||||
});
|
||||
|
||||
// Strict-default factory: `trusted: false` so an upstream refactor
|
||||
// that forgets to inject `deps.fsFactory` never silently allows
|
||||
// writes against an untrusted workspace. Read-shaped intents still
|
||||
// succeed (so tests / direct embeds can exercise the read path
|
||||
// without a config), but every mutating intent throws
|
||||
// `untrusted_workspace`. The default `emit` is a no-op that warns
|
||||
// once on first call so a future regression that silently swallows
|
||||
// audit events surfaces in operator logs the first time it bites.
|
||||
// Callers passing `deps.fsFactory` get full control of trust +
|
||||
// audit destination — `runQwenServe` will inject one whose
|
||||
// `trusted` mirrors `Config.isTrustedFolder()` and whose `emit`
|
||||
// plumbs into the per-session EventBus once PR 19/20 lands.
|
||||
const fsFactory: WorkspaceFileSystemFactory =
|
||||
deps.fsFactory ??
|
||||
createWorkspaceFileSystemFactory({
|
||||
boundWorkspace,
|
||||
trusted: false,
|
||||
emit: createDefaultFsAuditEmit(),
|
||||
});
|
||||
// Park the factory on `app.locals` so PR 19/20 route handlers can
|
||||
// pick it up via `req.app.locals.fsFactory` without re-threading
|
||||
// the value through every handler signature, and so PR 18 tests
|
||||
// can assert the factory is reachable. Express types `locals` as
|
||||
// a generic record; we cast to keep a precise property name.
|
||||
(app.locals as { fsFactory?: WorkspaceFileSystemFactory }).fsFactory =
|
||||
fsFactory;
|
||||
|
||||
// Order matters: rejection guards (CORS / Host allowlist / bearer auth)
|
||||
// run BEFORE the JSON body parser. Otherwise an unauthenticated POST
|
||||
// gets a full 10MB `JSON.parse` before the 401 fires — a trivially
|
||||
|
|
|
|||
|
|
@ -279,6 +279,11 @@ export * from './utils/errorParsing.js';
|
|||
export * from './utils/errors.js';
|
||||
export * from './utils/fileUtils.js';
|
||||
export * from './utils/filesearch/fileSearch.js';
|
||||
export {
|
||||
Ignore,
|
||||
loadIgnoreRules,
|
||||
type LoadIgnoreRulesOptions,
|
||||
} from './utils/filesearch/ignore.js';
|
||||
export * from './utils/formatters.js';
|
||||
export * from './utils/generateContentResponseUtilities.js';
|
||||
export * from './utils/getFolderStructure.js';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue