From 688d64416ea5b4bff894a77e4697a94c8da490e5 Mon Sep 17 00:00:00 2001 From: jinye Date: Mon, 18 May 2026 22:37:08 +0800 Subject: [PATCH] feat(serve): add workspace file write/edit routes (#4175 PR20) (#4280) * feat(serve): add workspace file write/edit routes Co-authored-by: Qwen-Coder * fix(serve): bind file hashes to text snapshots Co-authored-by: Qwen-Coder * fix(serve): tighten read-bytes snapshot and create-mode publish - readBytesWindow: re-stat the open fd after read and require unchanged ino+size+mtime before emitting the response. Mirrors the hardened text-snapshot path so the full-window hash can no longer pair with bytes that drifted under in-place rewrite or append. Surface drift as retryable hash_mismatch. - atomicWriteTextResolvedFile: reject a symlinked parent up-front as defense-in-depth ahead of the parent-fd publish follow-up referenced by assertInodeStableAfterRead. - atomicWriteTextResolvedFile: publish create-mode writes via link()+unlink() instead of rename(). POSIX rename() overwrites an existing regular file, so a racing external process could break the public create contract; link() returns EEXIST atomically and is portable across POSIX/NTFS. The early assertCreateTargetAbsent check stays for friendlier errors on the non-racing path. --------- Co-authored-by: Qwen-Coder --- .../examples/daemon-client-quickstart.md | 24 + docs/developers/qwen-serve-protocol.md | 179 ++- docs/users/qwen-serve.md | 14 + packages/cli/src/serve/capabilities.ts | 14 +- packages/cli/src/serve/fs/errors.test.ts | 6 + packages/cli/src/serve/fs/errors.ts | 10 +- packages/cli/src/serve/fs/index.ts | 7 + .../src/serve/fs/workspaceFileSystem.test.ts | 173 ++- .../cli/src/serve/fs/workspaceFileSystem.ts | 1075 ++++++++++++++--- .../serve/routes/workspaceFileRead.test.ts | 70 +- .../cli/src/serve/routes/workspaceFileRead.ts | 74 +- .../serve/routes/workspaceFileWrite.test.ts | 270 +++++ .../src/serve/routes/workspaceFileWrite.ts | 292 +++++ packages/cli/src/serve/server.test.ts | 13 + packages/cli/src/serve/server.ts | 7 + packages/core/src/index.ts | 1 + .../src/services/fileSystemService.test.ts | 16 + .../core/src/services/fileSystemService.ts | 115 +- packages/core/src/utils/fileUtils.test.ts | 26 + packages/core/src/utils/fileUtils.ts | 82 +- .../sdk-typescript/src/daemon/DaemonClient.ts | 93 ++ packages/sdk-typescript/src/daemon/index.ts | 8 + packages/sdk-typescript/src/daemon/types.ts | 86 ++ packages/sdk-typescript/src/index.ts | 8 + .../test/unit/DaemonClient.test.ts | 156 +++ 25 files changed, 2555 insertions(+), 264 deletions(-) create mode 100644 packages/cli/src/serve/routes/workspaceFileWrite.test.ts create mode 100644 packages/cli/src/serve/routes/workspaceFileWrite.ts diff --git a/docs/developers/examples/daemon-client-quickstart.md b/docs/developers/examples/daemon-client-quickstart.md index 16d1dd864..733a9fad7 100644 --- a/docs/developers/examples/daemon-client-quickstart.md +++ b/docs/developers/examples/daemon-client-quickstart.md @@ -109,6 +109,30 @@ function handleEvent(event: DaemonEvent): void { } ``` +## Workspace file helpers + +File routes are workspace-scoped, not session-scoped, so they live on +`DaemonClient` directly: + +```ts +const file = await client.readWorkspaceFile('src/main.ts'); + +const updated = await client.editWorkspaceFile({ + path: 'src/main.ts', + oldText: 'timeout: 30000', + newText: 'timeout: 60000', + expectedHash: file.hash!, +}); + +console.log(updated.hash); +``` + +`expectedHash` is SHA-256 over the raw on-disk bytes. `mode: "replace"` and +`editWorkspaceFile()` require it so stale clients do not overwrite a file they +did not just read. Write/edit require bearer-token configuration even on +loopback; start the daemon with `--token` or `QWEN_SERVER_TOKEN` before using +them. + ## Reconnect with `Last-Event-ID` If your client process restarts mid-session, replay events you missed: diff --git a/docs/developers/qwen-serve-protocol.md b/docs/developers/qwen-serve-protocol.md index d35a72adb..19738f026 100644 --- a/docs/developers/qwen-serve-protocol.md +++ b/docs/developers/qwen-serve-protocol.md @@ -99,7 +99,8 @@ registry. Clients **must** gate UI off `features`, not off `mode` (per design 'session_set_model', 'client_identity', 'client_heartbeat', 'session_permission_vote', 'permission_vote', 'workspace_mcp', 'workspace_skills', 'workspace_providers', 'session_context', 'session_supported_commands', - 'session_close', 'session_metadata', 'mcp_guardrails'] + 'session_close', 'session_metadata', 'mcp_guardrails', + 'workspace_file_read', 'workspace_file_bytes', 'workspace_file_write'] ``` `session_scope_override` is the negotiation handle for the per-request `sessionScope` field on `POST /session` (see below). Older daemons silently ignore the field, so SDK clients should pre-flight `caps.features` for this tag before sending it. @@ -118,6 +119,15 @@ registry. Clients **must** gate UI off `features`, not off `mode` (per design > ⚠️ **PR 14 v1 scope: per-session, not per-workspace.** Each ACP session inside the daemon constructs its own `Config` + `McpClientManager` (via `acpAgent.newSessionConfig`). The budget caps live MCP clients **per session**; each session independently reads `QWEN_SERVE_MCP_CLIENT_BUDGET` from the forwarded env. With `--mcp-client-budget=10` and 5 concurrent ACP sessions, the actual live MCP client count can reach 5 × 10 = 50 across the daemon. The `GET /workspace/mcp` snapshot reads the **bootstrap session's** `McpClientManager` accounting only — the `budgets[0].scope: 'session'` value is the honest signal that this is per-session, not aggregated. **Wave 5 PR 23 (shared MCP pool)** will introduce a workspace-scoped manager and add a `scope: 'workspace'` cell alongside the per-session cell for true cross-session aggregation. v1 is the in-process counter + soft enforcement foundation that PR 23 builds on. +`workspace_file_read` covers the text/list/stat/glob workspace file routes +(`GET /file`, `GET /list`, `GET /glob`, `GET /stat`). `workspace_file_bytes` +covers `GET /file/bytes`, which was added later so clients can pre-flight raw +byte-window support against PR19-era daemons. `workspace_file_write` covers +the hash-aware text mutation routes (`POST /file/write`, `POST /file/edit`). +The write tag means the route contract exists; it does not mean the current +deployment is open for anonymous mutation. Write/edit are strict mutation +routes and require a configured bearer token even on loopback. + **Conditional tags.** A small number of feature tags are advertised only when the matching deployment toggle is on. Tag presence = behavior is on; absence = either an older daemon predating the tag, OR a current daemon where the operator did not opt in. Currently: | Tag | Advertised when … | @@ -605,6 +615,173 @@ carries a single `ServeStatusCell` describing the failure and the cells fall back to `not_started` ACP placeholders. Daemon-level cells are still returned. +### Workspace file routes + +All file paths are resolved through the daemon's bound workspace. Responses use +workspace-relative paths and never return absolute filesystem paths for normal +success cases. Successful file responses include: + +```http +Cache-Control: no-store +X-Content-Type-Options: nosniff +``` + +Filesystem errors use this JSON shape: + +```json +{ + "errorKind": "hash_mismatch", + "error": "expected sha256:..., found sha256:...", + "hint": "re-read the file and retry with the latest hash", + "status": 409 +} +``` + +`errorKind` values include `path_outside_workspace`, `symlink_escape`, +`path_not_found`, `binary_file`, `file_too_large`, `untrusted_workspace`, +`permission_denied`, `parse_error`, `hash_mismatch`, +`file_already_exists`, `text_not_found`, and `ambiguous_text_match`. + +#### `GET /file` + +Reads a text file. Query params: `path` (required), `maxBytes`, `line`, and +`limit`. The daemon rejects binary files and files above the text read cap. +The response includes `hash`, a SHA-256 digest over the raw on-disk bytes for +the whole file, even when `line`, `limit`, or `maxBytes` returned a slice. + +```json +{ + "kind": "file", + "path": "src/index.ts", + "content": "export {};\n", + "encoding": "utf-8", + "bom": false, + "lineEnding": "lf", + "sizeBytes": 11, + "returnedBytes": 11, + "truncated": false, + "hash": "sha256:...", + "matchedIgnore": null, + "originalLineCount": null +} +``` + +#### `GET /file/bytes` + +Reads raw bytes from a file without decoding. Query params: `path` (required), +`offset` (default `0`), and `maxBytes` (default `65536`, max `262144`). This +route supports bounded windows on large binary files without slurping the whole +file. The response includes `hash` only when the returned window covers the +entire file. + +```json +{ + "kind": "file_bytes", + "path": "assets/logo.png", + "offset": 0, + "sizeBytes": 3912, + "returnedBytes": 3912, + "truncated": false, + "contentBase64": "...", + "hash": "sha256:..." +} +``` + +#### `POST /file/write` + +Creates or replaces a text file. This is a strict mutation route: on loopback +without a configured token it returns `401 { "code": "token_required" }`. +With `--require-auth`, the global bearer middleware rejects unauthenticated +requests before the route runs. + +Body: + +```json +{ + "path": "src/new.ts", + "content": "export const value = 1;\n", + "mode": "create" +} +``` + +```json +{ + "path": "src/existing.ts", + "content": "export const value = 2;\n", + "mode": "replace", + "expectedHash": "sha256:..." +} +``` + +`mode` must be `create` or `replace`. `create` never overwrites an existing +file (`409 file_already_exists`). `replace` requires `expectedHash`; missing or +malformed hashes are `400 parse_error`, and stale hashes are +`409 hash_mismatch`. `expectedHash` is `sha256:` plus 64 lowercase hex +characters, computed over raw on-disk bytes. + +`bom`, `encoding`, and `lineEnding` may be supplied. Replacement preserves the +existing file's encoding profile by default; explicit fields override it. +Binary writes are out of scope. + +The daemon writes to a random temp file in the target directory, fsyncs where +supported, re-checks the current hash immediately before `rename()`, then +renames into place. This prevents partial-file observation and serializes +daemon-originated writes to the same file, but it is not a cross-process +kernel compare-and-swap: an external editor can still race in the tiny window +between final hash check and rename. + +```json +{ + "kind": "file_write", + "path": "src/existing.ts", + "mode": "replace", + "created": false, + "sizeBytes": 24, + "hash": "sha256:...", + "encoding": "utf-8", + "bom": false, + "lineEnding": "lf", + "matchedIgnore": null +} +``` + +#### `POST /file/edit` + +Applies one exact text replacement to an existing text file. This is also a +strict mutation route and requires `expectedHash`. + +```json +{ + "path": "src/config.ts", + "oldText": "timeout: 30000", + "newText": "timeout: 60000", + "expectedHash": "sha256:..." +} +``` + +`oldText` must be non-empty and occur exactly once. No match returns +`422 text_not_found`; multiple matches return `422 ambiguous_text_match`. +The route preserves encoding, BOM, and line endings, and re-checks +`expectedHash` immediately before the atomic rename. + +Explicit writes/edits to ignored paths are allowed because the authenticated +caller named the path. Success responses and audit events include +`matchedIgnore: "file" | "directory" | null`. + +```json +{ + "kind": "file_edit", + "path": "src/config.ts", + "replacements": 1, + "sizeBytes": 128, + "hash": "sha256:...", + "encoding": "utf-8", + "bom": false, + "lineEnding": "lf", + "matchedIgnore": null +} +``` + ### `GET /session/:id/context` ```json diff --git a/docs/users/qwen-serve.md b/docs/users/qwen-serve.md index 19ba2a63c..a6c379f38 100644 --- a/docs/users/qwen-serve.md +++ b/docs/users/qwen-serve.md @@ -71,6 +71,20 @@ to populate them. Failures map to a closed `errorKind` enum (`missing_binary`, `parse_error`, `blocked_egress`) so client UIs can render structured remediation. +The daemon also exposes workspace file helpers: + +- `GET /file` reads text files and returns a raw-byte `sha256:` hash. +- `GET /file/bytes` reads bounded raw byte windows and returns base64 content. +- `POST /file/write` creates or replaces text files. +- `POST /file/edit` applies one exact text replacement. + +Write/edit are **strict mutation routes**: even on loopback they require a +configured bearer token, otherwise they return `token_required`. Replacements +and edits require the latest `expectedHash` from `GET /file` (or a full-window +`GET /file/bytes`). `create` never overwrites. Explicit writes to ignored paths +are allowed but audited. Binary writes, delete/move/mkdir, and recursive parent +creation are not part of this surface. + ### 3. Open a session ```bash diff --git a/packages/cli/src/serve/capabilities.ts b/packages/cli/src/serve/capabilities.ts index 21e8a2bfa..54bc1128c 100644 --- a/packages/cli/src/serve/capabilities.ts +++ b/packages/cli/src/serve/capabilities.ts @@ -104,10 +104,18 @@ export const SERVE_CAPABILITY_REGISTRY = { // others for free, and a future deprecation would have to coordinate // across all four anyway. Per-route tags would force four // simultaneous registry entries with no operator-meaningful - // difference between them. Mutating routes (`POST /file/write`, - // `POST /file/edit`) ship under a separate `workspace_file_write` - // tag in PR 20. + // difference between them. workspace_file_read: { since: 'v1' }, + // Issue #4175 PR 20. Daemon supports bounded raw byte reads via + // `GET /file/bytes`. This is separate from `workspace_file_read` + // because PR19 daemons already advertise the text/list/stat/glob + // surface without byte-window support. + workspace_file_bytes: { since: 'v1' }, + // Issue #4175 PR 20. Daemon supports hash-aware text mutation routes + // (`POST /file/write`, `POST /file/edit`) behind the strict mutation + // gate. Clients should still pre-flight `require_auth` separately for + // deployment posture; this tag only means the route contract exists. + workspace_file_write: { since: 'v1' }, // Issue #4175 PR 15. Daemon was booted with `--require-auth` (or // `requireAuth: true`), so even loopback callers must carry a bearer // token. Advertised CONDITIONALLY — only when the flag is on — so diff --git a/packages/cli/src/serve/fs/errors.test.ts b/packages/cli/src/serve/fs/errors.test.ts index 3d4f6fdb9..f56f99646 100644 --- a/packages/cli/src/serve/fs/errors.test.ts +++ b/packages/cli/src/serve/fs/errors.test.ts @@ -20,8 +20,14 @@ describe('FsError', () => { ['path_not_found', 404], ['binary_file', 422], ['file_too_large', 413], + ['hash_mismatch', 409], + ['file_already_exists', 409], + ['text_not_found', 422], + ['ambiguous_text_match', 422], ['untrusted_workspace', 403], ['permission_denied', 403], + ['io_error', 503], + ['internal_error', 500], ['parse_error', 400], ]; for (const [kind, status] of cases) { diff --git a/packages/cli/src/serve/fs/errors.ts b/packages/cli/src/serve/fs/errors.ts index 488a426d4..e3565b19e 100644 --- a/packages/cli/src/serve/fs/errors.ts +++ b/packages/cli/src/serve/fs/errors.ts @@ -20,6 +20,10 @@ export type FsErrorKind = | 'path_not_found' | 'binary_file' | 'file_too_large' + | 'hash_mismatch' + | 'file_already_exists' + | 'text_not_found' + | 'ambiguous_text_match' | 'untrusted_workspace' | 'permission_denied' /** @@ -56,7 +60,7 @@ export type FsErrorKind = * boundary is being asked to model a transport-level concern that * doesn't belong here (5xx, 401/403 from auth, etc.). */ -export type FsErrorStatus = 400 | 403 | 404 | 413 | 422 | 500 | 503; +export type FsErrorStatus = 400 | 403 | 404 | 409 | 413 | 422 | 500 | 503; /** * Default HTTP status mapping. Centralized here so callers can throw @@ -72,6 +76,10 @@ const DEFAULT_STATUS_BY_KIND: Record = { path_not_found: 404, binary_file: 422, file_too_large: 413, + hash_mismatch: 409, + file_already_exists: 409, + text_not_found: 422, + ambiguous_text_match: 422, untrusted_workspace: 403, permission_denied: 403, io_error: 503, diff --git a/packages/cli/src/serve/fs/index.ts b/packages/cli/src/serve/fs/index.ts index d2832fe3e..d22999a3f 100644 --- a/packages/cli/src/serve/fs/index.ts +++ b/packages/cli/src/serve/fs/index.ts @@ -42,15 +42,22 @@ export { } from './audit.js'; export { createWorkspaceFileSystemFactory, + isContentHash, + type ContentHash, type CreateWorkspaceFileSystemFactoryDeps, type FsEntry, type FsStat, type GlobOptions, type ListOptions, + type ReadBytesOptions, + type ReadBytesOutcome, type ReadMeta, type ReadTextOptions, type RequestContext, type WorkspaceFileSystem, type WorkspaceFileSystemFactory, + type WriteMode, type WriteOutcome, + type WriteTextAtomicOptions, + type WriteTextAtomicOutcome, } from './workspaceFileSystem.js'; diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts index 36ce980e8..930b20900 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.test.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.test.ts @@ -4,7 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ -import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import { promises as fsp } from 'node:fs'; import * as os from 'node:os'; import * as path from 'node:path'; @@ -61,6 +61,10 @@ async function teardown(h: Harness): Promise { await fsp.rm(h.scratch, { recursive: true, force: true }); } +function rawHash(data: string | Buffer): `sha256:${string}` { + return `sha256:${createHash('sha256').update(data).digest('hex')}`; +} + describe('WorkspaceFileSystem - resolve and stat', () => { let h: Harness; beforeEach(async () => { @@ -116,6 +120,7 @@ describe('WorkspaceFileSystem - readText', () => { expect(out.content).toBe('hello\nworld\n'); expect(out.meta.lineEnding).toBe('lf'); expect(out.meta.sizeBytes).toBe(12); + expect(out.meta.hash).toBe(rawHash('hello\nworld\n')); expect(out.meta.truncated).toBeUndefined(); }); @@ -200,33 +205,27 @@ describe('WorkspaceFileSystem - readBytes', () => { expect(buf[0]).toBe(0xab); }); - it('throws file_too_large when file size exceeds the hard MAX_READ_BYTES cap regardless of maxBytes', async () => { + it('reads a bounded window from a file larger than MAX_READ_BYTES', async () => { const policy = await import('./policy.js'); const target = path.join(h.workspace, 'huge.bin'); await fsp.writeFile(target, Buffer.alloc(policy.MAX_READ_BYTES + 1)); const r = await h.fs.resolve('huge.bin', 'read'); - const err = await h.fs.readBytes(r).catch((e) => e); - expect(isFsError(err)).toBe(true); - expect((err as { kind: string }).kind).toBe('file_too_large'); + const out = await h.fs.readBytesWindow(r, { maxBytes: 16 }); + expect(out.sizeBytes).toBe(policy.MAX_READ_BYTES + 1); + expect(out.returnedBytes).toBe(16); + expect(out.truncated).toBe(true); + expect(out.hash).toBeUndefined(); }); - it('readBytes catches concurrent post-stat growth via post-read size check', async () => { - // Pre-stat OOM gate sees a small file; post-read buf.length - // check catches the same-inode growth case (concurrent - // appender keeps the inode but extends past the cap). Test - // simulates by overwriting the file AFTER `resolve()` with a - // buffer larger than `MAX_READ_BYTES` — the readBytes call's - // pre-stat sees the large size and trips the hard cap; the - // post-read check is the defense-in-depth path for the case - // where stat passed but read picked up more bytes. - const policy = await import('./policy.js'); - const small = path.join(h.workspace, 'grew.bin'); - await fsp.writeFile(small, Buffer.alloc(64)); - const r = await h.fs.resolve('grew.bin', 'read'); - await fsp.writeFile(small, Buffer.alloc(policy.MAX_READ_BYTES + 1)); - const err = await h.fs.readBytes(r).catch((e) => e); - expect(isFsError(err)).toBe(true); - expect((err as { kind: string }).kind).toBe('file_too_large'); + it('readBytesWindow honors byte offsets', async () => { + const target = path.join(h.workspace, 'offset.bin'); + await fsp.writeFile(target, Buffer.from([1, 2, 3, 4, 5, 6])); + const r = await h.fs.resolve('offset.bin', 'read'); + const out = await h.fs.readBytesWindow(r, { offset: 2, maxBytes: 3 }); + expect(Array.from(out.buffer)).toEqual([3, 4, 5]); + expect(out.offset).toBe(2); + expect(out.returnedBytes).toBe(3); + expect(out.truncated).toBe(true); }); }); @@ -388,6 +387,52 @@ describe('WorkspaceFileSystem - write/edit', () => { expect(access).toBeDefined(); }); + it('writeTextAtomic creates a new file and returns a raw-byte hash', async () => { + const r = await h.fs.resolve('atomic-new.txt', 'write'); + const out = await h.fs.writeTextAtomic(r, 'hello\n', { + mode: 'create', + }); + expect(out.created).toBe(true); + expect(out.sizeBytes).toBe(6); + expect(out.hash).toBe(rawHash('hello\n')); + expect(await fsp.readFile(r as string, 'utf-8')).toBe('hello\n'); + }); + + it('writeTextAtomic create rejects existing files', async () => { + const target = path.join(h.workspace, 'exists.txt'); + await fsp.writeFile(target, 'old'); + const r = await h.fs.resolve('exists.txt', 'write'); + const err = await h.fs + .writeTextAtomic(r, 'new', { mode: 'create' }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('file_already_exists'); + expect(await fsp.readFile(target, 'utf-8')).toBe('old'); + }); + + it('writeTextAtomic replace requires the current expectedHash', async () => { + const target = path.join(h.workspace, 'replace.txt'); + await fsp.writeFile(target, 'old\n'); + const r = await h.fs.resolve('replace.txt', 'write'); + const err = await h.fs + .writeTextAtomic(r, 'new\n', { + mode: 'replace', + expectedHash: rawHash('not current'), + }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('hash_mismatch'); + expect(await fsp.readFile(target, 'utf-8')).toBe('old\n'); + + const out = await h.fs.writeTextAtomic(r, 'new\n', { + mode: 'replace', + expectedHash: rawHash('old\n'), + }); + expect(out.created).toBe(false); + expect(out.hash).toBe(rawHash('new\n')); + expect(await fsp.readFile(target, 'utf-8')).toBe('new\n'); + }); + it('rejects oversize writes with file_too_large', async () => { const r = await h.fs.resolve('huge.txt', 'write'); const err = await h.fs @@ -537,6 +582,48 @@ describe('WorkspaceFileSystem - write/edit', () => { 'file', ); }); + + it('editAtomic applies exactly one replacement and returns the new hash', async () => { + const target = path.join(h.workspace, 'atomic-edit.txt'); + await fsp.writeFile(target, 'foo=1\nbar=2\n'); + const r = await h.fs.resolve('atomic-edit.txt', 'edit'); + const out = await h.fs.editAtomic(r, 'foo=1', 'foo=42', { + expectedHash: rawHash('foo=1\nbar=2\n'), + }); + expect(out.writtenBytes).toBe(Buffer.byteLength('foo=42\nbar=2\n')); + expect(out.hash).toBe(rawHash('foo=42\nbar=2\n')); + expect(await fsp.readFile(target, 'utf-8')).toBe('foo=42\nbar=2\n'); + }); + + it('editAtomic validates expectedHash against the edited snapshot first', async () => { + const target = path.join(h.workspace, 'atomic-edit-stale.txt'); + await fsp.writeFile(target, 'foo=1\n'); + const r = await h.fs.resolve('atomic-edit-stale.txt', 'edit'); + const err = await h.fs + .editAtomic(r, 'missing', 'foo=2', { + expectedHash: rawHash('different\n'), + }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('hash_mismatch'); + expect(await fsp.readFile(target, 'utf-8')).toBe('foo=1\n'); + }); + + it('editAtomic rejects absent and ambiguous matches with typed errors', async () => { + const target = path.join(h.workspace, 'atomic-ambiguous.txt'); + await fsp.writeFile(target, 'x\nx\n'); + const r = await h.fs.resolve('atomic-ambiguous.txt', 'edit'); + const missing = await h.fs + .editAtomic(r, 'y', 'z', { expectedHash: rawHash('x\nx\n') }) + .catch((e: unknown) => e); + expect(isFsError(missing)).toBe(true); + expect((missing as { kind: string }).kind).toBe('text_not_found'); + const ambiguous = await h.fs + .editAtomic(r, 'x', 'z', { expectedHash: rawHash('x\nx\n') }) + .catch((e: unknown) => e); + expect(isFsError(ambiguous)).toBe(true); + expect((ambiguous as { kind: string }).kind).toBe('ambiguous_text_match'); + }); }); describe('WorkspaceFileSystem - trust gate', () => { @@ -682,17 +769,53 @@ describe('WorkspaceFileSystem - TOCTOU + UTF-8 + cwd hardening', () => { expect(outsideContent).toBe('foo=1\n'); }); - it('readBytes opts.maxBytes is clamped to MAX_READ_BYTES (cannot widen past hard cap)', async () => { + it('writeTextAtomic does not write through a swapped temporary symlink', async () => { + const outside = path.join(h.scratch, 'temp-race-outside.txt'); + await fsp.writeFile(outside, 'outside\n'); + const originalOpen = fsp.open.bind(fsp); + const openSpy = vi + .spyOn(fsp, 'open') + .mockImplementation(async (...args) => { + const fh = await originalOpen(...args); + const candidate = String(args[0]); + if ( + candidate.includes('.temp-race.txt.') && + candidate.endsWith('.tmp') && + args[1] === 'wx' + ) { + const originalWriteFile = fh.writeFile.bind(fh); + vi.spyOn(fh, 'writeFile').mockImplementation(async (...writeArgs) => { + await fsp.unlink(candidate); + await fsp.symlink(outside, candidate, 'file'); + return originalWriteFile(...writeArgs); + }); + } + return fh; + }); + + try { + const r = await h.fs.resolve('temp-race.txt', 'write'); + const err = await h.fs + .writeTextAtomic(r, 'secret\n', { mode: 'create' }) + .catch((e: unknown) => e); + expect(isFsError(err)).toBe(true); + expect((err as { kind: string }).kind).toBe('symlink_escape'); + expect(await fsp.readFile(outside, 'utf-8')).toBe('outside\n'); + } finally { + openSpy.mockRestore(); + } + }); + + it('readBytes rejects opts.maxBytes above MAX_READ_BYTES', async () => { const policy = await import('./policy.js'); const big = path.join(h.workspace, 'overrun.bin'); await fsp.writeFile(big, Buffer.alloc(policy.MAX_READ_BYTES + 1)); const r = await h.fs.resolve('overrun.bin', 'read'); - // Caller tries to widen past the hard cap. const err = await h.fs .readBytes(r, { maxBytes: policy.MAX_READ_BYTES * 10 }) .catch((e: unknown) => e); expect(isFsError(err)).toBe(true); - expect((err as { kind: string }).kind).toBe('file_too_large'); + expect((err as { kind: string }).kind).toBe('parse_error'); }); }); diff --git a/packages/cli/src/serve/fs/workspaceFileSystem.ts b/packages/cli/src/serve/fs/workspaceFileSystem.ts index c58e1d8c2..1b74473f7 100644 --- a/packages/cli/src/serve/fs/workspaceFileSystem.ts +++ b/packages/cli/src/serve/fs/workspaceFileSystem.ts @@ -4,6 +4,7 @@ * SPDX-License-Identifier: Apache-2.0 */ +import { createHash, randomBytes } from 'node:crypto'; import { promises as fsp } from 'node:fs'; import * as path from 'node:path'; import { glob as globAsync } from 'glob'; @@ -18,6 +19,9 @@ import { glob as globAsync } from 'glob'; import { StandardFileSystemService, + decodeBufferWithEncodingInfo, + detectLineEnding, + encodeTextFileContent, loadIgnoreRules, isWithinRoot, type Ignore, @@ -40,7 +44,6 @@ import { MAX_READ_BYTES, assertTrustedForIntent, detectBinary, - enforceReadBytesSize, enforceReadSize, enforceWriteSize, shouldIgnore, @@ -72,6 +75,7 @@ export interface ReadMeta { bom?: boolean; lineEnding: 'crlf' | 'lf'; sizeBytes?: number; + hash?: ContentHash; truncated?: boolean; matchedIgnore?: 'file' | 'directory'; originalLineCount?: number; @@ -105,8 +109,44 @@ export interface GlobOptions { maxResults?: number; } +export type ContentHash = `sha256:${string}`; + +export interface ReadBytesOptions { + /** Zero-based byte offset. */ + offset?: number; + /** Maximum bytes to return; defaults to MAX_READ_BYTES. */ + maxBytes?: number; +} + +export interface ReadBytesOutcome { + buffer: Buffer; + sizeBytes: number; + returnedBytes: number; + offset: number; + truncated: boolean; + /** Present only when the returned window covers the whole file. */ + hash?: ContentHash; +} + +export type WriteMode = 'create' | 'replace'; + +export interface WriteTextAtomicOptions extends WriteTextFileOptions { + mode: WriteMode; + expectedHash?: ContentHash; + lineEnding?: 'crlf' | 'lf'; +} + +export interface WriteTextAtomicOutcome { + created: boolean; + sizeBytes: number; + hash: ContentHash; + meta: ReadMeta; +} + export interface WriteOutcome { writtenBytes: number; + hash?: ContentHash; + meta?: ReadMeta; } export interface RequestContext extends AuditContext { @@ -126,9 +166,18 @@ export interface WorkspaceFileSystem { p: ResolvedPath, opts?: ReadTextOptions, ): Promise<{ content: string; meta: ReadMeta }>; - readBytes(p: ResolvedPath, opts?: { maxBytes?: number }): Promise; + readBytes(p: ResolvedPath, opts?: ReadBytesOptions): Promise; + readBytesWindow( + p: ResolvedPath, + opts?: ReadBytesOptions, + ): Promise; list(p: ResolvedPath, opts?: ListOptions): Promise; glob(pattern: string, opts?: GlobOptions): Promise; + writeTextAtomic( + p: ResolvedPath, + content: string, + opts: WriteTextAtomicOptions, + ): Promise; writeText( p: ResolvedPath, content: string, @@ -138,6 +187,13 @@ export interface WorkspaceFileSystem { p: ResolvedPath, oldText: string, newText: string, + opts?: { expectedHash?: ContentHash }, + ): Promise; + editAtomic( + p: ResolvedPath, + oldText: string, + newText: string, + opts: { expectedHash: ContentHash }, ): Promise; } @@ -203,6 +259,7 @@ export function createWorkspaceFileSystemFactory( includeRawPaths: deps.includeRawPaths, }); const lowFs = new StandardFileSystemService(); + const pathLocks = new PathMutexRegistry(); return { forRequest(ctx) { @@ -213,6 +270,7 @@ export function createWorkspaceFileSystemFactory( audit, ctx, lowFs, + pathLocks, }); }, }; @@ -225,6 +283,7 @@ interface ImplDeps { audit: AuditPublisher; ctx: RequestContext; lowFs: StandardFileSystemService; + pathLocks: PathMutexRegistry; } class WorkspaceFileSystemImpl implements WorkspaceFileSystem { @@ -271,35 +330,6 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { const start = performance.now(); try { assertTrustedForIntent(this.deps.trusted, 'read'); - const st = await fsp.stat(p as string); - // Hard size gate before we delegate to lowFs.readTextFile — - // that helper's underlying `readFileWithLineAndLimit` slurps - // the whole file into memory before slicing lines, so an - // unbounded request against a 5 GB text file would OOM the - // daemon (or, on a healthy host, flood the SSE replay ring - // with a 5 GB string). `MAX_READ_BYTES` is the hard cap and - // is independent of the caller's `opts.maxBytes` (which is a - // *softer* post-read truncation target — the boundary still - // honors it via `enforceReadSize` below). A future streaming - // read path can lift this hard cap by reading only the first - // N bytes; for now files above the cap throw and the SDK - // consumer can fall back to `readBytes` with an explicit - // length window. - if (st.size > MAX_READ_BYTES) { - throw new FsError( - 'file_too_large', - `file of ${st.size} bytes exceeds read cap of ${MAX_READ_BYTES} bytes`, - { - hint: 'use readBytes for explicit byte-windowed access on large files', - }, - ); - } - if (await detectBinary(p)) { - throw new FsError('binary_file', `binary file: ${p}`, { - hint: 'use readBytes for binary content', - }); - } - const sizeOutcome = enforceReadSize(st.size, opts.maxBytes); // Reject `opts.line` values that the docstring forbids // (positive integer required). Without this guard `Infinity` // (`Infinity > 1` is true; `Infinity - 1` is still @@ -317,98 +347,24 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { `line must be a positive integer, got ${opts.line}`, ); } - // Delegate encoding-aware read to the existing core service so - // BOM, CRLF, and iconv-supported codepages remain consistent - // with what the tools layer already does. The core service's - // `line` parameter is a 0-based slice index whereas the - // boundary's public `ReadTextOptions.line` is 1-based (the - // convention SDK consumers expect from line-numbered errors, - // editor jump-to-line, etc.). Convert here so the public - // contract isn't tied to the internal helper's indexing. - const startLineIndex = opts.line !== undefined ? opts.line - 1 : 0; - const result = await this.deps.lowFs.readTextFile({ - path: p as string, - limit: opts.limit ?? Number.POSITIVE_INFINITY, - line: startLineIndex, - }); - // Post-read size sanity check. The pre-stat `MAX_READ_BYTES` - // gate above sees the file's size at stat time; a concurrent - // writer can grow the file from sub-cap to multi-GB between - // `fsp.stat` and `lowFs.readTextFile`'s underlying - // `fs.promises.readFile`. `readFileWithLineAndLimit` slurps - // the whole file into memory before slicing, so the stat - // gate alone is bypassable. Reject post-read if the - // returned content exceeds the cap. The proper fix is - // fd-based reading (open + stat + read tied to the same fd) - // — tracked as a hardening follow-up; this byte-length - // check is the defense-in-depth layer. - const decodedBytes = Buffer.byteLength(result.content, 'utf-8'); - if (decodedBytes > MAX_READ_BYTES) { - throw new FsError( - 'file_too_large', - `file grew during read to ${decodedBytes} bytes (cap ${MAX_READ_BYTES})`, - { - hint: 'concurrent writer detected via post-read size; retry or readBytes with explicit window', - }, - ); - } + const snapshot = await readTextSnapshotFromResolvedFile(p, opts); const ignoreVerdict = shouldIgnore( p, this.deps.boundWorkspace, this.deps.ignore, 'file', ); - const meta: ReadMeta = { - encoding: result._meta?.encoding, - bom: result._meta?.bom, - lineEnding: (result._meta?.lineEnding ?? 'lf') as 'crlf' | 'lf', - sizeBytes: st.size, - originalLineCount: result._meta?.originalLineCount, - }; - let truncatedContent = result.content; - if (sizeOutcome.truncated) { - // Use `safeUtf8Truncate` instead of `subarray(0,n).toString('utf-8')` - // so the slice never splits a multi-byte codepoint (CJK, - // emoji). The plain `subarray + toString` approach silently - // emits U+FFFD at the boundary and breaks downstream JSON / - // source-code parsing of the truncated prefix. - const buf = Buffer.from(result.content, 'utf-8'); - if (buf.length > sizeOutcome.bytesToRead) { - truncatedContent = safeUtf8Truncate( - buf, - sizeOutcome.bytesToRead, - ).toString('utf-8'); - } - meta.truncated = true; - } - // Surface truncation whenever lowFs's own `limit` clipped the - // content too — without this the audit row + meta.truncated - // would silently disagree on whether the SDK consumer received - // the full file. - if ( - opts.limit !== undefined && - Number.isFinite(opts.limit) && - result._meta?.originalLineCount !== undefined && - result._meta.originalLineCount > opts.limit + startLineIndex - ) { - meta.truncated = true; - } + const meta = snapshot.meta; if (ignoreVerdict.ignored) meta.matchedIgnore = ignoreVerdict.category; - // Post-read TOCTOU check: confirm the path's inode hasn't - // changed and it isn't now a symlink. Catches the - // swap-during-read attack where the file is replaced - // mid-operation with a symlink pointing outside the - // workspace. - await assertInodeStableAfterRead(p as string, st.ino); this.deps.audit.recordAccess(this.deps.ctx, { intent: 'read', absolute: p, durationMs: performance.now() - start, - sizeBytes: st.size, + sizeBytes: meta.sizeBytes, truncated: meta.truncated, matchedIgnore: meta.matchedIgnore, }); - return { content: truncatedContent, meta }; + return { content: snapshot.content, meta }; } catch (err) { throw this.recordAndWrap(err, 'read', p as string); } @@ -416,56 +372,92 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { async readBytes( p: ResolvedPath, - opts: { maxBytes?: number } = {}, + opts: ReadBytesOptions = {}, ): Promise { + const out = await this.readBytesWindow(p, opts); + return out.buffer; + } + + async readBytesWindow( + p: ResolvedPath, + opts: ReadBytesOptions = {}, + ): Promise { const start = performance.now(); try { assertTrustedForIntent(this.deps.trusted, 'read'); - const st = await fsp.stat(p as string); - // Hard cap (file size > MAX_READ_BYTES) always throws; this - // is the OOM defense that's independent of caller-supplied - // `maxBytes`. - enforceReadBytesSize(st.size); - let buf = await fsp.readFile(p as string); - // Post-read size sanity check — same TOCTOU window as - // `readText` (concurrent appender keeps the same inode, so - // `assertInodeStableAfterRead` doesn't catch *growth*). - // The pre-stat gate sees the size at stat time; an attacker - // can grow the file from sub-cap to multi-GB between - // `fsp.stat` and `fsp.readFile`, OOMing the daemon. Reject - // post-read if the buffer ended up over the hard cap. The - // proper fix (fd-based read with `fileHandle.stat` + - // bounded `fileHandle.read`) is a hardening follow-up. - if (buf.length > MAX_READ_BYTES) { + const offset = opts.offset ?? 0; + const maxBytes = opts.maxBytes ?? MAX_READ_BYTES; + if (!Number.isSafeInteger(offset) || offset < 0) { throw new FsError( - 'file_too_large', - `file grew during read to ${buf.length} bytes (cap ${MAX_READ_BYTES})`, - { - hint: 'concurrent writer detected via post-read size; retry or readText for capped truncation', - }, + 'parse_error', + `offset must be a non-negative integer, got ${offset}`, ); } - // Soft cap: caller's `opts.maxBytes` is a window cap matching - // the parameter name's promise. Earlier semantics treated - // it as a "reject if file > maxBytes" gate, which violated - // the API contract — a caller asking for `maxBytes: 1024` - // on a 200 KB file expected to receive 1 KB, not an - // exception. We now truncate post-read so the returned - // buffer never exceeds `opts.maxBytes`. The hard - // `MAX_READ_BYTES` cap above ensures the underlying - // `fsp.readFile` allocation is bounded regardless. - if (opts.maxBytes !== undefined && opts.maxBytes < buf.length) { - buf = buf.subarray(0, opts.maxBytes); + if ( + !Number.isSafeInteger(maxBytes) || + maxBytes < 1 || + maxBytes > MAX_READ_BYTES + ) { + throw new FsError( + 'parse_error', + `maxBytes must be a positive integer in [1, ${MAX_READ_BYTES}], got ${maxBytes}`, + ); + } + const pre = await fsp.lstat(p as string); + if (!pre.isFile()) { + throw new FsError('parse_error', `path is not a regular file: ${p}`); + } + const fh = await fsp.open(p as string, 'r'); + let st: Awaited>; + let buf: Buffer; + try { + st = await fh.stat(); + assertSameFile(pre, st, p as string, 'read'); + const available = Math.max(0, st.size - offset); + const toRead = Math.min(maxBytes, available); + buf = Buffer.allocUnsafe(toRead); + if (toRead > 0) { + const read = await fh.read(buf, 0, toRead, offset); + buf = + read.bytesRead === toRead ? buf : buf.subarray(0, read.bytesRead); + } + // Bind the returned bytes to a stable on-disk snapshot: an + // in-place rewrite (size unchanged, content changed) or + // append/truncate between the pre-stat and read would + // otherwise leave us with a buffer that no longer matches + // the file. Mirror `readStableRegularFileBuffer` and require + // ino+size+mtime to be unchanged on the same fd before + // emitting the response — clients use the full-window hash + // as an optimistic-concurrency token, so a stale snapshot + // must surface as a retryable `hash_mismatch`. + const afterRead = await fh.stat(); + assertSameFile(st, afterRead, p as string, 'read'); + if (afterRead.size !== st.size || afterRead.mtimeMs !== st.mtimeMs) { + throw new FsError('hash_mismatch', `file changed during read: ${p}`, { + hint: 'retry after re-reading the latest file hash', + }); + } + } finally { + await fh.close(); } - // Post-read TOCTOU guard — same shape as `readText`. await assertInodeStableAfterRead(p as string, st.ino); + const fullWindow = offset === 0 && buf.length === st.size; + const out: ReadBytesOutcome = { + buffer: buf, + sizeBytes: st.size, + returnedBytes: buf.length, + offset, + truncated: !fullWindow, + ...(fullWindow ? { hash: hashBuffer(buf) } : {}), + }; this.deps.audit.recordAccess(this.deps.ctx, { intent: 'read', absolute: p, durationMs: performance.now() - start, sizeBytes: buf.length, + truncated: out.truncated, }); - return buf; + return out; } catch (err) { throw this.recordAndWrap(err, 'read', p as string); } @@ -742,6 +734,65 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { } } + async writeTextAtomic( + p: ResolvedPath, + content: string, + opts: WriteTextAtomicOptions, + ): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'write'); + validateWriteTextAtomicOptions(opts); + const decodedSizeBytes = Buffer.byteLength(content, 'utf-8'); + enforceWriteSize(decodedSizeBytes); + const out = await this.deps.pathLocks.runExclusive( + p as string, + async () => { + const existingMeta = + opts.mode === 'replace' + ? await readExistingTextMeta(p, opts.expectedHash) + : undefined; + if (opts.mode === 'create') { + await assertCreateTargetAbsent(p as string); + } + const meta = mergeWriteMeta(existingMeta, opts); + const result = await atomicWriteTextResolvedFile({ + target: p, + content, + mode: opts.mode, + expectedHash: opts.expectedHash, + meta, + }); + const verdict = shouldIgnore( + p, + this.deps.boundWorkspace, + this.deps.ignore, + 'file', + ); + if (verdict.ignored) meta.matchedIgnore = verdict.category; + meta.sizeBytes = result.sizeBytes; + meta.hash = result.hash; + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'write', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: result.sizeBytes, + matchedIgnore: meta.matchedIgnore, + }); + return { + created: opts.mode === 'create', + sizeBytes: result.sizeBytes, + hash: result.hash, + meta, + }; + }, + ); + return out; + } catch (err) { + throw this.recordAndWrap(err, 'write', p as string); + } + } + async writeText( p: ResolvedPath, content: string, @@ -787,11 +838,113 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { } } + async editAtomic( + p: ResolvedPath, + oldText: string, + newText: string, + opts: { expectedHash: ContentHash }, + ): Promise { + const start = performance.now(); + try { + assertTrustedForIntent(this.deps.trusted, 'edit'); + if (!isContentHash(opts.expectedHash)) { + throw new FsError( + 'parse_error', + 'expectedHash must match sha256:<64 lowercase hex chars>', + ); + } + if (typeof oldText !== 'string' || oldText.length === 0) { + throw new FsError( + 'parse_error', + `oldText must be a non-empty string for edit on ${p}`, + ); + } + if (typeof newText !== 'string') { + throw new FsError('parse_error', 'newText must be a string'); + } + const out = await this.deps.pathLocks.runExclusive( + p as string, + async () => { + const snapshot = await readTextSnapshotFromResolvedFile(p); + if (snapshot.meta.hash !== opts.expectedHash) { + throw new FsError( + 'hash_mismatch', + `expected ${opts.expectedHash}, found ${snapshot.meta.hash}`, + { hint: 're-read the file and retry with the latest hash' }, + ); + } + const current = snapshot.content; + const occurrences = countOccurrences(current, oldText); + if (occurrences === 0) { + const snippet = + oldText.length > 80 ? oldText.slice(0, 80) + '...' : oldText; + throw new FsError('text_not_found', `oldText not found in ${p}`, { + hint: `searched for: ${JSON.stringify(snippet)}`, + }); + } + if (occurrences > 1) { + throw new FsError( + 'ambiguous_text_match', + `oldText appears ${occurrences} times in ${p}`, + { + hint: 'pass a larger oldText span that occurs exactly once', + }, + ); + } + const idx = current.indexOf(oldText); + const next = + current.slice(0, idx) + + newText + + current.slice(idx + oldText.length); + enforceWriteSize(Buffer.byteLength(next, 'utf-8')); + const meta = mergeWriteMeta(snapshot.meta, {}); + const result = await atomicWriteTextResolvedFile({ + target: p, + content: next, + mode: 'replace', + expectedHash: opts.expectedHash, + meta, + }); + const verdict = shouldIgnore( + p, + this.deps.boundWorkspace, + this.deps.ignore, + 'file', + ); + if (verdict.ignored) meta.matchedIgnore = verdict.category; + meta.sizeBytes = result.sizeBytes; + meta.hash = result.hash; + this.deps.audit.recordAccess(this.deps.ctx, { + intent: 'edit', + absolute: p, + durationMs: performance.now() - start, + sizeBytes: result.sizeBytes, + matchedIgnore: meta.matchedIgnore, + }); + return { + writtenBytes: result.sizeBytes, + hash: result.hash, + meta, + }; + }, + ); + return out; + } catch (err) { + throw this.recordAndWrap(err, 'edit', p as string); + } + } + async edit( p: ResolvedPath, oldText: string, newText: string, + opts?: { expectedHash?: ContentHash }, ): Promise { + if (opts?.expectedHash !== undefined) { + return this.editAtomic(p, oldText, newText, { + expectedHash: opts.expectedHash, + }); + } const start = performance.now(); try { assertTrustedForIntent(this.deps.trusted, 'edit'); @@ -956,6 +1109,635 @@ class WorkspaceFileSystemImpl implements WorkspaceFileSystem { } } +const CONTENT_HASH_RE = /^sha256:[0-9a-f]{64}$/; + +export function isContentHash(value: unknown): value is ContentHash { + return typeof value === 'string' && CONTENT_HASH_RE.test(value); +} + +class PathMutexRegistry { + private readonly tails = new Map>(); + + async runExclusive(key: string, fn: () => Promise): Promise { + const previous = this.tails.get(key) ?? Promise.resolve(); + let release!: () => void; + const current = new Promise((resolve) => { + release = resolve; + }); + const tail = previous.catch(() => undefined).then(() => current); + this.tails.set(key, tail); + await previous.catch(() => undefined); + try { + return await fn(); + } finally { + release(); + if (this.tails.get(key) === tail) this.tails.delete(key); + } + } +} + +interface AtomicWriteTextInput { + target: ResolvedPath; + content: string; + mode: WriteMode; + expectedHash?: ContentHash; + meta: ReadMeta; +} + +interface AtomicWriteTextOutcome { + sizeBytes: number; + hash: ContentHash; + stat: Awaited>; +} + +function validateWriteTextAtomicOptions(opts: WriteTextAtomicOptions): void { + if (opts.mode !== 'create' && opts.mode !== 'replace') { + throw new FsError( + 'parse_error', + 'mode must be either "create" or "replace"', + ); + } + if (opts.expectedHash !== undefined && !isContentHash(opts.expectedHash)) { + throw new FsError( + 'parse_error', + 'expectedHash must match sha256:<64 lowercase hex chars>', + ); + } + if (opts.mode === 'replace' && opts.expectedHash === undefined) { + throw new FsError( + 'parse_error', + 'expectedHash is required when mode is "replace"', + ); + } + if ( + opts.lineEnding !== undefined && + opts.lineEnding !== 'lf' && + opts.lineEnding !== 'crlf' + ) { + throw new FsError('parse_error', 'lineEnding must be "lf" or "crlf"'); + } +} + +interface TextSnapshot { + content: string; + meta: ReadMeta & { hash: ContentHash; sizeBytes: number }; +} + +async function readTextSnapshotFromResolvedFile( + p: ResolvedPath, + opts: ReadTextOptions = {}, +): Promise { + const pre = await fsp.lstat(p as string); + if (pre.isSymbolicLink()) { + throw new FsError('symlink_escape', `path is a symlink: ${p}`, { + hint: 're-resolve the target file instead of reading through a link', + }); + } + if (!pre.isFile()) { + throw new FsError('parse_error', `path is not a regular file: ${p}`); + } + // Hard size gate before reading the full raw snapshot. Files above + // this cap should use `readBytesWindow()` with an explicit byte + // window instead of allocating a full decoded text snapshot. + if (pre.size > MAX_READ_BYTES) { + throw new FsError( + 'file_too_large', + `file of ${pre.size} bytes exceeds read cap of ${MAX_READ_BYTES} bytes`, + { + hint: 'use readBytes for explicit byte-windowed access on large files', + }, + ); + } + + const raw = await readStableRegularFileBuffer(p as string, pre); + if (looksBinary(raw)) { + throw new FsError('binary_file', `binary file: ${p}`, { + hint: 'use readBytes for binary content', + }); + } + + const decoded = decodeBufferWithEncodingInfo(raw); + const startLineIndex = opts.line !== undefined ? opts.line - 1 : 0; + const sliced = sliceDecodedText( + decoded.content, + startLineIndex, + opts.limit ?? Number.POSITIVE_INFINITY, + ); + const sizeOutcome = enforceReadSize(raw.length, opts.maxBytes); + let content = sliced.content; + const meta: TextSnapshot['meta'] = { + encoding: decoded.encoding, + bom: decoded.bom, + lineEnding: detectLineEnding(content), + sizeBytes: raw.length, + originalLineCount: sliced.originalLineCount, + hash: hashBuffer(raw), + }; + + if (sizeOutcome.truncated) { + const buf = Buffer.from(content, 'utf-8'); + if (buf.length > sizeOutcome.bytesToRead) { + content = safeUtf8Truncate(buf, sizeOutcome.bytesToRead).toString( + 'utf-8', + ); + meta.lineEnding = detectLineEnding(content); + } + meta.truncated = true; + } + + if ( + opts.limit !== undefined && + Number.isFinite(opts.limit) && + sliced.originalLineCount > opts.limit + startLineIndex + ) { + meta.truncated = true; + } + + return { content, meta }; +} + +async function readStableRegularFileBuffer( + p: string, + pre: Awaited>, +): Promise { + const fh = await fsp.open(p, 'r'); + let opened: Awaited> | undefined; + try { + opened = await fh.stat(); + assertSameFile(pre, opened, p, 'read'); + if (opened.size > MAX_READ_BYTES) { + throw new FsError( + 'file_too_large', + `file of ${opened.size} bytes exceeds read cap of ${MAX_READ_BYTES} bytes`, + { + hint: 'use readBytes for explicit byte-windowed access on large files', + }, + ); + } + const out = Buffer.alloc(opened.size); + let offset = 0; + while (offset < opened.size) { + const read = await fh.read(out, offset, opened.size - offset, offset); + if (read.bytesRead === 0) break; + offset += read.bytesRead; + } + const afterRead = await fh.stat(); + assertSameFile(opened, afterRead, p, 'read'); + if ( + afterRead.size !== opened.size || + afterRead.mtimeMs !== opened.mtimeMs + ) { + throw new FsError('hash_mismatch', `file changed during read: ${p}`, { + hint: 'retry after re-reading the latest file hash', + }); + } + const post = await fsp.lstat(p); + assertSameFile(pre, post, p, 'read'); + if (post.size !== opened.size || post.mtimeMs !== opened.mtimeMs) { + throw new FsError('hash_mismatch', `file changed during read: ${p}`, { + hint: 'retry after re-reading the latest file hash', + }); + } + return offset === out.length ? out : out.subarray(0, offset); + } finally { + await fh.close(); + } +} + +function sliceDecodedText( + content: string, + startLine: number, + limit: number, +): { content: string; originalLineCount: number } { + const lines = content.split('\n'); + const originalLineCount = lines.length; + const endLine = Math.min(startLine + limit, originalLineCount); + const actualStartLine = Math.min(startLine, originalLineCount); + return { + content: lines.slice(actualStartLine, endLine).join('\n'), + originalLineCount, + }; +} + +function looksBinary(buf: Buffer): boolean { + if (buf.length === 0) return false; + const bomProbe = buf.subarray(0, Math.min(4, buf.length)); + const hasUnicodeBom = + (bomProbe.length >= 4 && + ((bomProbe[0] === 0xff && + bomProbe[1] === 0xfe && + bomProbe[2] === 0x00 && + bomProbe[3] === 0x00) || + (bomProbe[0] === 0x00 && + bomProbe[1] === 0x00 && + bomProbe[2] === 0xfe && + bomProbe[3] === 0xff))) || + (bomProbe.length >= 3 && + bomProbe[0] === 0xef && + bomProbe[1] === 0xbb && + bomProbe[2] === 0xbf) || + (bomProbe.length >= 2 && + ((bomProbe[0] === 0xff && bomProbe[1] === 0xfe) || + (bomProbe[0] === 0xfe && bomProbe[1] === 0xff))); + if (hasUnicodeBom) return false; + + const sampleLength = Math.min(4096, buf.length); + let nonPrintableCount = 0; + for (let i = 0; i < sampleLength; i++) { + if (buf[i] === 0) return true; + if (buf[i] < 9 || (buf[i] > 13 && buf[i] < 32)) { + nonPrintableCount++; + } + } + return nonPrintableCount / sampleLength > 0.3; +} + +async function readExistingTextMeta( + p: ResolvedPath, + expectedHash?: ContentHash, +): Promise { + const snapshot = await readTextSnapshotFromResolvedFile(p); + if (expectedHash !== undefined && snapshot.meta.hash !== expectedHash) { + throw new FsError( + 'hash_mismatch', + `expected ${expectedHash}, found ${snapshot.meta.hash}`, + { hint: 're-read the file and retry with the latest hash' }, + ); + } + return snapshot.meta; +} + +function mergeWriteMeta( + existing: Partial | undefined, + opts: Partial, +): ReadMeta { + return { + encoding: opts.encoding ?? existing?.encoding ?? 'utf-8', + bom: opts.bom ?? existing?.bom ?? false, + lineEnding: opts.lineEnding ?? existing?.lineEnding ?? 'lf', + }; +} + +async function atomicWriteTextResolvedFile( + input: AtomicWriteTextInput, +): Promise { + const target = input.target as string; + const parent = path.dirname(target); + const parentStat = await fsp.lstat(parent); + // Defense-in-depth against a parent-symlink swap. A full fix + // requires parent-fd / `openat`-style publish (Node stdlib does + // not expose this) — tracked alongside the fd-based read + // follow-up referenced by `assertInodeStableAfterRead`. This + // guard at least surfaces an obviously-swapped parent before + // we open the temp file or rename through it. + if (parentStat.isSymbolicLink()) { + throw new FsError('symlink_escape', `parent path is a symlink: ${parent}`, { + hint: 're-resolve the target after detecting parent-symlink swaps', + }); + } + if (!parentStat.isDirectory()) { + throw new FsError( + 'parse_error', + `parent path is not a directory: ${parent}`, + ); + } + const tmpPath = path.join( + parent, + `.${path.basename(target)}.${process.pid}.${randomBytes(6).toString('hex')}.tmp`, + ); + let tempLive = false; + let tempHandle: Awaited> | undefined; + let tempStat: Awaited> | undefined; + try { + tempHandle = await reserveTempFile(tmpPath); + tempLive = true; + const encoded = await writeEncodedTextTemp({ + targetPath: target, + tmpPath, + content: input.content, + meta: input.meta, + handle: tempHandle, + }); + tempStat = encoded.stat; + const targetState = await assertAtomicTargetPrecondition({ + target, + mode: input.mode, + expectedHash: input.expectedHash, + }); + await chmodHandleBestEffort(tempHandle, targetState.mode ?? 0o600); + await assertTempPathMatchesStat(tmpPath, tempStat); + await tempHandle.close(); + tempHandle = undefined; + await assertTempPathMatchesStat(tmpPath, tempStat); + if (input.mode === 'create') { + await publishCreateNoClobber(tmpPath, target); + } else { + await renameWithRetryLocal(tmpPath, target, 3, 50); + } + tempLive = false; + await fsyncParentDirBestEffort(parent); + return encoded; + } catch (err) { + await tempHandle?.close().catch(() => undefined); + if (tempLive) { + try { + await fsp.unlink(tmpPath); + } catch { + // Best-effort cleanup; preserve the original failure. + } + } + throw err; + } +} + +async function reserveTempFile( + tmpPath: string, +): Promise>> { + return fsp.open(tmpPath, 'wx', 0o600); +} + +async function writeEncodedTextTemp(input: { + targetPath: string; + tmpPath: string; + content: string; + meta: ReadMeta; + handle: Awaited>; +}): Promise { + const buf = encodeTextFileContent( + input.targetPath, + input.content, + buildWriteMeta(input.meta), + ); + enforceWriteSize(buf.length); + await input.handle.writeFile(buf); + await syncHandleBestEffort(input.handle); + const st = await fsp.lstat(input.tmpPath); + const opened = await input.handle.stat(); + assertSameFile(opened, st, input.tmpPath, 'write'); + if (st.isSymbolicLink()) { + throw new FsError( + 'symlink_escape', + `temporary path became a symlink: ${input.tmpPath}`, + { hint: 'temp-file race detected before final rename' }, + ); + } + if (!st.isFile()) { + throw new FsError( + 'parse_error', + `temporary path is not a regular file: ${input.tmpPath}`, + ); + } + return { sizeBytes: buf.length, hash: hashBuffer(buf), stat: st }; +} + +async function assertCreateTargetAbsent(target: string): Promise { + try { + const st = await fsp.lstat(target); + if (st.isSymbolicLink()) { + throw new FsError( + 'symlink_escape', + `path is a symlink and cannot be created over: ${target}`, + { hint: 'remove the symlink or resolve the target explicitly' }, + ); + } + throw new FsError('file_already_exists', `file already exists: ${target}`); + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'ENOENT') return; + throw err; + } +} + +async function assertAtomicTargetPrecondition(input: { + target: string; + mode: WriteMode; + expectedHash?: ContentHash; +}): Promise<{ mode?: number }> { + if (input.mode === 'create') { + await assertCreateTargetAbsent(input.target); + return {}; + } + if (!isContentHash(input.expectedHash)) { + throw new FsError( + 'parse_error', + 'expectedHash is required when mode is "replace"', + ); + } + const pre = await fsp.lstat(input.target); + if (pre.isSymbolicLink()) { + throw new FsError( + 'symlink_escape', + `path is a symlink and cannot be replaced atomically: ${input.target}`, + { hint: 're-resolve the target file instead of writing through a link' }, + ); + } + if (!pre.isFile()) { + throw new FsError( + 'parse_error', + `path is not a regular file: ${input.target}`, + ); + } + const actual = await hashRegularFileAtPath(input.target, pre); + if (actual !== input.expectedHash) { + throw new FsError( + 'hash_mismatch', + `expected ${input.expectedHash}, found ${actual}`, + { hint: 're-read the file and retry with the latest hash' }, + ); + } + return { mode: pre.mode & 0o7777 }; +} + +async function hashRegularFileAtPath( + p: string, + pre: Awaited>, +): Promise { + const fh = await fsp.open(p, 'r'); + const hash = createHash('sha256'); + let opened: Awaited> | undefined; + try { + opened = await fh.stat(); + assertSameFile(pre, opened, p, 'read'); + const buf = Buffer.allocUnsafe(64 * 1024); + let offset = 0; + while (offset < opened.size) { + const read = await fh.read( + buf, + 0, + Math.min(buf.length, opened.size - offset), + offset, + ); + if (read.bytesRead === 0) break; + hash.update(buf.subarray(0, read.bytesRead)); + offset += read.bytesRead; + } + } finally { + await fh.close(); + } + if (opened === undefined) { + throw new FsError('internal_error', `failed to stat opened file: ${p}`); + } + const post = await fsp.lstat(p); + assertSameFile(pre, post, p, 'read'); + if (post.size !== opened.size || post.mtimeMs !== opened.mtimeMs) { + throw new FsError('hash_mismatch', `file changed during hash: ${p}`, { + hint: 'retry after re-reading the latest file hash', + }); + } + return `sha256:${hash.digest('hex')}`; +} + +function hashBuffer(buf: Buffer): ContentHash { + return `sha256:${createHash('sha256').update(buf).digest('hex')}`; +} + +function assertSameFile( + pre: { dev: number | bigint; ino: number | bigint }, + post: { dev: number | bigint; ino: number | bigint }, + p: string, + intent: Intent, +): void { + const preDev = toBigInt(pre.dev); + const postDev = toBigInt(post.dev); + const preIno = toBigInt(pre.ino); + const postIno = toBigInt(post.ino); + if ( + preDev !== 0n && + postDev !== 0n && + preIno !== 0n && + postIno !== 0n && + (preDev !== postDev || preIno !== postIno) + ) { + throw new FsError('symlink_escape', `path changed during ${intent}: ${p}`, { + hint: 'TOCTOU swap detected via device/inode comparison', + }); + } +} + +function toBigInt(value: number | bigint): bigint { + return typeof value === 'bigint' ? value : BigInt(value); +} + +async function syncHandleBestEffort( + fh: Awaited>, +): Promise { + try { + await fh.sync(); + } catch { + // Some platforms/filesystems reject fsync on temporary files. + } +} + +async function fsyncParentDirBestEffort(parent: string): Promise { + let fh: Awaited> | undefined; + try { + fh = await fsp.open(parent, 'r'); + await fh.sync(); + } catch { + // Windows and some filesystems do not support directory fsync. + } finally { + await fh?.close().catch(() => undefined); + } +} + +async function chmodHandleBestEffort( + fh: Awaited>, + mode: number, +): Promise { + try { + await fh.chmod(mode); + } catch { + // Not all filesystems support POSIX permission bits. + } +} + +async function assertTempPathMatchesStat( + tmpPath: string, + expected: Awaited>, +): Promise { + const st = await fsp.lstat(tmpPath); + if (st.isSymbolicLink()) { + throw new FsError( + 'symlink_escape', + `temporary path is a symlink: ${tmpPath}`, + { + hint: 'temp-file race detected before final rename', + }, + ); + } + if (!st.isFile()) { + throw new FsError( + 'parse_error', + `temporary path is not a regular file: ${tmpPath}`, + ); + } + assertSameFile(expected, st, tmpPath, 'write'); +} + +// POSIX `rename(src, dest)` overwrites an existing regular file, +// which would silently break the public `mode: 'create'` contract +// if an external process raced us between the absence check and +// the publish. `link()` is the portable no-clobber publish: it +// returns `EEXIST` atomically when `dest` already exists, on both +// POSIX filesystems and NTFS. The early `assertCreateTargetAbsent` +// stays in place to give friendlier `symlink_escape` / +// `file_already_exists` errors on the non-racing path; this is the +// hard guarantee that closes the race window. +async function publishCreateNoClobber( + tmpPath: string, + target: string, +): Promise { + try { + await fsp.link(tmpPath, target); + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + if (code === 'EEXIST') { + throw new FsError( + 'file_already_exists', + `file already exists: ${target}`, + ); + } + throw err; + } + // After link(), tmp and target name the same inode. Drop the + // tmp name best-effort — if unlink fails the publish has still + // succeeded, so we must not bubble the error and confuse the + // caller into thinking the create failed. + await fsp.unlink(tmpPath).catch(() => undefined); +} + +async function renameWithRetryLocal( + src: string, + dest: string, + retries: number, + delayMs: number, +): Promise { + for (let attempt = 0; attempt <= retries; attempt++) { + try { + await fsp.rename(src, dest); + return; + } catch (err) { + const code = (err as NodeJS.ErrnoException)?.code; + const retryable = code === 'EPERM' || code === 'EACCES'; + if (!retryable || attempt === retries) throw err; + await new Promise((resolve) => + setTimeout(resolve, delayMs * 2 ** attempt), + ); + } + } +} + +function countOccurrences(haystack: string, needle: string): number { + let count = 0; + let index = 0; + while (true) { + const found = haystack.indexOf(needle, index); + if (found === -1) return count; + count += 1; + index = found + needle.length; + } +} + /** * Truncate a UTF-8 buffer to at most `maxBytes` bytes WITHOUT * splitting a multi-byte codepoint. `Buffer.subarray(0, n).toString('utf-8')` @@ -1105,11 +1887,12 @@ function kindFromStatLike(s: { } function buildWriteMeta( - opts: WriteTextFileOptions, + opts: WriteTextFileOptions & { lineEnding?: 'crlf' | 'lf' }, ): Record | undefined { const meta: Record = {}; - if (opts.bom) meta['bom'] = true; + if (opts.bom !== undefined) meta['bom'] = opts.bom; if (opts.encoding) meta['encoding'] = opts.encoding; + if (opts.lineEnding) meta['lineEnding'] = opts.lineEnding; return Object.keys(meta).length > 0 ? meta : undefined; } diff --git a/packages/cli/src/serve/routes/workspaceFileRead.test.ts b/packages/cli/src/serve/routes/workspaceFileRead.test.ts index f7122e6a5..78326935c 100644 --- a/packages/cli/src/serve/routes/workspaceFileRead.test.ts +++ b/packages/cli/src/serve/routes/workspaceFileRead.test.ts @@ -7,7 +7,7 @@ import { promises as fsp } from 'node:fs'; import * as os from 'node:os'; import * as path from 'node:path'; -import { randomBytes } from 'node:crypto'; +import { createHash, randomBytes } from 'node:crypto'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import request from 'supertest'; import { Ignore } from '@qwen-code/qwen-code-core'; @@ -63,6 +63,10 @@ function loopbackHost(): string { return `127.0.0.1:${baseOpts.port}`; } +function rawHash(data: string | Buffer): `sha256:${string}` { + return `sha256:${createHash('sha256').update(data).digest('hex')}`; +} + describe('GET /file', () => { let h: Harness; beforeEach(async () => { @@ -86,6 +90,7 @@ describe('GET /file', () => { }); expect(res.body.sizeBytes).toBe(12); expect(res.body.returnedBytes).toBe(12); + expect(res.body.hash).toBe(rawHash('hello\nworld\n')); }); it('returns the requested line window', async () => { @@ -191,6 +196,65 @@ describe('GET /file', () => { }); }); +describe('GET /file/bytes', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness(); + }); + afterEach(async () => teardown(h)); + + it('returns base64 raw bytes and hash for a full-file window', async () => { + const data = Buffer.from([0, 1, 2, 3, 255]); + await fsp.writeFile(path.join(h.workspace, 'bin.dat'), data); + const res = await request(h.app) + .get('/file/bytes?path=bin.dat') + .set('Host', loopbackHost()); + expect(res.status).toBe(200); + expect(res.body).toMatchObject({ + kind: 'file_bytes', + path: 'bin.dat', + offset: 0, + sizeBytes: data.length, + returnedBytes: data.length, + truncated: false, + contentBase64: data.toString('base64'), + hash: rawHash(data), + }); + }); + + it('returns a partial byte window without hash', async () => { + await fsp.writeFile( + path.join(h.workspace, 'window.bin'), + Buffer.from([1, 2, 3, 4, 5]), + ); + const res = await request(h.app) + .get('/file/bytes?path=window.bin&offset=1&maxBytes=2') + .set('Host', loopbackHost()); + expect(res.status).toBe(200); + expect(res.body).toMatchObject({ + offset: 1, + returnedBytes: 2, + truncated: true, + contentBase64: Buffer.from([2, 3]).toString('base64'), + }); + expect(res.body.hash).toBeUndefined(); + }); + + it('rejects malformed offset and maxBytes with parse_error', async () => { + const badOffset = await request(h.app) + .get('/file/bytes?path=x&offset=-1') + .set('Host', loopbackHost()); + expect(badOffset.status).toBe(400); + expect(badOffset.body.errorKind).toBe('parse_error'); + + const badMax = await request(h.app) + .get('/file/bytes?path=x&maxBytes=999999999') + .set('Host', loopbackHost()); + expect(badMax.status).toBe(400); + expect(badMax.body.errorKind).toBe('parse_error'); + }); +}); + describe('GET /stat', () => { let h: Harness; beforeEach(async () => { @@ -442,7 +506,7 @@ describe('GET /glob', () => { }); describe('capability advertisement', () => { - it('advertises workspace_file_read on /capabilities', async () => { + it('advertises workspace file capabilities on /capabilities', async () => { const h = await makeHarness(); try { const res = await request(h.app) @@ -450,6 +514,8 @@ describe('capability advertisement', () => { .set('Host', loopbackHost()); expect(res.status).toBe(200); expect(res.body.features).toContain('workspace_file_read'); + expect(res.body.features).toContain('workspace_file_bytes'); + expect(res.body.features).toContain('workspace_file_write'); } finally { await teardown(h); } diff --git a/packages/cli/src/serve/routes/workspaceFileRead.ts b/packages/cli/src/serve/routes/workspaceFileRead.ts index b346829b0..aa988dd52 100644 --- a/packages/cli/src/serve/routes/workspaceFileRead.ts +++ b/packages/cli/src/serve/routes/workspaceFileRead.ts @@ -8,6 +8,7 @@ import * as path from 'node:path'; import type { Application, Request, Response } from 'express'; import { writeStderrLine } from '../../utils/stdioHelpers.js'; import { + MAX_READ_BYTES, isFsError, type FsError, type WorkspaceFileSystemFactory, @@ -35,6 +36,9 @@ export const MAX_LIST_ENTRIES = 2000; */ export const MAX_FILE_LINE_LIMIT = 2000; +/** Default byte window for `GET /file/bytes` when `maxBytes` is omitted. */ +export const DEFAULT_FILE_BYTES_MAX_BYTES = 64 * 1024; + /** * Default cap when the caller omits `?maxResults` on `GET /glob`. * Mirrors the orchestrator's default behavior (no cap) clipped to a @@ -63,7 +67,7 @@ export const MAX_GLOB_MAX_RESULTS = 50_000; * directly. Both are harmless on the SDK / curl path and * mandatory for any browser-adjacent client. */ -function applyReadHeaders(res: Response): void { +export function applyReadHeaders(res: Response): void { res.set('Cache-Control', 'no-store'); res.set('X-Content-Type-Options', 'nosniff'); } @@ -121,7 +125,7 @@ function parseIntInRange( if (raw === undefined) return undefined; if (typeof raw !== 'string' || !/^\d+$/.test(raw)) return null; const n = Number.parseInt(raw, 10); - if (!Number.isFinite(n) || n < min || n > max) return null; + if (!Number.isSafeInteger(n) || n < min || n > max) return null; return n; } @@ -254,6 +258,7 @@ async function handleGetFile( sizeBytes: out.meta.sizeBytes ?? returnedBytes, returnedBytes, truncated: out.meta.truncated === true, + hash: out.meta.hash, matchedIgnore: out.meta.matchedIgnore ?? null, originalLineCount: out.meta.originalLineCount ?? null, }); @@ -262,6 +267,68 @@ async function handleGetFile( } } +async function handleGetFileBytes( + req: Request, + res: Response, + deps: RegisterDeps, +): Promise { + const ROUTE = 'GET /file/bytes'; + const factory = getFsFactory(req, res); + if (!factory) return; + const clientId = deps.parseClientId(req, res); + if (clientId === null) return; + const queryPath = requireStringQuery(res, req.query['path'], 'path', ROUTE); + if (queryPath === null) return; + const offset = parseIntInRange( + req.query['offset'], + 0, + Number.MAX_SAFE_INTEGER, + ); + if (offset === null) { + applyReadHeaders(res); + res.status(400).json({ + errorKind: 'parse_error', + error: '`offset` must be a non-negative safe integer', + status: 400, + }); + return; + } + const maxBytes = parseIntInRange(req.query['maxBytes'], 1, MAX_READ_BYTES); + if (maxBytes === null) { + applyReadHeaders(res); + res.status(400).json({ + errorKind: 'parse_error', + error: `\`maxBytes\` must be a positive integer in [1, ${MAX_READ_BYTES}]`, + status: 400, + }); + return; + } + const fs = factory.forRequest({ + originatorClientId: clientId ?? undefined, + route: ROUTE, + }); + try { + const resolved = await fs.resolve(queryPath, 'read'); + const out = await fs.readBytesWindow(resolved, { + offset: offset ?? 0, + maxBytes: maxBytes ?? DEFAULT_FILE_BYTES_MAX_BYTES, + }); + applyReadHeaders(res); + res.status(200).json({ + kind: 'file_bytes', + path: workspaceRelative(req, resolved), + offset: out.offset, + sizeBytes: out.sizeBytes, + returnedBytes: out.returnedBytes, + truncated: out.truncated, + contentBase64: out.buffer.toString('base64'), + ...(out.hash ? { hash: out.hash } : {}), + }); + } catch (err) { + sendFsError(res, err, ROUTE); + } +} + async function handleGetStat( req: Request, res: Response, @@ -434,7 +501,7 @@ async function handleGetGlob( * Windows yields backslashes, which would otherwise leak into * `/file`, `/stat`, `/list`, and `/glob` response paths. */ -function workspaceRelative(req: Request, resolved: string): string { +export function workspaceRelative(req: Request, resolved: string): string { const boundWorkspace = (req.app.locals as { boundWorkspace?: string }) .boundWorkspace; if (!boundWorkspace) { @@ -450,6 +517,7 @@ export function registerWorkspaceFileReadRoutes( deps: RegisterDeps, ): void { app.get('/file', (req, res) => handleGetFile(req, res, deps)); + app.get('/file/bytes', (req, res) => handleGetFileBytes(req, res, deps)); app.get('/stat', (req, res) => handleGetStat(req, res, deps)); app.get('/list', (req, res) => handleGetList(req, res, deps)); app.get('/glob', (req, res) => handleGetGlob(req, res, deps)); diff --git a/packages/cli/src/serve/routes/workspaceFileWrite.test.ts b/packages/cli/src/serve/routes/workspaceFileWrite.test.ts new file mode 100644 index 000000000..d1fcdadb0 --- /dev/null +++ b/packages/cli/src/serve/routes/workspaceFileWrite.test.ts @@ -0,0 +1,270 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { createHash, randomBytes } from 'node:crypto'; +import { promises as fsp } from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import request from 'supertest'; +import { createServeApp } from '../server.js'; +import { + canonicalizeWorkspace, + createWorkspaceFileSystemFactory, +} from '../fs/index.js'; +import type { BridgeEvent } from '../eventBus.js'; +import type { ServeOptions } from '../types.js'; + +const baseOpts: ServeOptions = { + hostname: '127.0.0.1', + port: 4180, + mode: 'http-bridge', +}; + +interface Harness { + workspace: string; + scratch: string; + events: BridgeEvent[]; + app: ReturnType; +} + +async function makeHarness(opts?: { + trusted?: boolean; + token?: string; +}): Promise { + const scratch = await fsp.mkdtemp( + path.join( + os.tmpdir(), + `qwen-write-routes-${randomBytes(4).toString('hex')}-`, + ), + ); + const wsDir = path.join(scratch, 'ws'); + await fsp.mkdir(wsDir); + const workspace = canonicalizeWorkspace(wsDir); + const events: BridgeEvent[] = []; + const fsFactory = createWorkspaceFileSystemFactory({ + boundWorkspace: workspace, + trusted: opts?.trusted ?? true, + emit: (e) => events.push(e), + }); + const app = createServeApp( + { ...baseOpts, workspace, token: opts?.token }, + undefined, + { fsFactory }, + ); + return { workspace, scratch, events, app }; +} + +async function teardown(h: Harness): Promise { + await fsp.rm(h.scratch, { recursive: true, force: true }); +} + +function loopbackHost(): string { + return `127.0.0.1:${baseOpts.port}`; +} + +function rawHash(data: string | Buffer): `sha256:${string}` { + return `sha256:${createHash('sha256').update(data).digest('hex')}`; +} + +describe('POST /file/write', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness({ token: 'secret' }); + }); + afterEach(async () => teardown(h)); + + it('requires a token even on loopback no-token defaults', async () => { + await teardown(h); + h = await makeHarness(); + const res = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .send({ path: 'a.txt', content: 'x', mode: 'create' }); + expect(res.status).toBe(401); + expect(res.body.code).toBe('token_required'); + }); + + it('creates a text file with no-store headers', async () => { + const res = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ path: 'a.txt', content: 'hello\n', mode: 'create' }); + expect(res.status).toBe(201); + expect(res.headers['cache-control']).toBe('no-store'); + expect(res.headers['x-content-type-options']).toBe('nosniff'); + expect(res.body).toMatchObject({ + kind: 'file_write', + path: 'a.txt', + mode: 'create', + created: true, + sizeBytes: 6, + hash: rawHash('hello\n'), + matchedIgnore: null, + }); + expect(await fsp.readFile(path.join(h.workspace, 'a.txt'), 'utf-8')).toBe( + 'hello\n', + ); + }); + + it('does not overwrite existing files in create mode', async () => { + await fsp.writeFile(path.join(h.workspace, 'a.txt'), 'old'); + const res = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ path: 'a.txt', content: 'new', mode: 'create' }); + expect(res.status).toBe(409); + expect(res.body.errorKind).toBe('file_already_exists'); + expect(await fsp.readFile(path.join(h.workspace, 'a.txt'), 'utf-8')).toBe( + 'old', + ); + }); + + it('replaces only when expectedHash matches', async () => { + const target = path.join(h.workspace, 'r.txt'); + await fsp.writeFile(target, 'old'); + const stale = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ + path: 'r.txt', + content: 'new', + mode: 'replace', + expectedHash: rawHash('stale'), + }); + expect(stale.status).toBe(409); + expect(stale.body.errorKind).toBe('hash_mismatch'); + + const ok = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ + path: 'r.txt', + content: 'new', + mode: 'replace', + expectedHash: rawHash('old'), + }); + expect(ok.status).toBe(200); + expect(ok.body.hash).toBe(rawHash('new')); + expect(await fsp.readFile(target, 'utf-8')).toBe('new'); + }); + + it('returns parse_error for malformed bodies', async () => { + const res = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ path: 'a.txt', content: 'x', mode: 'replace' }); + expect(res.status).toBe(400); + expect(res.body.errorKind).toBe('parse_error'); + }); + + it('rejects unknown supplied client ids', async () => { + const res = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .set('X-Qwen-Client-Id', 'unknown-client') + .send({ path: 'a.txt', content: 'x', mode: 'create' }); + expect(res.status).toBe(400); + expect(res.body.code).toBe('invalid_client_id'); + }); + + it('rejects untrusted workspace writes', async () => { + await teardown(h); + h = await makeHarness({ trusted: false, token: 'secret' }); + const res = await request(h.app) + .post('/file/write') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ path: 'a.txt', content: 'x', mode: 'create' }); + expect(res.status).toBe(403); + expect(res.body.errorKind).toBe('untrusted_workspace'); + }); +}); + +describe('POST /file/edit', () => { + let h: Harness; + beforeEach(async () => { + h = await makeHarness({ token: 'secret' }); + }); + afterEach(async () => teardown(h)); + + it('applies one edit and returns a new hash', async () => { + const target = path.join(h.workspace, 'config.txt'); + await fsp.writeFile(target, 'foo=1\nbar=2\n'); + const res = await request(h.app) + .post('/file/edit') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ + path: 'config.txt', + oldText: 'foo=1', + newText: 'foo=42', + expectedHash: rawHash('foo=1\nbar=2\n'), + }); + expect(res.status).toBe(200); + expect(res.body).toMatchObject({ + kind: 'file_edit', + path: 'config.txt', + replacements: 1, + hash: rawHash('foo=42\nbar=2\n'), + }); + expect(await fsp.readFile(target, 'utf-8')).toBe('foo=42\nbar=2\n'); + }); + + it('returns typed errors for absent and ambiguous oldText', async () => { + await fsp.writeFile(path.join(h.workspace, 'x.txt'), 'x\nx\n'); + const missing = await request(h.app) + .post('/file/edit') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ + path: 'x.txt', + oldText: 'y', + newText: 'z', + expectedHash: rawHash('x\nx\n'), + }); + expect(missing.status).toBe(422); + expect(missing.body.errorKind).toBe('text_not_found'); + + const ambiguous = await request(h.app) + .post('/file/edit') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ + path: 'x.txt', + oldText: 'x', + newText: 'z', + expectedHash: rawHash('x\nx\n'), + }); + expect(ambiguous.status).toBe(422); + expect(ambiguous.body.errorKind).toBe('ambiguous_text_match'); + }); + + it('rejects symlink targets after resolve', async () => { + const outside = path.join(h.scratch, 'outside.txt'); + await fsp.writeFile(outside, 'foo=1\n'); + await fsp.symlink(outside, path.join(h.workspace, 'link.txt'), 'file'); + const res = await request(h.app) + .post('/file/edit') + .set('Host', loopbackHost()) + .set('Authorization', 'Bearer secret') + .send({ + path: 'link.txt', + oldText: 'foo=1', + newText: 'foo=2', + expectedHash: rawHash('foo=1\n'), + }); + expect(res.status).toBe(400); + expect(res.body.errorKind).toBe('symlink_escape'); + expect(await fsp.readFile(outside, 'utf-8')).toBe('foo=1\n'); + }); +}); diff --git a/packages/cli/src/serve/routes/workspaceFileWrite.ts b/packages/cli/src/serve/routes/workspaceFileWrite.ts new file mode 100644 index 000000000..a746aae08 --- /dev/null +++ b/packages/cli/src/serve/routes/workspaceFileWrite.ts @@ -0,0 +1,292 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Application, Request, RequestHandler, Response } from 'express'; +import type { HttpAcpBridge } from '../httpAcpBridge.js'; +import { + isContentHash, + type ContentHash, + type WorkspaceFileSystemFactory, + type WriteMode, +} from '../fs/index.js'; +import { + applyReadHeaders, + sendFsError, + workspaceRelative, +} from './workspaceFileRead.js'; + +interface RegisterDeps { + bridge: HttpAcpBridge; + mutate: (opts?: { strict?: boolean }) => RequestHandler; + parseClientId: (req: Request, res: Response) => string | undefined | null; + safeBody: (req: Request) => Record; +} + +function getFsFactory( + req: Request, + res: Response, +): WorkspaceFileSystemFactory | null { + const factory = (req.app.locals as { fsFactory?: WorkspaceFileSystemFactory }) + .fsFactory; + if (!factory) { + applyReadHeaders(res); + res.status(500).json({ + errorKind: 'internal_error', + error: 'workspace filesystem factory is not configured', + status: 500, + }); + return null; + } + return factory; +} + +function sendParseError(res: Response, _route: string, error: string): null { + applyReadHeaders(res); + res.status(400).json({ + errorKind: 'parse_error', + error, + status: 400, + }); + return null; +} + +function requireBodyString( + body: Record, + key: string, + res: Response, + route: string, +): string | null { + const value = body[key]; + if (typeof value !== 'string' || value.length === 0) { + return sendParseError(res, route, `\`${key}\` must be a non-empty string`); + } + return value; +} + +function optionalBoolean( + body: Record, + key: string, + res: Response, + route: string, +): boolean | undefined | null { + const value = body[key]; + if (value === undefined) return undefined; + if (typeof value !== 'boolean') { + return sendParseError(res, route, `\`${key}\` must be a boolean`); + } + return value; +} + +function optionalString( + body: Record, + key: string, + res: Response, + route: string, +): string | undefined | null { + const value = body[key]; + if (value === undefined) return undefined; + if (typeof value !== 'string' || value.length === 0) { + return sendParseError(res, route, `\`${key}\` must be a non-empty string`); + } + return value; +} + +function optionalLineEnding( + body: Record, + res: Response, + route: string, +): 'crlf' | 'lf' | undefined | null { + const value = body['lineEnding']; + if (value === undefined) return undefined; + if (value !== 'crlf' && value !== 'lf') { + return sendParseError(res, route, '`lineEnding` must be "lf" or "crlf"'); + } + return value; +} + +function requiredHash( + body: Record, + res: Response, + route: string, +): ContentHash | null { + const value = body['expectedHash']; + if (!isContentHash(value)) { + return sendParseError( + res, + route, + '`expectedHash` must match sha256:<64 lowercase hex chars>', + ); + } + return value; +} + +function optionalHash( + body: Record, + res: Response, + route: string, +): ContentHash | undefined | null { + const value = body['expectedHash']; + if (value === undefined) return undefined; + if (!isContentHash(value)) { + return sendParseError( + res, + route, + '`expectedHash` must match sha256:<64 lowercase hex chars>', + ); + } + return value; +} + +function resolveOriginatorClientId( + clientId: string | undefined, + deps: RegisterDeps, + res: Response, +): string | undefined | null { + if (clientId === undefined) return undefined; + if (!deps.bridge.knownClientIds().has(clientId)) { + applyReadHeaders(res); + res.status(400).json({ + error: `Client id "${clientId}" is not registered for this workspace`, + code: 'invalid_client_id', + clientId, + }); + return null; + } + return clientId; +} + +async function handlePostFileWrite( + req: Request, + res: Response, + deps: RegisterDeps, +): Promise { + const ROUTE = 'POST /file/write'; + const factory = getFsFactory(req, res); + if (!factory) return; + const body = deps.safeBody(req); + const queryPath = requireBodyString(body, 'path', res, ROUTE); + if (queryPath === null) return; + const content = body['content']; + if (typeof content !== 'string') { + sendParseError(res, ROUTE, '`content` must be a string'); + return; + } + const rawMode = body['mode']; + if (rawMode !== 'create' && rawMode !== 'replace') { + sendParseError(res, ROUTE, '`mode` must be "create" or "replace"'); + return; + } + const mode: WriteMode = rawMode; + const expectedHash = + mode === 'replace' + ? requiredHash(body, res, ROUTE) + : optionalHash(body, res, ROUTE); + if (expectedHash === null) return; + const bom = optionalBoolean(body, 'bom', res, ROUTE); + if (bom === null) return; + const encoding = optionalString(body, 'encoding', res, ROUTE); + if (encoding === null) return; + const lineEnding = optionalLineEnding(body, res, ROUTE); + if (lineEnding === null) return; + const clientId = deps.parseClientId(req, res); + if (clientId === null) return; + const originatorClientId = resolveOriginatorClientId(clientId, deps, res); + if (originatorClientId === null) return; + const fs = factory.forRequest({ + originatorClientId, + route: ROUTE, + }); + try { + const resolved = await fs.resolve(queryPath, 'write'); + const out = await fs.writeTextAtomic(resolved, content, { + mode, + ...(expectedHash ? { expectedHash } : {}), + ...(bom !== undefined ? { bom } : {}), + ...(encoding !== undefined ? { encoding } : {}), + ...(lineEnding !== undefined ? { lineEnding } : {}), + }); + applyReadHeaders(res); + res.status(out.created ? 201 : 200).json({ + kind: 'file_write', + path: workspaceRelative(req, resolved), + mode, + created: out.created, + sizeBytes: out.sizeBytes, + hash: out.hash, + encoding: out.meta.encoding ?? 'utf-8', + bom: out.meta.bom === true, + lineEnding: out.meta.lineEnding, + matchedIgnore: out.meta.matchedIgnore ?? null, + }); + } catch (err) { + sendFsError(res, err, ROUTE); + } +} + +async function handlePostFileEdit( + req: Request, + res: Response, + deps: RegisterDeps, +): Promise { + const ROUTE = 'POST /file/edit'; + const factory = getFsFactory(req, res); + if (!factory) return; + const body = deps.safeBody(req); + const queryPath = requireBodyString(body, 'path', res, ROUTE); + if (queryPath === null) return; + const oldText = body['oldText']; + if (typeof oldText !== 'string') { + sendParseError(res, ROUTE, '`oldText` must be a string'); + return; + } + const newText = body['newText']; + if (typeof newText !== 'string') { + sendParseError(res, ROUTE, '`newText` must be a string'); + return; + } + const expectedHash = requiredHash(body, res, ROUTE); + if (expectedHash === null) return; + const clientId = deps.parseClientId(req, res); + if (clientId === null) return; + const originatorClientId = resolveOriginatorClientId(clientId, deps, res); + if (originatorClientId === null) return; + const fs = factory.forRequest({ + originatorClientId, + route: ROUTE, + }); + try { + const resolved = await fs.resolve(queryPath, 'edit'); + const out = await fs.editAtomic(resolved, oldText, newText, { + expectedHash, + }); + applyReadHeaders(res); + res.status(200).json({ + kind: 'file_edit', + path: workspaceRelative(req, resolved), + replacements: 1, + sizeBytes: out.writtenBytes, + hash: out.hash, + encoding: out.meta?.encoding ?? 'utf-8', + bom: out.meta?.bom === true, + lineEnding: out.meta?.lineEnding ?? 'lf', + matchedIgnore: out.meta?.matchedIgnore ?? null, + }); + } catch (err) { + sendFsError(res, err, ROUTE); + } +} + +export function registerWorkspaceFileWriteRoutes( + app: Application, + deps: RegisterDeps, +): void { + app.post('/file/write', deps.mutate({ strict: true }), (req, res) => + handlePostFileWrite(req, res, deps), + ); + app.post('/file/edit', deps.mutate({ strict: true }), (req, res) => + handlePostFileEdit(req, res, deps), + ); +} diff --git a/packages/cli/src/serve/server.test.ts b/packages/cli/src/serve/server.test.ts index cf54758d4..01b3d9c77 100644 --- a/packages/cli/src/serve/server.test.ts +++ b/packages/cli/src/serve/server.test.ts @@ -114,6 +114,10 @@ const EXPECTED_STAGE1_FEATURES = [ // Issue #4175 PR 19. Always-on. Daemon exposes the read-only file // surface: `GET /file`, `GET /list`, `GET /glob`, `GET /stat`. 'workspace_file_read', + // Issue #4175 PR 20. Always-on. Daemon exposes raw byte windows and + // hash-aware text mutation routes behind the strict mutation gate. + 'workspace_file_bytes', + 'workspace_file_write', // Issue #4175 PR 21 — auth device-flow surface advertised unconditionally. 'auth_device_flow', ] as const; @@ -3249,6 +3253,9 @@ describe('runQwenServe', () => { readBytes: async () => { throw new Error('unreachable'); }, + readBytesWindow: async () => { + throw new Error('unreachable'); + }, list: async () => { throw new Error('unreachable'); }, @@ -3261,9 +3268,15 @@ describe('runQwenServe', () => { writeText: async () => { throw new Error('unreachable'); }, + writeTextAtomic: async () => { + throw new Error('unreachable'); + }, edit: async () => { throw new Error('unreachable'); }, + editAtomic: async () => { + throw new Error('unreachable'); + }, }), }; const bridge = fakeBridge(); diff --git a/packages/cli/src/serve/server.ts b/packages/cli/src/serve/server.ts index 23abc6461..b6f1258ef 100644 --- a/packages/cli/src/serve/server.ts +++ b/packages/cli/src/serve/server.ts @@ -59,6 +59,7 @@ import { type WorkspaceFileSystemFactory, } from './fs/index.js'; import { registerWorkspaceFileReadRoutes } from './routes/workspaceFileRead.js'; +import { registerWorkspaceFileWriteRoutes } from './routes/workspaceFileWrite.js'; /** * Build a no-op fs-audit emitter that logs a warning every @@ -619,6 +620,12 @@ export function createServeApp( registerWorkspaceFileReadRoutes(app, { parseClientId: parseClientIdHeader, }); + registerWorkspaceFileWriteRoutes(app, { + bridge, + mutate, + parseClientId: parseClientIdHeader, + safeBody, + }); // -- Issue #4175 PR 21 — auth device-flow routes ------------------------ diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 55be58312..9d24ff9fe 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -137,6 +137,7 @@ export * from './services/fileDiscoveryService.js'; export * from './services/fileHistoryService.js'; export * from './services/fileReadCache.js'; export * from './services/fileSystemService.js'; +export { decodeBufferWithEncodingInfo } from './utils/fileUtils.js'; export * from './services/gitService.js'; export * from './services/gitWorktreeService.js'; export * from './services/sessionRecap.js'; diff --git a/packages/core/src/services/fileSystemService.test.ts b/packages/core/src/services/fileSystemService.test.ts index 87683cc83..c89b40dff 100644 --- a/packages/core/src/services/fileSystemService.test.ts +++ b/packages/core/src/services/fileSystemService.test.ts @@ -8,6 +8,7 @@ import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import fs from 'node:fs/promises'; import { StandardFileSystemService, + encodeTextFileContent, needsUtf8Bom, resetUtf8BomCache, detectLineEnding, @@ -141,6 +142,21 @@ describe('StandardFileSystemService', () => { }); describe('writeTextFile', () => { + it('encodeTextFileContent returns final bytes for UTF-8 and CRLF metadata', () => { + const encoded = encodeTextFileContent('/test/file.txt', 'a\nb\n', { + lineEnding: 'crlf', + }); + expect(encoded.toString('utf8')).toBe('a\r\nb\r\n'); + }); + + it('encodeTextFileContent preserves UTF-8 BOM in returned bytes', () => { + const encoded = encodeTextFileContent('/test/file.txt', 'Hello', { + bom: true, + }); + expect(Array.from(encoded.subarray(0, 3))).toEqual([0xef, 0xbb, 0xbf]); + expect(encoded.subarray(3).toString('utf8')).toBe('Hello'); + }); + it('should write file content using fs', async () => { vi.mocked(fs.writeFile).mockResolvedValue(); diff --git a/packages/core/src/services/fileSystemService.ts b/packages/core/src/services/fileSystemService.ts index 45d6a2620..4ecfd0ed7 100644 --- a/packages/core/src/services/fileSystemService.ts +++ b/packages/core/src/services/fileSystemService.ts @@ -167,6 +167,11 @@ export function detectLineEnding(content: string): LineEnding { return content.includes('\r\n') ? 'crlf' : 'lf'; } +interface PreparedTextFileContent { + data: string | Buffer; + encoding?: BufferEncoding; +} + /** * Return the BOM byte sequence for a given encoding name, or null if the * encoding does not use a standard BOM. Used when writing back a file that @@ -192,6 +197,65 @@ function getBOMBytesForEncoding(encoding: string): Buffer | null { } } +function prepareTextFileContent( + filePath: string, + content: string, + meta?: ReadTextFileResponse['_meta'] | null, +): PreparedTextFileContent { + const lineEnding = meta?.['lineEnding'] as string | undefined; + const shouldUseCrlf = needsCrlfLineEndings(filePath) || lineEnding === 'crlf'; + const normalizedContent = shouldUseCrlf + ? ensureCrlfLineEndings(content) + : content; + const bom = meta?.['bom'] ?? (false as boolean); + const encoding = meta?.['encoding'] as string | undefined; + + // Check if a non-UTF-8 encoding is specified and supported by iconv-lite + const isNonUtf8Encoding = + encoding && + !isUtf8CompatibleEncoding(encoding) && + iconvEncodingExists(encoding); + + if (isNonUtf8Encoding) { + // Non-UTF-8 encoding (e.g. GBK, Big5, Shift_JIS, UTF-16LE, UTF-32BE…) + // Use iconv-lite to encode the content. When the file originally had a BOM + // (bom: true), prepend the correct BOM bytes for this encoding so the + // byte-order mark is preserved on write-back. + const encoded = iconvEncode(normalizedContent, encoding); + if (bom) { + const bomBytes = getBOMBytesForEncoding(encoding); + return { + data: bomBytes ? Buffer.concat([bomBytes, encoded]) : encoded, + }; + } + return { data: encoded }; + } + + if (bom) { + // UTF-8 BOM: prepend EF BB BF + // If content already starts with the BOM character, strip it first to avoid double BOM. + const contentWithoutBom = + normalizedContent.charCodeAt(0) === 0xfeff + ? normalizedContent.slice(1) + : normalizedContent; + const bomBuffer = Buffer.from([0xef, 0xbb, 0xbf]); + const contentBuffer = Buffer.from(contentWithoutBom, 'utf-8'); + return { data: Buffer.concat([bomBuffer, contentBuffer]) }; + } + + return { data: normalizedContent, encoding: 'utf-8' }; +} + +export function encodeTextFileContent( + filePath: string, + content: string, + meta?: ReadTextFileResponse['_meta'] | null, +): Buffer { + const prepared = prepareTextFileContent(filePath, content, meta); + if (Buffer.isBuffer(prepared.data)) return prepared.data; + return Buffer.from(prepared.data, prepared.encoding ?? 'utf-8'); +} + /** * Standard file system implementation */ @@ -215,52 +279,13 @@ export class StandardFileSystemService implements FileSystemService { params: Omit, ): Promise { const { path: filePath, _meta } = params; - const lineEnding = _meta?.['lineEnding'] as string | undefined; - // Convert LF to CRLF when: - // 1. The file type requires it (e.g. .bat, .cmd on Windows), OR - // 2. The original file used CRLF line endings (preserve original style) - const shouldUseCrlf = - needsCrlfLineEndings(filePath) || lineEnding === 'crlf'; - const content = shouldUseCrlf - ? ensureCrlfLineEndings(params.content) - : params.content; - const bom = _meta?.['bom'] ?? (false as boolean); - const encoding = _meta?.['encoding'] as string | undefined; - - // Check if a non-UTF-8 encoding is specified and supported by iconv-lite - const isNonUtf8Encoding = - encoding && - !isUtf8CompatibleEncoding(encoding) && - iconvEncodingExists(encoding); - - if (isNonUtf8Encoding) { - // Non-UTF-8 encoding (e.g. GBK, Big5, Shift_JIS, UTF-16LE, UTF-32BE…) - // Use iconv-lite to encode the content. When the file originally had a BOM - // (bom: true), prepend the correct BOM bytes for this encoding so the - // byte-order mark is preserved on write-back. - const encoded = iconvEncode(content, encoding); - if (bom) { - const bomBytes = getBOMBytesForEncoding(encoding); - await atomicWriteFile( - filePath, - bomBytes ? Buffer.concat([bomBytes, encoded]) : encoded, - ); - } else { - await atomicWriteFile(filePath, encoded); - } - } else if (bom) { - // UTF-8 BOM: prepend EF BB BF - // If content already starts with the BOM character, strip it first to avoid double BOM. - const normalizedContent = - content.charCodeAt(0) === 0xfeff ? content.slice(1) : content; - const bomBuffer = Buffer.from([0xef, 0xbb, 0xbf]); - const contentBuffer = Buffer.from(normalizedContent, 'utf-8'); - await atomicWriteFile( - filePath, - Buffer.concat([bomBuffer, contentBuffer]), - ); + const prepared = prepareTextFileContent(filePath, params.content, _meta); + if (Buffer.isBuffer(prepared.data)) { + await atomicWriteFile(filePath, prepared.data); } else { - await atomicWriteFile(filePath, content, { encoding: 'utf-8' }); + await atomicWriteFile(filePath, prepared.data, { + encoding: prepared.encoding ?? 'utf-8', + }); } return { _meta }; } diff --git a/packages/core/src/utils/fileUtils.test.ts b/packages/core/src/utils/fileUtils.test.ts index dcb4e41ae..528afed97 100644 --- a/packages/core/src/utils/fileUtils.test.ts +++ b/packages/core/src/utils/fileUtils.test.ts @@ -27,6 +27,7 @@ import { detectFileType, processSingleFileContent, detectBOM, + decodeBufferWithEncodingInfo, readFileWithEncoding, readFileWithEncodingInfo, detectFileEncoding, @@ -491,6 +492,31 @@ describe('fileUtils', () => { }); describe('readFileWithEncodingInfo', () => { + it('should decode plain UTF-8 buffers without reading from a path', () => { + const result = decodeBufferWithEncodingInfo( + Buffer.from('Hello', 'utf8'), + ); + expect(result).toEqual({ + content: 'Hello', + encoding: 'utf-8', + bom: false, + }); + }); + + it('should decode UTF-8 BOM buffers without reading from a path', () => { + const result = decodeBufferWithEncodingInfo( + Buffer.concat([ + Buffer.from([0xef, 0xbb, 0xbf]), + Buffer.from('Hello', 'utf8'), + ]), + ); + expect(result).toEqual({ + content: 'Hello', + encoding: 'utf-8', + bom: true, + }); + }); + it('should return bom: false and encoding utf-8 for plain UTF-8 file', async () => { const filePath = path.join(testDir, 'info-utf8.txt'); await fsPromises.writeFile(filePath, 'Hello', 'utf8'); diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 9d4f021c0..60d9b3f9d 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -165,6 +165,49 @@ export interface FileReadResult { bom: boolean; } +export function decodeBufferWithEncodingInfo(full: Buffer): FileReadResult { + if (full.length === 0) { + return { content: '', encoding: 'utf-8', bom: false }; + } + + const bomInfo = detectBOM(full); + if (bomInfo) { + return { + content: decodeBOMBuffer(full, bomInfo), + encoding: bomEncodingToName(bomInfo.encoding), + // Mark bom: true for all Unicode BOM variants (UTF-8/16/32) so that + // the BOM is re-written on save and the file's original format is preserved. + bom: true, + }; + } + + // No BOM — check if it's valid UTF-8 first (fast path for the common case) + if (isValidUtf8(full)) { + return { content: full.toString('utf8'), encoding: 'utf-8', bom: false }; + } + + // Not valid UTF-8 — try chardet statistical detection + const detected = detectEncodingFromBuffer(full); + if (detected && !isUtf8CompatibleEncoding(detected)) { + try { + if (iconvEncodingExists(detected)) { + return { + content: iconvDecode(full, detected), + encoding: detected, + bom: false, + }; + } + } catch (e) { + debugLogger.warn( + `Failed to decode buffer as ${detected}: ${e instanceof Error ? e.message : String(e)}`, + ); + } + } + + // Final fallback: UTF-8 with replacement characters + return { content: full.toString('utf8'), encoding: 'utf-8', bom: false }; +} + /** * Internal helper: decode a buffer given a BOMInfo. * Returns the decoded string for each supported BOM encoding. @@ -222,44 +265,7 @@ export async function readFileWithEncodingInfo( ): Promise { // Read the file once; detect BOM and decode from the single buffer. const full = await fs.promises.readFile(filePath); - if (full.length === 0) return { content: '', encoding: 'utf-8', bom: false }; - - const bomInfo = detectBOM(full); - if (bomInfo) { - return { - content: decodeBOMBuffer(full, bomInfo), - encoding: bomEncodingToName(bomInfo.encoding), - // Mark bom: true for all Unicode BOM variants (UTF-8/16/32) so that - // the BOM is re-written on save and the file's original format is preserved. - bom: true, - }; - } - - // No BOM — check if it's valid UTF-8 first (fast path for the common case) - if (isValidUtf8(full)) { - return { content: full.toString('utf8'), encoding: 'utf-8', bom: false }; - } - - // Not valid UTF-8 — try chardet statistical detection - const detected = detectEncodingFromBuffer(full); - if (detected && !isUtf8CompatibleEncoding(detected)) { - try { - if (iconvEncodingExists(detected)) { - return { - content: iconvDecode(full, detected), - encoding: detected, - bom: false, - }; - } - } catch (e) { - debugLogger.warn( - `Failed to decode file ${filePath} as ${detected}: ${e instanceof Error ? e.message : String(e)}`, - ); - } - } - - // Final fallback: UTF-8 with replacement characters - return { content: full.toString('utf8'), encoding: 'utf-8', bom: false }; + return decodeBufferWithEncodingInfo(full); } /** diff --git a/packages/sdk-typescript/src/daemon/DaemonClient.ts b/packages/sdk-typescript/src/daemon/DaemonClient.ts index 54e002d59..3943aec02 100644 --- a/packages/sdk-typescript/src/daemon/DaemonClient.ts +++ b/packages/sdk-typescript/src/daemon/DaemonClient.ts @@ -21,6 +21,12 @@ import type { DaemonSessionSummary, DaemonSessionSupportedCommandsStatus, DaemonUpdateAgentRequest, + DaemonWorkspaceFile, + DaemonWorkspaceFileBytes, + DaemonWorkspaceFileEditRequest, + DaemonWorkspaceFileEditResult, + DaemonWorkspaceFileWriteRequest, + DaemonWorkspaceFileWriteResult, DaemonWorkspaceAgentDetail, DaemonWorkspaceAgentsStatus, DaemonWorkspaceEnvStatus, @@ -370,6 +376,93 @@ export class DaemonClient { ); } + // -- Workspace files (issue #4175 PR 20) ------------------------------- + + async readWorkspaceFile( + filePath: string, + opts: { maxBytes?: number; line?: number; limit?: number } = {}, + clientId?: string, + ): Promise { + const url = new URL(`${this.baseUrl}/file`); + url.searchParams.set('path', filePath); + if (opts.maxBytes !== undefined) { + url.searchParams.set('maxBytes', String(opts.maxBytes)); + } + if (opts.line !== undefined) { + url.searchParams.set('line', String(opts.line)); + } + if (opts.limit !== undefined) { + url.searchParams.set('limit', String(opts.limit)); + } + return await this.fetchWithTimeout( + url.toString(), + { headers: this.headers({}, clientId) }, + async (res) => { + if (!res.ok) throw await this.failOnError(res, 'GET /file'); + return (await res.json()) as DaemonWorkspaceFile; + }, + ); + } + + async readWorkspaceFileBytes( + filePath: string, + opts: { offset?: number; maxBytes?: number } = {}, + clientId?: string, + ): Promise { + const url = new URL(`${this.baseUrl}/file/bytes`); + url.searchParams.set('path', filePath); + if (opts.offset !== undefined) { + url.searchParams.set('offset', String(opts.offset)); + } + if (opts.maxBytes !== undefined) { + url.searchParams.set('maxBytes', String(opts.maxBytes)); + } + return await this.fetchWithTimeout( + url.toString(), + { headers: this.headers({}, clientId) }, + async (res) => { + if (!res.ok) throw await this.failOnError(res, 'GET /file/bytes'); + return (await res.json()) as DaemonWorkspaceFileBytes; + }, + ); + } + + async writeWorkspaceFile( + req: DaemonWorkspaceFileWriteRequest, + clientId?: string, + ): Promise { + return await this.fetchWithTimeout( + `${this.baseUrl}/file/write`, + { + method: 'POST', + headers: this.headers({ 'Content-Type': 'application/json' }, clientId), + body: JSON.stringify(req), + }, + async (res) => { + if (!res.ok) throw await this.failOnError(res, 'POST /file/write'); + return (await res.json()) as DaemonWorkspaceFileWriteResult; + }, + ); + } + + async editWorkspaceFile( + req: DaemonWorkspaceFileEditRequest, + clientId?: string, + ): Promise { + return await this.fetchWithTimeout( + `${this.baseUrl}/file/edit`, + { + method: 'POST', + headers: this.headers({ 'Content-Type': 'application/json' }, clientId), + body: JSON.stringify(req), + }, + async (res) => { + if (!res.ok) throw await this.failOnError(res, 'POST /file/edit'); + return (await res.json()) as DaemonWorkspaceFileEditResult; + }, + ); + } + // -- Workspace memory (issue #4175 PR 16) ------------------------------ /** diff --git a/packages/sdk-typescript/src/daemon/index.ts b/packages/sdk-typescript/src/daemon/index.ts index 80a7047ca..603d8a92d 100644 --- a/packages/sdk-typescript/src/daemon/index.ts +++ b/packages/sdk-typescript/src/daemon/index.ts @@ -39,6 +39,7 @@ export { parseSseStream, SseFramingError } from './sse.js'; export { DAEMON_ERROR_KINDS, DaemonCapabilityMissingError, + isDaemonContentHash, requireWorkspaceCwd, } from './types.js'; export type { @@ -132,10 +133,17 @@ export type { DaemonStatus, DaemonStatusCell, DaemonUpdateAgentRequest, + DaemonContentHash, DaemonWorkspaceAgentDetail, DaemonWorkspaceAgentSummary, DaemonWorkspaceAgentsStatus, DaemonWorkspaceEnvStatus, + DaemonWorkspaceFile, + DaemonWorkspaceFileBytes, + DaemonWorkspaceFileEditRequest, + DaemonWorkspaceFileEditResult, + DaemonWorkspaceFileWriteRequest, + DaemonWorkspaceFileWriteResult, DaemonWorkspaceMcpServerStatus, DaemonWorkspaceMcpStatus, DaemonWorkspaceMemoryFile, diff --git a/packages/sdk-typescript/src/daemon/types.ts b/packages/sdk-typescript/src/daemon/types.ts index 35b0b88af..264ab5dd4 100644 --- a/packages/sdk-typescript/src/daemon/types.ts +++ b/packages/sdk-typescript/src/daemon/types.ts @@ -411,6 +411,92 @@ export interface DaemonWriteMemoryResult { changed?: boolean; } +export type DaemonContentHash = `sha256:${string}`; + +const DAEMON_CONTENT_HASH_RE = /^sha256:[0-9a-f]{64}$/; + +export function isDaemonContentHash( + value: unknown, +): value is DaemonContentHash { + return typeof value === 'string' && DAEMON_CONTENT_HASH_RE.test(value); +} + +export interface DaemonWorkspaceFile { + kind: 'file'; + path: string; + content: string; + encoding: string; + bom: boolean; + lineEnding: 'crlf' | 'lf'; + sizeBytes: number; + returnedBytes: number; + truncated: boolean; + hash?: DaemonContentHash; + matchedIgnore: 'file' | 'directory' | null; + originalLineCount: number | null; +} + +export interface DaemonWorkspaceFileBytes { + kind: 'file_bytes'; + path: string; + offset: number; + sizeBytes: number; + returnedBytes: number; + truncated: boolean; + contentBase64: string; + hash?: DaemonContentHash; +} + +interface DaemonWorkspaceFileWriteRequestBase { + path: string; + content: string; + bom?: boolean; + encoding?: string; + lineEnding?: 'crlf' | 'lf'; +} + +export type DaemonWorkspaceFileWriteRequest = + | (DaemonWorkspaceFileWriteRequestBase & { + mode: 'create'; + expectedHash?: DaemonContentHash; + }) + | (DaemonWorkspaceFileWriteRequestBase & { + mode: 'replace'; + expectedHash: DaemonContentHash; + }); + +export interface DaemonWorkspaceFileEditRequest { + path: string; + oldText: string; + newText: string; + expectedHash: DaemonContentHash; +} + +export interface DaemonWorkspaceFileWriteResult { + kind: 'file_write'; + path: string; + mode: 'create' | 'replace'; + created: boolean; + sizeBytes: number; + hash: DaemonContentHash; + encoding: string; + bom: boolean; + lineEnding: 'crlf' | 'lf'; + matchedIgnore: 'file' | 'directory' | null; +} + +export interface DaemonWorkspaceFileEditResult { + kind: 'file_edit'; + path: string; + replacements: 1; + sizeBytes: number; + hash: DaemonContentHash; + encoding: string; + bom: boolean; + lineEnding: 'crlf' | 'lf'; + matchedIgnore: 'file' | 'directory' | null; +} + /** * Issue #4175 PR 16: subagent CRUD types. `agentType` on the wire is * the `name` field from the agent's frontmatter (case-insensitive); diff --git a/packages/sdk-typescript/src/index.ts b/packages/sdk-typescript/src/index.ts index 7b0ff4a67..d72c3877f 100644 --- a/packages/sdk-typescript/src/index.ts +++ b/packages/sdk-typescript/src/index.ts @@ -12,6 +12,7 @@ export { DaemonSessionClient, asKnownDaemonEvent, createDaemonSessionViewState, + isDaemonContentHash, isDaemonEventType, isKnownDaemonEvent, parseSseStream, @@ -28,6 +29,7 @@ export { type DaemonClientEvictedData, type DaemonClientEvictedEvent, type DaemonClientOptions, + type DaemonContentHash, type DaemonControlEvent, type DaemonEvent, type DaemonEventEnvelope, @@ -66,6 +68,12 @@ export { type DaemonStatus, type DaemonStatusCell, type DaemonWorkspaceEnvStatus, + type DaemonWorkspaceFile, + type DaemonWorkspaceFileBytes, + type DaemonWorkspaceFileEditRequest, + type DaemonWorkspaceFileEditResult, + type DaemonWorkspaceFileWriteRequest, + type DaemonWorkspaceFileWriteResult, type DaemonWorkspacePreflightStatus, type DaemonSessionUpdateData, type DaemonSessionUpdateEvent, diff --git a/packages/sdk-typescript/test/unit/DaemonClient.test.ts b/packages/sdk-typescript/test/unit/DaemonClient.test.ts index 4a4d89595..e98bac0ef 100644 --- a/packages/sdk-typescript/test/unit/DaemonClient.test.ts +++ b/packages/sdk-typescript/test/unit/DaemonClient.test.ts @@ -13,6 +13,7 @@ import { } from '../../src/daemon/DaemonClient.js'; import { DaemonCapabilityMissingError, + isDaemonContentHash, requireWorkspaceCwd, } from '../../src/daemon/types.js'; import type { @@ -139,6 +140,161 @@ describe('DaemonClient', () => { }); }); + describe('workspace file helpers', () => { + it('validates daemon content hashes with the daemon regex', () => { + expect(isDaemonContentHash(`sha256:${'a'.repeat(64)}`)).toBe(true); + expect(isDaemonContentHash(`sha256:${'A'.repeat(64)}`)).toBe(false); + expect(isDaemonContentHash(`sha256:${'a'.repeat(63)}`)).toBe(false); + expect(isDaemonContentHash('md5:' + 'a'.repeat(64))).toBe(false); + expect(isDaemonContentHash(undefined)).toBe(false); + }); + + it('reads text files with query params and client identity', async () => { + const payload = { + kind: 'file', + path: 'src/a.ts', + content: 'export {}\n', + encoding: 'utf-8', + bom: false, + lineEnding: 'lf', + sizeBytes: 10, + returnedBytes: 10, + truncated: false, + hash: 'sha256:' + 'a'.repeat(64), + matchedIgnore: null, + originalLineCount: null, + }; + const { fetch, calls } = recordingFetch(() => jsonResponse(200, payload)); + const client = new DaemonClient({ baseUrl: 'http://daemon/', fetch }); + await expect( + client.readWorkspaceFile('src/a.ts', { line: 2, limit: 3 }, 'client-1'), + ).resolves.toEqual(payload); + expect(calls[0]?.method).toBe('GET'); + expect(calls[0]?.url).toBe( + 'http://daemon/file?path=src%2Fa.ts&line=2&limit=3', + ); + expect(calls[0]?.headers['x-qwen-client-id']).toBe('client-1'); + }); + + it('reads raw bytes as base64 payloads', async () => { + const payload = { + kind: 'file_bytes', + path: 'bin.dat', + offset: 4, + sizeBytes: 9, + returnedBytes: 2, + truncated: true, + contentBase64: Buffer.from([5, 6]).toString('base64'), + }; + const { fetch, calls } = recordingFetch(() => jsonResponse(200, payload)); + const client = new DaemonClient({ baseUrl: 'http://daemon', fetch }); + await expect( + client.readWorkspaceFileBytes('bin.dat', { + offset: 4, + maxBytes: 2, + }), + ).resolves.toEqual(payload); + expect(calls[0]?.url).toBe( + 'http://daemon/file/bytes?path=bin.dat&offset=4&maxBytes=2', + ); + }); + + it('writes and edits files with JSON bodies and client identity', async () => { + const writeResult = { + kind: 'file_write', + path: 'a.txt', + mode: 'replace', + created: false, + sizeBytes: 3, + hash: 'sha256:' + 'b'.repeat(64), + encoding: 'utf-8', + bom: false, + lineEnding: 'lf', + matchedIgnore: null, + }; + const editResult = { + kind: 'file_edit', + path: 'a.txt', + replacements: 1, + sizeBytes: 4, + hash: 'sha256:' + 'c'.repeat(64), + encoding: 'utf-8', + bom: false, + lineEnding: 'lf', + matchedIgnore: null, + }; + const { fetch, calls } = recordingFetch((req) => { + if (req.url.endsWith('/file/write')) { + return jsonResponse(200, writeResult); + } + if (req.url.endsWith('/file/edit')) { + return jsonResponse(200, editResult); + } + return jsonResponse(500, { error: 'unexpected' }); + }); + const client = new DaemonClient({ baseUrl: 'http://daemon', fetch }); + await expect( + client.writeWorkspaceFile( + { + path: 'a.txt', + content: 'new', + mode: 'replace', + expectedHash: `sha256:${'a'.repeat(64)}`, + }, + 'client-1', + ), + ).resolves.toEqual(writeResult); + await expect( + client.editWorkspaceFile( + { + path: 'a.txt', + oldText: 'new', + newText: 'next', + expectedHash: `sha256:${'b'.repeat(64)}`, + }, + 'client-1', + ), + ).resolves.toEqual(editResult); + expect(calls[0]).toMatchObject({ + method: 'POST', + url: 'http://daemon/file/write', + body: JSON.stringify({ + path: 'a.txt', + content: 'new', + mode: 'replace', + expectedHash: `sha256:${'a'.repeat(64)}`, + }), + }); + expect(calls[0]?.headers['content-type']).toBe('application/json'); + expect(calls[0]?.headers['x-qwen-client-id']).toBe('client-1'); + expect(calls[1]).toMatchObject({ + method: 'POST', + url: 'http://daemon/file/edit', + }); + }); + + it('preserves structured error bodies for hash conflicts', async () => { + const body = { + errorKind: 'hash_mismatch', + error: 'expected stale, found current', + status: 409, + }; + const { fetch } = recordingFetch(() => jsonResponse(409, body)); + const client = new DaemonClient({ baseUrl: 'http://daemon', fetch }); + const err = await client + .writeWorkspaceFile({ + path: 'a.txt', + content: 'new', + mode: 'replace', + expectedHash: `sha256:${'a'.repeat(64)}`, + }) + .catch((e: unknown) => e); + expect(err).toBeInstanceOf(DaemonHttpError); + expect((err as DaemonHttpError).status).toBe(409); + expect((err as DaemonHttpError).body).toEqual(body); + }); + }); + describe('read-only status routes', () => { it('GETs workspace status routes and returns payloads unchanged', async () => { const mcp: DaemonWorkspaceMcpStatus = {