test(core): cache reset hooks + regression-guards from audit

Self-review pass on the previous two perf commits surfaced a few follow-ups worth pinning down before they bite: - Module-level caches (paths.isDirectoryCache, ripGrep dirIsDir/qwen- Ignore, jsonl-utils.ensuredDirs) persisted across vitest cases silently. Added underscore-prefixed `_reset*ForTest` exports and wired one into the validatePath describe block so future cases mutating the same absolute paths can't pass by accident. - Documented the parentUuid-chain tradeoff on chatRecordingService .appendRecord: when the async write rejects, lastRecordUuid was already set sync, so subsequent records reference an absent ancestor — readers like sessionService.reconstructHistory then silently drop those descendants. Same observable failure mode as the prior sync code's caught-and-logged throw. - Documented the dir<->file mutation and mid-session .qwenignore staleness windows for the validatePath / ripGrep caches. - Added regression tests: * validatePath does NOT cache ENOENT (Edit-then-Read works) * validatePath skips re-stat on cache hit (perf assertion) * flush() resolves immediately on a fresh service * a rejected writeLine does not block the next record Full core suite: 6061 pass, 2 skipped — no regressions.
2026-05-20 01:01:53 +00:00 · 2026-04-24 14:40:18 +08:00 · 2026-04-24 14:40:18 +08:00 · d6485964c3
commit d6485964c3
parent 8fa7f4c498
6 changed files with 121 additions and 4 deletions
--- a/packages/core/src/services/chatRecordingService.test.ts
+++ b/packages/core/src/services/chatRecordingService.test.ts
@ -358,6 +358,34 @@ describe('ChatRecordingService', () => {
    });
  });

+  describe('flush', () => {
+    it('resolves immediately on a service with no enqueued writes', async () => {
+      // The writeChain starts as Promise.resolve(), so flush() on a fresh
+      // service should settle in a single microtask — important because
+      // Config.shutdown awaits flush on every exit path, even for sessions
+      // that never recorded anything.
+      await expect(chatRecordingService.flush()).resolves.toBeUndefined();
+      expect(jsonl.writeLine).not.toHaveBeenCalled();
+    });
+
+    it('a failed write does not block subsequent records', async () => {
+      // Regression guard: the inner .catch swallows fs errors and keeps
+      // the chain alive so the next record's write still runs.
+      vi.mocked(jsonl.writeLine).mockRejectedValueOnce(
+        new Error('simulated EACCES'),
+      );
+      chatRecordingService.recordUserMessage([{ text: 'first' }]);
+      chatRecordingService.recordUserMessage([{ text: 'second' }]);
+      await chatRecordingService.flush();
+
+      expect(jsonl.writeLine).toHaveBeenCalledTimes(2);
+      const second = vi.mocked(jsonl.writeLine).mock.calls[1][1] as ChatRecord;
+      expect(
+        (second.message as { parts: Array<{ text: string }> }).parts[0].text,
+      ).toBe('second');
+    });
+  });
+
  // Note: Session management tests (listSessions, loadSession, deleteSession, etc.)
  // have been moved to sessionService.test.ts
  // Session resume integration tests should test via SessionService mock
--- a/packages/core/src/services/chatRecordingService.ts
+++ b/packages/core/src/services/chatRecordingService.ts
@ -418,6 +418,16 @@ export class ChatRecordingService {
   * the correct parentUuid without waiting for the previous write. The actual
   * fs write is enqueued on {@link writeChain} and runs async; per-file
   * mutex inside {@link jsonl.writeLine} preserves on-disk ordering.
+   *
+   * **Known tradeoff (parentUuid chain integrity on write failure):** if the
+   * enqueued write rejects (e.g., disk full, permission dropped), the error
+   * is logged but subsequent records still claim the failed record's uuid
+   * as their parent. On resume, readers that walk parentUuid (e.g.
+   * sessionService.reconstructHistory) will silently drop records whose
+   * ancestor is missing on disk. This matches the sync version's behavior
+   * when its own throw was caught and logged by the caller — under normal
+   * local-disk writes failures are rare enough to accept the fire-and-forget
+   * simplification.
   */
  private appendRecord(record: ChatRecord): void {
    let conversationFile: string;
--- a/packages/core/src/tools/ripGrep.ts
+++ b/packages/core/src/tools/ripGrep.ts
@ -30,9 +30,10 @@ const debugLogger = createDebugLogger('RIPGREP');
 * `dirIsDir`: searchPath → boolean (is the path itself a directory?)
 * `qwenIgnore`: dir → string | null (cached `.qwenignore` path or null)
 *
- * Filesystem-state cache: a `.qwenignore` created mid-session won't be
- * picked up until the cache rolls. That's an acceptable tradeoff; users
- * rarely add ignore files between Grep calls.
+ * **Known staleness window:** a `.qwenignore` created mid-session, or a
+ * searchPath whose type flips (dir→file or vice versa), will not be
+ * picked up until the entry rotates out of the FIFO (256 entries). Users
+ * rarely add ignore files mid-session; a process restart resets the cache.
 */
 const dirIsDirCache = new Map<string, boolean>();
 const qwenIgnoreCache = new Map<string, string | null>();
@ -43,6 +44,14 @@ function trimCache<K, V>(m: Map<K, V>): void {
  if (oldest !== undefined) m.delete(oldest as K);
 }

+/**
+ * Test-only: clear ripGrep's module-level discovery caches between cases.
+ */
+export function _resetRipGrepCachesForTest(): void {
+  dirIsDirCache.clear();
+  qwenIgnoreCache.clear();
+}
+
 /**
 * Parameters for the GrepTool (Simplified)
 */
--- a/packages/core/src/utils/jsonl-utils.ts
+++ b/packages/core/src/utils/jsonl-utils.ts
@ -116,6 +116,14 @@ export async function read<T = unknown>(filePath: string): Promise<T[]> {
 */
 const ensuredDirs = new Set<string>();

+/**
+ * Test-only: clear the per-directory mkdir cache. Needed when tests mutate
+ * fs state at the same directory path across cases.
+ */
+export function _resetEnsuredDirsCacheForTest(): void {
+  ensuredDirs.clear();
+}
+
 /**
 * Appends a line to a JSONL file with concurrency control.
 * Uses a per-file mutex so concurrent callers serialize, and `fs.promises`
--- a/packages/core/src/utils/paths.test.ts
+++ b/packages/core/src/utils/paths.test.ts
@ -7,7 +7,15 @@
 import fs from 'node:fs';
 import os from 'node:os';
 import path from 'node:path';
-import { describe, it, expect, beforeAll, afterAll, vi } from 'vitest';
+import {
+  describe,
+  it,
+  expect,
+  beforeAll,
+  beforeEach,
+  afterAll,
+  vi,
+} from 'vitest';
 import {
  escapePath,
  resolvePath,
@ -18,6 +26,7 @@ import {
  shortenPath,
  tildeifyPath,
  getProjectHash,
+  _resetValidatePathCacheForTest,
 } from './paths.js';
 import type { Config } from '../config/config.js';

@ -431,6 +440,14 @@ describe('validatePath', () => {
    });
  });

+  beforeEach(() => {
+    // Module-level isDirectory cache persists across tests; tests here
+    // mutate the same absolute paths between cases (create file, remove,
+    // re-create as potentially-different type) so we reset to avoid stale
+    // lookups masking regressions.
+    _resetValidatePathCacheForTest();
+  });
+
  afterAll(() => {
    fs.rmSync(workspaceRoot, { recursive: true, force: true });
  });
@ -482,6 +499,36 @@ describe('validatePath', () => {
    expect(() => validatePath(config, workspaceRoot)).not.toThrow();
  });

+  it('does not cache ENOENT — recreating the path between calls succeeds', () => {
+    // Regression guard: a path that's missing at first-check, then created,
+    // must NOT be rejected on the second call. Positive stats are cached;
+    // ENOENT paths are not. This lets the model create a file with Edit
+    // and then have the next tool call see it.
+    const ephemeralDir = path.join(workspaceRoot, 'late-created');
+    expect(() => validatePath(config, ephemeralDir)).toThrowError(
+      /Path does not exist:/,
+    );
+    fs.mkdirSync(ephemeralDir);
+    try {
+      expect(() => validatePath(config, ephemeralDir)).not.toThrow();
+    } finally {
+      fs.rmSync(ephemeralDir, { recursive: true, force: true });
+    }
+  });
+
+  it('caches positive isDirectory — repeat call does not re-stat', () => {
+    const spy = vi.spyOn(fs, 'statSync');
+    const dir = path.join(workspaceRoot, 'subdir');
+    try {
+      validatePath(config, dir);
+      const afterFirst = spy.mock.calls.length;
+      validatePath(config, dir);
+      expect(spy.mock.calls.length).toBe(afterFirst);
+    } finally {
+      spy.mockRestore();
+    }
+  });
+
  it('validates paths in allowed directories', () => {
    const extraDir = fs.mkdtempSync(path.join(os.tmpdir(), 'validate-extra-'));
    try {
--- a/packages/core/src/utils/paths.ts
+++ b/packages/core/src/utils/paths.ts
@ -20,10 +20,25 @@ export const GOOGLE_ACCOUNTS_FILENAME = 'google_accounts.json';
 * created file is picked up immediately. Same path validated by back-to-back
 * tool calls (very common: model reads several files in one dir) used to
 * cost one syscall each.
+ *
+ * **Known tradeoff:** if a path is deleted and recreated as a different
+ * type (dir→file or file→dir) within the same process, the cache returns
+ * the stale type. The downstream tool will then hit a meaningful error
+ * (e.g., "not a directory") instead of a clean "does not exist", but no
+ * files are corrupted. This is rare enough in model-driven workflows that
+ * we accept the staleness for the common-case perf win.
 */
 const isDirectoryCache = new Map<string, boolean>();
 const VALIDATE_PATH_CACHE_MAX = 1024;

+/**
+ * Test-only: clear the validatePath stat cache. Module-level state would
+ * otherwise leak across vitest cases — `beforeEach(() => _resetValidatePathCacheForTest())`.
+ */
+export function _resetValidatePathCacheForTest(): void {
+  isDirectoryCache.clear();
+}
+
 /**
 * Special characters that need to be escaped in file paths for shell compatibility.
 * Includes: spaces, parentheses, brackets, braces, semicolons, ampersands, pipes,