perf(core): cache repeated fs lookups on tool hot path

Each tool invocation went through validatePath → isPathWithinWorkspace
→ fullyResolvedPath, plus its own existence/dir checks. The same paths
got re-resolved across back-to-back tool calls, and ripGrep re-
discovered .qwenignore on every Grep.

- workspaceContext.fullyResolvedPath: bounded cache keyed on the input
  path (1024 entries, FIFO eviction). Resolutions that throw are NOT
  cached so retries work.
- paths.validatePath: cache isDirectory results from successful stats;
  ENOENT falls through every time so a freshly created file is picked up
  immediately.
- ripGrep: module-level caches for searchPath-is-dir and per-dir
  .qwenignore presence (256 each, FIFO).
- fileUtils.processSingleFileContent: drop the existsSync gate;
  let fs.promises.stat throw ENOENT and convert to FILE_NOT_FOUND
  in catch.

Trace: 20 → 10 sync I/O calls. Cumulative reduction since the
chat-recording change: 110 → 10, -91%. All 6057 core tests pass.
This commit is contained in:
wenshao 2026-04-24 14:28:28 +08:00
parent e48dcc4882
commit 8fa7f4c498
4 changed files with 120 additions and 30 deletions

View file

@ -21,6 +21,28 @@ import type { PermissionDecision } from '../permissions/types.js';
const debugLogger = createDebugLogger('RIPGREP');
/**
* Per-process cache for `.qwenignore` discovery. The same directories show
* up across many Grep invocations in a typical session; without caching,
* each invocation pays 2-3 sync syscalls per searchPath. Bounded so a
* pathologically long session can't grow without limit.
*
* `dirIsDirCache`: searchPath → boolean (is the path itself a directory?)
* `qwenIgnoreCache`: dir → string | null (cached `.qwenignore` path or null)
*
* Filesystem-state cache: a `.qwenignore` created mid-session won't be
* picked up until the cache rolls. That's an acceptable tradeoff; users
* rarely add ignore files between Grep calls.
*/
// searchPath -> whether that path is itself a directory.
const dirIsDirCache = new Map<string, boolean>();
// directory -> path of its `.qwenignore`, or null when the file is absent.
const qwenIgnoreCache = new Map<string, string | null>();
// Default upper bound on the number of entries kept in each cache above.
const RIPGREP_CACHE_MAX = 256;

/**
 * Evicts the oldest-inserted entries of `m` until it holds at most `max`
 * entries (FIFO: a Map iterates its keys in insertion order).
 *
 * @param m - The cache to trim in place.
 * @param max - Maximum number of entries to keep; defaults to
 *   `RIPGREP_CACHE_MAX`.
 */
function trimCache<K, V>(m: Map<K, V>, max: number = RIPGREP_CACHE_MAX): void {
  while (m.size > max) {
    const oldest = m.keys().next();
    // Rely on the iterator's `done` flag rather than comparing the key with
    // `undefined`, so a map whose oldest key is `undefined` is still evicted.
    if (oldest.done) {
      break;
    }
    m.delete(oldest.value);
  }
}
/**
* Parameters for the GrepTool (Simplified)
*/
@ -253,15 +275,25 @@ class GrepToolInvocation extends BaseToolInvocation<
// Load .qwenignore from each workspace directory, not just the primary one
const seenIgnoreFiles = new Set<string>();
for (const searchPath of paths) {
const dir =
fs.existsSync(searchPath) && fs.statSync(searchPath).isDirectory()
? searchPath
: path.dirname(searchPath);
const qwenIgnorePath = path.join(dir, '.qwenignore');
if (
!seenIgnoreFiles.has(qwenIgnorePath) &&
fs.existsSync(qwenIgnorePath)
) {
let isDir = dirIsDirCache.get(searchPath);
if (isDir === undefined) {
try {
isDir = fs.statSync(searchPath).isDirectory();
} catch {
isDir = false;
}
dirIsDirCache.set(searchPath, isDir);
trimCache(dirIsDirCache);
}
const dir = isDir ? searchPath : path.dirname(searchPath);
let qwenIgnorePath = qwenIgnoreCache.get(dir);
if (qwenIgnorePath === undefined) {
const candidate = path.join(dir, '.qwenignore');
qwenIgnorePath = fs.existsSync(candidate) ? candidate : null;
qwenIgnoreCache.set(dir, qwenIgnorePath);
trimCache(qwenIgnoreCache);
}
if (qwenIgnorePath && !seenIgnoreFiles.has(qwenIgnorePath)) {
rgArgs.push('--ignore-file', qwenIgnorePath);
seenIgnoreFiles.add(qwenIgnorePath);
}

View file

@ -18,6 +18,7 @@ import { ToolErrorType } from '../tools/tool-error.js';
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
import type { Config } from '../config/config.js';
import { createDebugLogger } from './debugLogger.js';
import { isNodeError } from './errors.js';
import type { InputModalities } from '../core/contentGenerator.js';
import { detectEncodingFromBuffer } from './systemEncoding.js';
import { extractPDFText, parsePDFPageRange } from './pdf.js';
@ -581,17 +582,24 @@ export async function processSingleFileContent(
): Promise<ProcessedFileReadResult> {
const rootDirectory = config.getTargetDir();
try {
if (!fs.existsSync(filePath)) {
// Sync check is acceptable before async read
return {
llmContent:
'Could not read file because no file was found at the specified path.',
returnDisplay: 'File not found.',
error: `File not found: ${filePath}`,
errorType: ToolErrorType.FILE_NOT_FOUND,
};
let stats: import('node:fs').Stats;
try {
// Async stat doubles as the existence check — ENOENT is handled below
// and surfaces the same FILE_NOT_FOUND error type as the old explicit
// existsSync gate, with one fewer sync syscall on the hot path.
stats = await fs.promises.stat(filePath);
} catch (error: unknown) {
if (isNodeError(error) && error.code === 'ENOENT') {
return {
llmContent:
'Could not read file because no file was found at the specified path.',
returnDisplay: 'File not found.',
error: `File not found: ${filePath}`,
errorType: ToolErrorType.FILE_NOT_FOUND,
};
}
throw error;
}
const stats = await fs.promises.stat(filePath);
if (stats.isDirectory()) {
return {
llmContent:

View file

@ -14,6 +14,16 @@ import { isNodeError } from './errors.js';
export const QWEN_DIR = '.qwen';
export const GOOGLE_ACCOUNTS_FILENAME = 'google_accounts.json';
/**
* Cache for `validatePath`'s isDirectory check. Only results of successful
* `statSync` calls are cached; ENOENT and other errors fall through every
* time so a freshly created file is picked up immediately. The same path
* validated by back-to-back tool calls (very common: the model reads several
* files in one dir) used to cost one syscall each.
*/
const isDirectoryCache = new Map<string, boolean>();
const VALIDATE_PATH_CACHE_MAX = 1024;
/**
* Special characters that need to be escaped in file paths for shell compatibility.
* Includes: spaces, parentheses, brackets, braces, semicolons, ampersands, pipes,
@ -314,16 +324,24 @@ export function validatePath(
return;
}
try {
const stats = fs.statSync(resolvedPath);
if (!allowFiles && !stats.isDirectory()) {
throw new Error(`Path is not a directory: ${resolvedPath}`);
let isDirectory = isDirectoryCache.get(resolvedPath);
if (isDirectory === undefined) {
try {
isDirectory = fs.statSync(resolvedPath).isDirectory();
} catch (error: unknown) {
if (isNodeError(error) && error.code === 'ENOENT') {
throw new Error(`Path does not exist: ${resolvedPath}`);
}
throw error;
}
} catch (error: unknown) {
if (isNodeError(error) && error.code === 'ENOENT') {
throw new Error(`Path does not exist: ${resolvedPath}`);
if (isDirectoryCache.size >= VALIDATE_PATH_CACHE_MAX) {
const oldest = isDirectoryCache.keys().next().value;
if (oldest !== undefined) isDirectoryCache.delete(oldest);
}
throw error;
isDirectoryCache.set(resolvedPath, isDirectory);
}
if (!allowFiles && !isDirectory) {
throw new Error(`Path is not a directory: ${resolvedPath}`);
}
}

View file

@ -23,6 +23,16 @@ export class WorkspaceContext {
private directories = new Set<string>();
private initialDirectories: Set<string>;
private onDirectoriesChangedListeners = new Set<() => void>();
/**
* Memoized realpath results. Every workspace-bounded tool call ultimately
* routes through {@link fullyResolvedPath} → `fs.realpathSync`; without
* this cache the same path gets re-resolved on every Read/Glob/Grep/Ls
* invocation. Bounded so long sessions touching many files don't grow
* without limit; FIFO eviction is good enough — the working set tends to
* be the small set of paths the model is actively manipulating.
*/
private resolvedPathCache = new Map<string, string>();
private static readonly RESOLVED_PATH_CACHE_MAX = 1024;
/**
* Creates a new WorkspaceContext with the given initial directory and optional additional directories.
@ -201,10 +211,21 @@ export class WorkspaceContext {
* Fully resolves a path, including symbolic links.
* If the path does not exist, it returns the fully resolved path as it would be
* if it did exist.
*
* Result is memoized in {@link resolvedPathCache}. Filesystem-state cache:
* if a file is renamed / a symlink is retargeted mid-session the cache
* goes stale, which is the same correctness profile as any single
* `realpathSync` call (it captures a moment in time). The win is cutting
* 8+ syscalls per tool-heavy prompt down to 1.
*/
private fullyResolvedPath(pathToCheck: string): string {
const cached = this.resolvedPathCache.get(pathToCheck);
if (cached !== undefined) {
return cached;
}
let resolved: string;
try {
return fs.realpathSync(pathToCheck);
resolved = fs.realpathSync(pathToCheck);
} catch (e: unknown) {
if (
isNodeError(e) &&
@ -215,10 +236,21 @@ export class WorkspaceContext {
!this.isFileSymlink(e.path)
) {
// If it doesn't exist, e.path contains the fully resolved path.
return e.path;
resolved = e.path;
} else {
// Don't cache exceptions — the path may exist on retry.
throw e;
}
throw e;
}
if (
this.resolvedPathCache.size >= WorkspaceContext.RESOLVED_PATH_CACHE_MAX
) {
// FIFO eviction: drop the oldest insertion (Map preserves insert order).
const oldest = this.resolvedPathCache.keys().next().value;
if (oldest !== undefined) this.resolvedPathCache.delete(oldest);
}
this.resolvedPathCache.set(pathToCheck, resolved);
return resolved;
}
/**