diff --git a/packages/core/src/tools/ripGrep.ts b/packages/core/src/tools/ripGrep.ts index 7cbf33677..490d805fd 100644 --- a/packages/core/src/tools/ripGrep.ts +++ b/packages/core/src/tools/ripGrep.ts @@ -21,6 +21,28 @@ import type { PermissionDecision } from '../permissions/types.js'; const debugLogger = createDebugLogger('RIPGREP'); +/** + * Per-process cache for `.qwenignore` discovery. The same directories show + * up across many Grep invocations in a typical session — without caching, + * each invocation pays 2-3 sync syscalls per searchPath. Bounded so a + * pathologically long session can't grow without limit. + * + * `dirIsDir`: searchPath → boolean (is the path itself a directory?) + * `qwenIgnore`: dir → string | null (cached `.qwenignore` path or null) + * + * Filesystem-state cache: a `.qwenignore` created mid-session won't be + * picked up until the cache rolls. That's an acceptable tradeoff; users + * rarely add ignore files between Grep calls. + */ +const dirIsDirCache = new Map(); +const qwenIgnoreCache = new Map(); +const RIPGREP_CACHE_MAX = 256; +function trimCache(m: Map): void { + if (m.size <= RIPGREP_CACHE_MAX) return; + const oldest = m.keys().next().value; + if (oldest !== undefined) m.delete(oldest as K); +} + /** * Parameters for the GrepTool (Simplified) */ @@ -253,15 +275,25 @@ class GrepToolInvocation extends BaseToolInvocation< // Load .qwenignore from each workspace directory, not just the primary one const seenIgnoreFiles = new Set(); for (const searchPath of paths) { - const dir = - fs.existsSync(searchPath) && fs.statSync(searchPath).isDirectory() - ? 
searchPath - : path.dirname(searchPath); - const qwenIgnorePath = path.join(dir, '.qwenignore'); - if ( - !seenIgnoreFiles.has(qwenIgnorePath) && - fs.existsSync(qwenIgnorePath) - ) { + let isDir = dirIsDirCache.get(searchPath); + if (isDir === undefined) { + try { + isDir = fs.statSync(searchPath).isDirectory(); + } catch { + isDir = false; + } + dirIsDirCache.set(searchPath, isDir); + trimCache(dirIsDirCache); + } + const dir = isDir ? searchPath : path.dirname(searchPath); + let qwenIgnorePath = qwenIgnoreCache.get(dir); + if (qwenIgnorePath === undefined) { + const candidate = path.join(dir, '.qwenignore'); + qwenIgnorePath = fs.existsSync(candidate) ? candidate : null; + qwenIgnoreCache.set(dir, qwenIgnorePath); + trimCache(qwenIgnoreCache); + } + if (qwenIgnorePath && !seenIgnoreFiles.has(qwenIgnorePath)) { rgArgs.push('--ignore-file', qwenIgnorePath); seenIgnoreFiles.add(qwenIgnorePath); } diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 8428e4090..79467fb0e 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -18,6 +18,7 @@ import { ToolErrorType } from '../tools/tool-error.js'; import { BINARY_EXTENSIONS } from './ignorePatterns.js'; import type { Config } from '../config/config.js'; import { createDebugLogger } from './debugLogger.js'; +import { isNodeError } from './errors.js'; import type { InputModalities } from '../core/contentGenerator.js'; import { detectEncodingFromBuffer } from './systemEncoding.js'; import { extractPDFText, parsePDFPageRange } from './pdf.js'; @@ -581,17 +582,24 @@ export async function processSingleFileContent( ): Promise { const rootDirectory = config.getTargetDir(); try { - if (!fs.existsSync(filePath)) { - // Sync check is acceptable before async read - return { - llmContent: - 'Could not read file because no file was found at the specified path.', - returnDisplay: 'File not found.', - error: `File not found: ${filePath}`, - errorType: 
ToolErrorType.FILE_NOT_FOUND, - }; + let stats: import('node:fs').Stats; + try { + // Async stat doubles as the existence check — ENOENT is handled below + // and surfaces the same FILE_NOT_FOUND error type as the old explicit + // existsSync gate, with one fewer sync syscall on the hot path. + stats = await fs.promises.stat(filePath); + } catch (error: unknown) { + if (isNodeError(error) && error.code === 'ENOENT') { + return { + llmContent: + 'Could not read file because no file was found at the specified path.', + returnDisplay: 'File not found.', + error: `File not found: ${filePath}`, + errorType: ToolErrorType.FILE_NOT_FOUND, + }; + } + throw error; } - const stats = await fs.promises.stat(filePath); if (stats.isDirectory()) { return { llmContent: diff --git a/packages/core/src/utils/paths.ts b/packages/core/src/utils/paths.ts index 6067c5dc4..df6168beb 100644 --- a/packages/core/src/utils/paths.ts +++ b/packages/core/src/utils/paths.ts @@ -14,6 +14,16 @@ import { isNodeError } from './errors.js'; export const QWEN_DIR = '.qwen'; export const GOOGLE_ACCOUNTS_FILENAME = 'google_accounts.json'; +/** + * Cache for `validatePath`'s isDirectory check. Only successful stat results + * are cached (both `true` and `false`) — ENOENT and other errors are never + * cached, so a freshly created file is picked up immediately. The same path + * validated by back-to-back tool calls (very common: model reads several + * files in one dir) used to cost one syscall each. + */ +const isDirectoryCache = new Map(); +const VALIDATE_PATH_CACHE_MAX = 1024; + /** * Special characters that need to be escaped in file paths for shell compatibility. 
* Includes: spaces, parentheses, brackets, braces, semicolons, ampersands, pipes, @@ -314,16 +324,24 @@ export function validatePath( return; } - try { - const stats = fs.statSync(resolvedPath); - if (!allowFiles && !stats.isDirectory()) { - throw new Error(`Path is not a directory: ${resolvedPath}`); + let isDirectory = isDirectoryCache.get(resolvedPath); + if (isDirectory === undefined) { + try { + isDirectory = fs.statSync(resolvedPath).isDirectory(); + } catch (error: unknown) { + if (isNodeError(error) && error.code === 'ENOENT') { + throw new Error(`Path does not exist: ${resolvedPath}`); + } + throw error; } - } catch (error: unknown) { - if (isNodeError(error) && error.code === 'ENOENT') { - throw new Error(`Path does not exist: ${resolvedPath}`); + if (isDirectoryCache.size >= VALIDATE_PATH_CACHE_MAX) { + const oldest = isDirectoryCache.keys().next().value; + if (oldest !== undefined) isDirectoryCache.delete(oldest); } - throw error; + isDirectoryCache.set(resolvedPath, isDirectory); + } + if (!allowFiles && !isDirectory) { + throw new Error(`Path is not a directory: ${resolvedPath}`); } } diff --git a/packages/core/src/utils/workspaceContext.ts b/packages/core/src/utils/workspaceContext.ts index 5f052100d..aaabcd17c 100755 --- a/packages/core/src/utils/workspaceContext.ts +++ b/packages/core/src/utils/workspaceContext.ts @@ -23,6 +23,16 @@ export class WorkspaceContext { private directories = new Set(); private initialDirectories: Set; private onDirectoriesChangedListeners = new Set<() => void>(); + /** + * Memoized realpath results. Every workspace-bounded tool call ultimately + * routes through {@link fullyResolvedPath} → `fs.realpathSync`; without + * this cache the same path gets re-resolved on every Read/Glob/Grep/Ls + * invocation. Bounded so long sessions touching many files don't grow + * without limit; FIFO eviction is good enough — the working set tends to + * be the small set of paths the model is actively manipulating. 
+ */ + private resolvedPathCache = new Map(); + private static readonly RESOLVED_PATH_CACHE_MAX = 1024; /** * Creates a new WorkspaceContext with the given initial directory and optional additional directories. @@ -201,10 +211,21 @@ export class WorkspaceContext { * Fully resolves a path, including symbolic links. * If the path does not exist, it returns the fully resolved path as it would be * if it did exist. + * + * Result is memoized in {@link resolvedPathCache}. Filesystem-state cache: + * if a file is renamed / a symlink is retargeted mid-session the cached + * entry stays stale until evicted, widening the window any single + * `realpathSync` call already has (it captures a moment in time). The win + * is cutting 8+ syscalls per tool-heavy prompt down to 1. */ private fullyResolvedPath(pathToCheck: string): string { + const cached = this.resolvedPathCache.get(pathToCheck); + if (cached !== undefined) { + return cached; + } + let resolved: string; try { - return fs.realpathSync(pathToCheck); + resolved = fs.realpathSync(pathToCheck); } catch (e: unknown) { if ( isNodeError(e) && @@ -215,10 +236,21 @@ export class WorkspaceContext { !this.isFileSymlink(e.path) ) { // If it doesn't exist, e.path contains the fully resolved path. - return e.path; + resolved = e.path; + } else { + // Don't cache exceptions — the path may exist on retry. + throw e; } - throw e; } + if ( + this.resolvedPathCache.size >= WorkspaceContext.RESOLVED_PATH_CACHE_MAX + ) { + // FIFO eviction: drop the oldest insertion (Map preserves insert order). + const oldest = this.resolvedPathCache.keys().next().value; + if (oldest !== undefined) this.resolvedPathCache.delete(oldest); + } + this.resolvedPathCache.set(pathToCheck, resolved); + return resolved; } /**