mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-04-28 11:41:04 +00:00
perf(core): cache repeated fs lookups on tool hot path
Each tool invocation went through validatePath → isPathWithinWorkspace → fullyResolvedPath, plus its own existence/dir checks. The same paths got re-resolved across back-to-back tool calls, and ripGrep re-discovered .qwenignore on every Grep. - workspaceContext.fullyResolvedPath: bounded cache keyed on input path (1024 entries, FIFO eviction — not a true LRU). Resolutions that throw are NOT cached so retries work. - paths.validatePath: cache isDirectory results from successful stats; ENOENT falls through every time so a freshly created file is picked up immediately. - ripGrep: module-level caches for searchPath-is-dir and per-dir .qwenignore presence (256 each, FIFO). - fileUtils.processSingleFileContent: drop the existsSync gate; let fs.promises.stat throw ENOENT and convert to FILE_NOT_FOUND in catch. Trace: 20 → 10 sync I/O calls. Cumulative reduction since the chat-recording change: 110 → 10, -91%. All 6057 core tests pass.
This commit is contained in:
parent
e48dcc4882
commit
8fa7f4c498
4 changed files with 120 additions and 30 deletions
|
|
@ -21,6 +21,28 @@ import type { PermissionDecision } from '../permissions/types.js';
|
|||
|
||||
const debugLogger = createDebugLogger('RIPGREP');
|
||||
|
||||
/**
|
||||
* Per-process cache for `.qwenignore` discovery. The same directories show
|
||||
* up across many Grep invocations in a typical session — without caching,
|
||||
* each invocation pays 2-3 sync syscalls per searchPath. Bounded so a
|
||||
* pathologically long session can't grow without limit.
|
||||
*
|
||||
* `dirIsDirCache`: searchPath → boolean (is the path itself a directory?)
|
||||
* `qwenIgnoreCache`: dir → string | null (cached `.qwenignore` path or null)
|
||||
*
|
||||
* Filesystem-state cache: a `.qwenignore` created mid-session won't be
|
||||
* picked up until the cache rolls. That's an acceptable tradeoff; users
|
||||
* rarely add ignore files between Grep calls.
|
||||
*/
|
||||
const dirIsDirCache = new Map<string, boolean>();
|
||||
const qwenIgnoreCache = new Map<string, string | null>();
|
||||
const RIPGREP_CACHE_MAX = 256;

/**
 * Evicts the oldest entries from `m` until it holds at most `max` entries.
 *
 * `Map` iterates in insertion order, so the first key yielded is the oldest
 * insertion. Eviction is therefore FIFO, not LRU: reading an entry does not
 * refresh its position.
 *
 * @param m - Cache to trim in place.
 * @param max - Maximum number of entries to keep. Defaults to
 *   `RIPGREP_CACHE_MAX`.
 */
function trimCache<K, V>(m: Map<K, V>, max = RIPGREP_CACHE_MAX): void {
  // Iterate instead of deleting a single key so that a map which is over the
  // cap by more than one entry is still brought back within the bound.
  // Iterating keys directly (rather than testing `next().value` against
  // `undefined`) also lets a key that is literally `undefined` be evicted.
  for (const key of m.keys()) {
    if (m.size <= max) return;
    m.delete(key);
  }
}
|
||||
|
||||
/**
|
||||
* Parameters for the GrepTool (Simplified)
|
||||
*/
|
||||
|
|
@ -253,15 +275,25 @@ class GrepToolInvocation extends BaseToolInvocation<
|
|||
// Load .qwenignore from each workspace directory, not just the primary one
|
||||
const seenIgnoreFiles = new Set<string>();
|
||||
for (const searchPath of paths) {
|
||||
const dir =
|
||||
fs.existsSync(searchPath) && fs.statSync(searchPath).isDirectory()
|
||||
? searchPath
|
||||
: path.dirname(searchPath);
|
||||
const qwenIgnorePath = path.join(dir, '.qwenignore');
|
||||
if (
|
||||
!seenIgnoreFiles.has(qwenIgnorePath) &&
|
||||
fs.existsSync(qwenIgnorePath)
|
||||
) {
|
||||
let isDir = dirIsDirCache.get(searchPath);
|
||||
if (isDir === undefined) {
|
||||
try {
|
||||
isDir = fs.statSync(searchPath).isDirectory();
|
||||
} catch {
|
||||
isDir = false;
|
||||
}
|
||||
dirIsDirCache.set(searchPath, isDir);
|
||||
trimCache(dirIsDirCache);
|
||||
}
|
||||
const dir = isDir ? searchPath : path.dirname(searchPath);
|
||||
let qwenIgnorePath = qwenIgnoreCache.get(dir);
|
||||
if (qwenIgnorePath === undefined) {
|
||||
const candidate = path.join(dir, '.qwenignore');
|
||||
qwenIgnorePath = fs.existsSync(candidate) ? candidate : null;
|
||||
qwenIgnoreCache.set(dir, qwenIgnorePath);
|
||||
trimCache(qwenIgnoreCache);
|
||||
}
|
||||
if (qwenIgnorePath && !seenIgnoreFiles.has(qwenIgnorePath)) {
|
||||
rgArgs.push('--ignore-file', qwenIgnorePath);
|
||||
seenIgnoreFiles.add(qwenIgnorePath);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import { ToolErrorType } from '../tools/tool-error.js';
|
|||
import { BINARY_EXTENSIONS } from './ignorePatterns.js';
|
||||
import type { Config } from '../config/config.js';
|
||||
import { createDebugLogger } from './debugLogger.js';
|
||||
import { isNodeError } from './errors.js';
|
||||
import type { InputModalities } from '../core/contentGenerator.js';
|
||||
import { detectEncodingFromBuffer } from './systemEncoding.js';
|
||||
import { extractPDFText, parsePDFPageRange } from './pdf.js';
|
||||
|
|
@ -581,17 +582,24 @@ export async function processSingleFileContent(
|
|||
): Promise<ProcessedFileReadResult> {
|
||||
const rootDirectory = config.getTargetDir();
|
||||
try {
|
||||
if (!fs.existsSync(filePath)) {
|
||||
// Sync check is acceptable before async read
|
||||
return {
|
||||
llmContent:
|
||||
'Could not read file because no file was found at the specified path.',
|
||||
returnDisplay: 'File not found.',
|
||||
error: `File not found: ${filePath}`,
|
||||
errorType: ToolErrorType.FILE_NOT_FOUND,
|
||||
};
|
||||
let stats: import('node:fs').Stats;
|
||||
try {
|
||||
// Async stat doubles as the existence check — ENOENT is handled below
|
||||
// and surfaces the same FILE_NOT_FOUND error type as the old explicit
|
||||
// existsSync gate, with one fewer sync syscall on the hot path.
|
||||
stats = await fs.promises.stat(filePath);
|
||||
} catch (error: unknown) {
|
||||
if (isNodeError(error) && error.code === 'ENOENT') {
|
||||
return {
|
||||
llmContent:
|
||||
'Could not read file because no file was found at the specified path.',
|
||||
returnDisplay: 'File not found.',
|
||||
error: `File not found: ${filePath}`,
|
||||
errorType: ToolErrorType.FILE_NOT_FOUND,
|
||||
};
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
const stats = await fs.promises.stat(filePath);
|
||||
if (stats.isDirectory()) {
|
||||
return {
|
||||
llmContent:
|
||||
|
|
|
|||
|
|
@ -14,6 +14,16 @@ import { isNodeError } from './errors.js';
|
|||
export const QWEN_DIR = '.qwen';
|
||||
export const GOOGLE_ACCOUNTS_FILENAME = 'google_accounts.json';
|
||||
|
||||
/**
|
||||
* Cache for `validatePath`'s isDirectory check. Only successful stat results are
|
||||
* cached — ENOENT and other errors fall through every time so a freshly
|
||||
* created file is picked up immediately. Same path validated by back-to-back
|
||||
* tool calls (very common: model reads several files in one dir) used to
|
||||
* cost one syscall each.
|
||||
*/
|
||||
const isDirectoryCache = new Map<string, boolean>();
|
||||
const VALIDATE_PATH_CACHE_MAX = 1024;
|
||||
|
||||
/**
|
||||
* Special characters that need to be escaped in file paths for shell compatibility.
|
||||
* Includes: spaces, parentheses, brackets, braces, semicolons, ampersands, pipes,
|
||||
|
|
@ -314,16 +324,24 @@ export function validatePath(
|
|||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
const stats = fs.statSync(resolvedPath);
|
||||
if (!allowFiles && !stats.isDirectory()) {
|
||||
throw new Error(`Path is not a directory: ${resolvedPath}`);
|
||||
let isDirectory = isDirectoryCache.get(resolvedPath);
|
||||
if (isDirectory === undefined) {
|
||||
try {
|
||||
isDirectory = fs.statSync(resolvedPath).isDirectory();
|
||||
} catch (error: unknown) {
|
||||
if (isNodeError(error) && error.code === 'ENOENT') {
|
||||
throw new Error(`Path does not exist: ${resolvedPath}`);
|
||||
}
|
||||
throw error;
|
||||
}
|
||||
} catch (error: unknown) {
|
||||
if (isNodeError(error) && error.code === 'ENOENT') {
|
||||
throw new Error(`Path does not exist: ${resolvedPath}`);
|
||||
if (isDirectoryCache.size >= VALIDATE_PATH_CACHE_MAX) {
|
||||
const oldest = isDirectoryCache.keys().next().value;
|
||||
if (oldest !== undefined) isDirectoryCache.delete(oldest);
|
||||
}
|
||||
throw error;
|
||||
isDirectoryCache.set(resolvedPath, isDirectory);
|
||||
}
|
||||
if (!allowFiles && !isDirectory) {
|
||||
throw new Error(`Path is not a directory: ${resolvedPath}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,16 @@ export class WorkspaceContext {
|
|||
private directories = new Set<string>();
|
||||
private initialDirectories: Set<string>;
|
||||
private onDirectoriesChangedListeners = new Set<() => void>();
|
||||
/**
|
||||
* Memoized realpath results. Every workspace-bounded tool call ultimately
|
||||
* routes through {@link fullyResolvedPath} → `fs.realpathSync`; without
|
||||
* this cache the same path gets re-resolved on every Read/Glob/Grep/Ls
|
||||
* invocation. Bounded so long sessions touching many files don't grow
|
||||
* without limit; FIFO eviction is good enough — the working set tends to
|
||||
* be the small set of paths the model is actively manipulating.
|
||||
*/
|
||||
private resolvedPathCache = new Map<string, string>();
|
||||
private static readonly RESOLVED_PATH_CACHE_MAX = 1024;
|
||||
|
||||
/**
|
||||
* Creates a new WorkspaceContext with the given initial directory and optional additional directories.
|
||||
|
|
@ -201,10 +211,21 @@ export class WorkspaceContext {
|
|||
* Fully resolves a path, including symbolic links.
|
||||
* If the path does not exist, it returns the fully resolved path as it would be
|
||||
* if it did exist.
|
||||
*
|
||||
* Result is memoized in {@link resolvedPathCache}. Filesystem-state cache:
|
||||
* if a file is renamed / a symlink is retargeted mid-session the cache
|
||||
* goes stale, which is the same correctness profile as any single
|
||||
* `realpathSync` call (it captures a moment in time). The win is cutting
|
||||
* 8+ syscalls per tool-heavy prompt down to 1.
|
||||
*/
|
||||
private fullyResolvedPath(pathToCheck: string): string {
|
||||
const cached = this.resolvedPathCache.get(pathToCheck);
|
||||
if (cached !== undefined) {
|
||||
return cached;
|
||||
}
|
||||
let resolved: string;
|
||||
try {
|
||||
return fs.realpathSync(pathToCheck);
|
||||
resolved = fs.realpathSync(pathToCheck);
|
||||
} catch (e: unknown) {
|
||||
if (
|
||||
isNodeError(e) &&
|
||||
|
|
@ -215,10 +236,21 @@ export class WorkspaceContext {
|
|||
!this.isFileSymlink(e.path)
|
||||
) {
|
||||
// If it doesn't exist, e.path contains the fully resolved path.
|
||||
return e.path;
|
||||
resolved = e.path;
|
||||
} else {
|
||||
// Don't cache exceptions — the path may exist on retry.
|
||||
throw e;
|
||||
}
|
||||
throw e;
|
||||
}
|
||||
if (
|
||||
this.resolvedPathCache.size >= WorkspaceContext.RESOLVED_PATH_CACHE_MAX
|
||||
) {
|
||||
// FIFO eviction: drop the oldest insertion (Map preserves insert order).
|
||||
const oldest = this.resolvedPathCache.keys().next().value;
|
||||
if (oldest !== undefined) this.resolvedPathCache.delete(oldest);
|
||||
}
|
||||
this.resolvedPathCache.set(pathToCheck, resolved);
|
||||
return resolved;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue