diff --git a/packages/core/src/config/config.test.ts b/packages/core/src/config/config.test.ts index af223182d..7ff007ce9 100644 --- a/packages/core/src/config/config.test.ts +++ b/packages/core/src/config/config.test.ts @@ -313,6 +313,76 @@ describe('Server Config (config.ts)', () => { expect(config.getSystemPrompt()).toBeUndefined(); }); + describe('FileReadCache isolation', () => { + it('returns a distinct cache for child Configs created via Object.create', () => { + // Subagent / scoped-agent / fork construction all use + // `Object.create(parent)`, which does NOT run field initializers. + // Without explicit handling the child would resolve fileReadCache + // through the prototype chain back to the parent's instance, so a + // subagent's ReadFile would see the parent's recorded reads and + // return file_unchanged placeholders for files the subagent has + // never received in its own transcript. + const parent = new Config(baseParams); + const child = Object.create(parent) as Config; + + const parentCache = parent.getFileReadCache(); + const childCache = child.getFileReadCache(); + + expect(parentCache).toBeDefined(); + expect(childCache).toBeDefined(); + expect(childCache).not.toBe(parentCache); + + parentCache.recordRead( + '/tmp/parent.ts', + { + dev: 1, + ino: 100, + mtimeMs: 1_000_000, + size: 42, + } as unknown as import('node:fs').Stats, + { full: true, cacheable: true }, + ); + + expect(parentCache.size()).toBe(1); + expect(childCache.size()).toBe(0); + }); + + it('returns the same cache instance on repeated getter calls within one Config', () => { + // Sanity: the lazy own-property initialization in + // getFileReadCache() must not allocate a fresh cache on every + // call — recorded entries would vanish between operations. 
+ const config = new Config(baseParams); + expect(config.getFileReadCache()).toBe(config.getFileReadCache()); + }); + }); + + describe('startNewSession', () => { + it('clears the FileReadCache so a new session does not inherit prior reads', () => { + // Regression guard: the file-read cache backs ReadFile's + // file_unchanged placeholder, whose correctness depends on the + // model having seen the prior read earlier in the *current* + // conversation. /clear and resume both go through + // startNewSession(), so it must drop cache entries the new + // session has never seen. + const config = new Config(baseParams); + const cache = config.getFileReadCache(); + cache.recordRead( + '/tmp/whatever.ts', + { + dev: 1, + ino: 100, + mtimeMs: 1_000_000, + size: 42, + } as unknown as import('node:fs').Stats, + { full: true, cacheable: true }, + ); + expect(cache.size()).toBe(1); + + config.startNewSession(); + expect(cache.size()).toBe(0); + }); + }); + describe('initialize', () => { it('should throw an error if checkpointing is enabled and GitService fails', async () => { const gitError = new Error('Git is not installed'); diff --git a/packages/core/src/config/config.ts b/packages/core/src/config/config.ts index f4b2c42fc..a40bbecac 100644 --- a/packages/core/src/config/config.ts +++ b/packages/core/src/config/config.ts @@ -62,6 +62,7 @@ import { SubagentManager } from '../subagents/subagent-manager.js'; import type { SubagentConfig } from '../subagents/types.js'; import { BackgroundTaskRegistry } from '../agents/background-tasks.js'; import { BackgroundShellRegistry } from '../services/backgroundShellRegistry.js'; +import { FileReadCache } from '../services/fileReadCache.js'; import { DEFAULT_OTLP_ENDPOINT, DEFAULT_TELEMETRY_TARGET, @@ -365,6 +366,14 @@ export interface ConfigParameters { telemetry?: TelemetrySettings; gitCoAuthor?: boolean; usageStatisticsEnabled?: boolean; + /** + * If true, disables the per-session FileReadCache short-circuit + * (file_unchanged 
placeholder). Useful for sessions that may undergo + * context compaction or transcript transformation, where the model + * cannot reliably retrieve a previously-emitted full file content + * from prior tool results. Defaults to false (cache active). + */ + fileReadCacheDisabled?: boolean; fileFiltering?: { respectGitIgnore?: boolean; respectQwenIgnore?: boolean; @@ -547,6 +556,11 @@ export class Config { private subagentManager!: SubagentManager; private readonly backgroundTaskRegistry = new BackgroundTaskRegistry(); private readonly backgroundShellRegistry = new BackgroundShellRegistry(); + // Field initializer runs once on the parent Config; child Configs + // built via Object.create(parent) intentionally do NOT pick this up + // — see getFileReadCache() for the per-instance lazy initialization + // that keeps subagent caches isolated from the parent's. + private fileReadCache: FileReadCache = new FileReadCache(); private extensionManager!: ExtensionManager; private skillManager: SkillManager | null = null; private permissionManager: PermissionManager | null = null; @@ -601,6 +615,7 @@ export class Config { private readonly telemetrySettings: TelemetrySettings; private readonly gitCoAuthor: GitCoAuthorSettings; private readonly usageStatisticsEnabled: boolean; + private readonly fileReadCacheDisabled: boolean; private geminiClient!: GeminiClient; private baseLlmClient!: BaseLlmClient; private cronScheduler: CronScheduler | null = null; @@ -753,6 +768,7 @@ export class Config { email: 'qwen-coder@alibabacloud.com', }; this.usageStatisticsEnabled = params.usageStatisticsEnabled ?? true; + this.fileReadCacheDisabled = params.fileReadCacheDisabled ?? false; this.outputLanguageFilePath = params.outputLanguageFilePath; this.fileFiltering = { @@ -1335,6 +1351,16 @@ export class Config { this.chatRecordingService = this.chatRecordingEnabled ? 
new ChatRecordingService(this) : undefined; + // The file-read cache is session-scoped: its `file_unchanged` + // placeholder relies on the model having seen the prior full read + // earlier in the *current* conversation. Carrying entries across + // /clear or session resume would let a follow-up Read return the + // placeholder despite the new session never having received the + // file contents. Use the getter so the lazy own-property + // initialization in getFileReadCache() applies even for Configs + // constructed via Object.create — those should clear their own + // cache, not the parent's. + this.getFileReadCache().clear(); if (this.initialized) { logStartSession(this, new StartSessionEvent(this)); } @@ -2513,6 +2539,47 @@ export class Config { return this.backgroundShellRegistry; } + /** + * Session-scoped cache that tracks Read / Edit / WriteFile operations + * on files. The cache must be **per-Config-instance** so that each + * subagent (which gets its own Config) does not inherit the parent's + * recorded reads via the prototype chain. + * + * The wrinkle: every subagent / scoped-agent / fork path in this + * codebase constructs its Config via `Object.create(parent)`. That + * does **not** run instance field initializers, so the parent's + * `fileReadCache` field is reachable on the child only by prototype + * lookup — i.e. child and parent end up sharing the same cache. The + * own-property check below detects "this instance was made by + * Object.create" and lazily attaches a fresh cache, ensuring + * isolation without requiring every Object.create site to remember + * to override the field. + */ + getFileReadCache(): FileReadCache { + if (!Object.prototype.hasOwnProperty.call(this, 'fileReadCache')) { + // The own-property write needs to bypass `private`'s structural + // check — the field is conceptually still private to the class, + // we just need TS to let us install an own copy on a child + // instance produced by `Object.create(parent)`. 
+ (this as unknown as { fileReadCache: FileReadCache }).fileReadCache = + new FileReadCache(); + } + return this.fileReadCache; + } + + /** + * When true, ReadFile / Edit / WriteFile must bypass the session + * FileReadCache entirely and behave as if it did not exist (no + * `file_unchanged` placeholder, no future prior-read enforcement). + * Intended as an escape hatch for sessions where the cache's "model + * has already seen this content earlier in the conversation" + * assumption is unreliable — e.g. after context compaction or + * transcript transformation. + */ + getFileReadCacheDisabled(): boolean { + return this.fileReadCacheDisabled; + } + /** * Whether interactive permission prompts should be auto-denied. * True for background agents that have no UI to show prompts. diff --git a/packages/core/src/core/client.test.ts b/packages/core/src/core/client.test.ts index c253d81e1..b2a62349c 100644 --- a/packages/core/src/core/client.test.ts +++ b/packages/core/src/core/client.test.ts @@ -400,6 +400,9 @@ describe('Gemini Client (client.ts)', () => { warn: vi.fn(), error: vi.fn(), }), + getFileReadCache: vi.fn().mockReturnValue({ + clear: vi.fn(), + }), } as unknown as Config; client = new GeminiClient(mockConfig); diff --git a/packages/core/src/core/client.ts b/packages/core/src/core/client.ts index 32478a2f7..0866f0d20 100644 --- a/packages/core/src/core/client.ts +++ b/packages/core/src/core/client.ts @@ -1159,6 +1159,14 @@ export class GeminiClient { }); await this.startChat(newHistory); + // Compaction rewrites the prompt history: prior full-Read tool + // results may have been summarised away, but the FileReadCache + // still believes those reads are "in this conversation". A + // follow-up Read could then return the file_unchanged + // placeholder pointing at content the model can no longer + // retrieve from its own context. Clear the cache so post- + // compaction Reads re-emit the bytes. 
+ this.config.getFileReadCache().clear(); uiTelemetryService.setLastPromptTokenCount(info.newTokenCount); this.forceFullIdeContext = true; } diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 88e356265..b2e8b5d69 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -139,6 +139,7 @@ export type { CronDeleteTool, CronDeleteParams } from './tools/cron-delete.js'; export * from './services/chatRecordingService.js'; export * from './services/cronScheduler.js'; export * from './services/fileDiscoveryService.js'; +export * from './services/fileReadCache.js'; export * from './services/fileSystemService.js'; export * from './services/gitService.js'; export * from './services/gitWorktreeService.js'; diff --git a/packages/core/src/services/fileReadCache.test.ts b/packages/core/src/services/fileReadCache.test.ts new file mode 100644 index 000000000..3f9f92f92 --- /dev/null +++ b/packages/core/src/services/fileReadCache.test.ts @@ -0,0 +1,420 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import * as fs from 'node:fs'; +import * as os from 'node:os'; +import * as path from 'node:path'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import type { Stats } from 'node:fs'; +import { FileReadCache } from './fileReadCache.js'; + +/** + * Build a Stats-shaped object with the fields the cache actually reads. + * Avoids hitting the filesystem in the bulk of unit tests. 
+ */ +function makeStats(overrides: Partial = {}): Stats { + const base = { + dev: 1, + ino: 100, + mtimeMs: 1_000_000, + size: 42, + }; + return { ...base, ...overrides } as Stats; +} + +describe('FileReadCache', () => { + describe('inodeKey', () => { + it('combines dev and ino into a stable string', () => { + expect(FileReadCache.inodeKey(makeStats({ dev: 7, ino: 99 }))).toBe( + '7:99', + ); + }); + + it('treats different (dev, ino) as different keys', () => { + const a = FileReadCache.inodeKey(makeStats({ dev: 1, ino: 2 })); + const b = FileReadCache.inodeKey(makeStats({ dev: 2, ino: 1 })); + expect(a).not.toBe(b); + }); + }); + + describe('check', () => { + it('returns unknown for a never-seen file', () => { + const cache = new FileReadCache(); + expect(cache.check(makeStats()).state).toBe('unknown'); + }); + + it('returns fresh after a recordRead with matching stats', () => { + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/foo.ts', stats, { full: true, cacheable: true }); + expect(cache.check(stats).state).toBe('fresh'); + }); + + it('returns stale when mtime differs', () => { + const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats({ mtimeMs: 1000 }), { + full: true, + cacheable: true, + }); + const result = cache.check(makeStats({ mtimeMs: 2000 })); + expect(result.state).toBe('stale'); + }); + + it('returns stale when size differs', () => { + const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats({ size: 100 }), { + full: true, + cacheable: true, + }); + const result = cache.check(makeStats({ size: 200 })); + expect(result.state).toBe('stale'); + }); + + it('returns unknown — not stale — when only the inode differs', () => { + // rm + recreate scenario: same path, brand-new inode. The cache is + // keyed by inode, so the new file is genuinely a stranger. 
Edit / + // WriteFile callers will treat this as "must read first", which is + // the safer semantics than "stale" (which implies "you knew an + // earlier version of this exact file"). + const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats({ ino: 100 }), { + full: true, + cacheable: true, + }); + expect(cache.check(makeStats({ ino: 200 })).state).toBe('unknown'); + }); + + it('attaches the entry on fresh and stale results', () => { + const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats(), { + full: true, + cacheable: true, + }); + const fresh = cache.check(makeStats()); + expect(fresh.state).toBe('fresh'); + if (fresh.state === 'fresh') { + expect(fresh.entry.realPath).toBe('/x/foo.ts'); + } + + const stale = cache.check(makeStats({ size: 999 })); + expect(stale.state).toBe('stale'); + if (stale.state === 'stale') { + expect(stale.entry.realPath).toBe('/x/foo.ts'); + } + }); + }); + + describe('recordRead', () => { + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-29T00:00:00Z')); + }); + afterEach(() => { + vi.useRealTimers(); + }); + + it('sets lastReadAt to the current time', () => { + const cache = new FileReadCache(); + const entry = cache.recordRead('/x/foo.ts', makeStats(), { + full: true, + cacheable: true, + }); + expect(entry.lastReadAt).toBe(Date.now()); + }); + + it('preserves full vs ranged read distinction', () => { + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/foo.ts', stats, { full: false, cacheable: true }); + const result = cache.check(stats); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + expect(result.entry.lastReadWasFull).toBe(false); + } + }); + + it('overwrites earlier lastReadWasFull on subsequent reads', () => { + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/foo.ts', stats, { full: true, cacheable: true }); + cache.recordRead('/x/foo.ts', stats, { 
full: false, cacheable: true }); + const result = cache.check(stats); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + expect(result.entry.lastReadWasFull).toBe(false); + } + }); + + it('does not set lastWriteAt', () => { + const cache = new FileReadCache(); + const entry = cache.recordRead('/x/foo.ts', makeStats(), { + full: true, + cacheable: true, + }); + expect(entry.lastWriteAt).toBeUndefined(); + }); + + it('records cacheable=false for non-text reads (image / pdf / notebook)', () => { + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/img.png', stats, { full: true, cacheable: false }); + const result = cache.check(stats); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + expect(result.entry.lastReadCacheable).toBe(false); + } + }); + + it('flips cacheable to true on a subsequent text read of the same inode', () => { + // Pathological-but-possible: file was first read as PDF base64, then + // its bytes were rewritten to plain text and re-Read (the rewrite + // path goes through stale → fresh via a new recordRead). Verify the + // cacheable flag tracks the most recent Read, not a previous one. + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/file', stats, { full: true, cacheable: false }); + cache.recordRead('/x/file', stats, { full: true, cacheable: true }); + const result = cache.check(stats); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + expect(result.entry.lastReadCacheable).toBe(true); + } + }); + + it('updates realPath when the same inode is recorded under a different path', () => { + // e.g. the file was first read via a symlink, then via its real path. 
+ const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/symlink.ts', stats, { full: true, cacheable: true }); + cache.recordRead('/x/real.ts', stats, { full: true, cacheable: true }); + const result = cache.check(stats); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + expect(result.entry.realPath).toBe('/x/real.ts'); + } + }); + }); + + describe('recordWrite', () => { + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-29T00:00:00Z')); + }); + afterEach(() => { + vi.useRealTimers(); + }); + + it('sets lastWriteAt to the current time', () => { + const cache = new FileReadCache(); + const entry = cache.recordWrite('/x/foo.ts', makeStats()); + expect(entry.lastWriteAt).toBe(Date.now()); + }); + + it('does not set lastReadAt or flip lastReadWasFull', () => { + const cache = new FileReadCache(); + const entry = cache.recordWrite('/x/foo.ts', makeStats()); + expect(entry.lastReadAt).toBeUndefined(); + expect(entry.lastReadWasFull).toBe(false); + }); + + it('refreshes mtime+size so a follow-up Edit sees fresh', () => { + // Without this refresh the second Edit in a chain would see the + // post-write mtime as "stale" against the pre-write fingerprint, + // and reject its own caller's previous edit. Regression guard. 
+ const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats({ mtimeMs: 1000, size: 10 }), { + full: true, + cacheable: true, + }); + cache.recordWrite('/x/foo.ts', makeStats({ mtimeMs: 2000, size: 20 })); + expect(cache.check(makeStats({ mtimeMs: 2000, size: 20 })).state).toBe( + 'fresh', + ); + }); + + it('preserves lastReadAt set by an earlier recordRead', () => { + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/foo.ts', stats, { full: true, cacheable: true }); + const readTime = Date.now(); + vi.advanceTimersByTime(5_000); + const entry = cache.recordWrite( + '/x/foo.ts', + makeStats({ mtimeMs: 9999 }), + ); + expect(entry.lastReadAt).toBe(readTime); + expect(entry.lastWriteAt).toBeGreaterThan(readTime); + }); + }); + + describe('read-then-write-then-read ordering', () => { + beforeEach(() => { + vi.useFakeTimers(); + vi.setSystemTime(new Date('2026-04-29T00:00:00Z')); + }); + afterEach(() => { + vi.useRealTimers(); + }); + + it('records lastWriteAt > lastReadAt after Read → Write', () => { + // PR2 will use this ordering to decide whether a Read can return a + // file_unchanged placeholder. Verifying the timestamps move + // monotonically here gives that downstream logic a stable contract. 
+ const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats({ mtimeMs: 1000 }), { + full: true, + cacheable: true, + }); + vi.advanceTimersByTime(1); + cache.recordWrite('/x/foo.ts', makeStats({ mtimeMs: 2000 })); + const result = cache.check(makeStats({ mtimeMs: 2000 })); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + const { lastReadAt, lastWriteAt } = result.entry; + expect(lastReadAt).toBeDefined(); + expect(lastWriteAt).toBeDefined(); + expect(lastWriteAt!).toBeGreaterThan(lastReadAt!); + } + }); + + it('records lastReadAt > lastWriteAt after Read → Write → Read', () => { + const cache = new FileReadCache(); + cache.recordRead('/x/foo.ts', makeStats({ mtimeMs: 1000 }), { + full: true, + cacheable: true, + }); + vi.advanceTimersByTime(1); + cache.recordWrite('/x/foo.ts', makeStats({ mtimeMs: 2000 })); + vi.advanceTimersByTime(1); + cache.recordRead('/x/foo.ts', makeStats({ mtimeMs: 2000 }), { + full: true, + cacheable: true, + }); + const result = cache.check(makeStats({ mtimeMs: 2000 })); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + const { lastReadAt, lastWriteAt } = result.entry; + expect(lastReadAt!).toBeGreaterThan(lastWriteAt!); + } + }); + }); + + describe('isolation between files', () => { + it('keeps unrelated entries independent', () => { + const cache = new FileReadCache(); + const a = makeStats({ ino: 1 }); + const b = makeStats({ ino: 2 }); + cache.recordRead('/x/a.ts', a, { full: true, cacheable: true }); + expect(cache.check(b).state).toBe('unknown'); + cache.recordRead('/x/b.ts', b, { full: true, cacheable: true }); + expect(cache.check(a).state).toBe('fresh'); + expect(cache.check(b).state).toBe('fresh'); + }); + + it('treats same ino across different devs as separate files', () => { + const cache = new FileReadCache(); + cache.recordRead('/x/a', makeStats({ dev: 1, ino: 5 }), { + full: true, + cacheable: true, + }); + expect(cache.check(makeStats({ dev: 2, ino: 5 
})).state).toBe('unknown'); + }); + }); + + describe('invalidate / clear / size', () => { + it('size reflects the count of tracked entries', () => { + const cache = new FileReadCache(); + expect(cache.size()).toBe(0); + cache.recordRead('/x/a', makeStats({ ino: 1 }), { + full: true, + cacheable: true, + }); + cache.recordRead('/x/b', makeStats({ ino: 2 }), { + full: true, + cacheable: true, + }); + expect(cache.size()).toBe(2); + }); + + it('invalidate removes the entry for the given Stats', () => { + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/a', stats, { full: true, cacheable: true }); + cache.invalidate(stats); + expect(cache.check(stats).state).toBe('unknown'); + expect(cache.size()).toBe(0); + }); + + it('invalidate is a no-op for entries that were never recorded', () => { + const cache = new FileReadCache(); + expect(() => cache.invalidate(makeStats())).not.toThrow(); + expect(cache.size()).toBe(0); + }); + + it('clear drops every entry', () => { + const cache = new FileReadCache(); + cache.recordRead('/x/a', makeStats({ ino: 1 }), { + full: true, + cacheable: true, + }); + cache.recordRead('/x/b', makeStats({ ino: 2 }), { + full: true, + cacheable: true, + }); + cache.clear(); + expect(cache.size()).toBe(0); + expect(cache.check(makeStats({ ino: 1 })).state).toBe('unknown'); + }); + }); + + describe('with real filesystem stats', () => { + // One end-to-end check that dev+ino keying actually works against + // node:fs.statSync. The bulk of the suite uses synthetic Stats; this + // is a sanity guard against accidentally relying on a field that + // isn't populated on real platforms. 
+ let tmpDir: string; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'frc-')); + }); + afterEach(() => { + fs.rmSync(tmpDir, { recursive: true, force: true }); + }); + + it('treats two paths sharing one inode (hardlink) as the same entry', () => { + const original = path.join(tmpDir, 'original.txt'); + const link = path.join(tmpDir, 'link.txt'); + fs.writeFileSync(original, 'hello'); + fs.linkSync(original, link); + + const cache = new FileReadCache(); + cache.recordRead(original, fs.statSync(original), { + full: true, + cacheable: true, + }); + // Same inode reached via a different path — must hit the same entry. + expect(cache.check(fs.statSync(link)).state).toBe('fresh'); + }); + + it('detects external modification as stale', () => { + const file = path.join(tmpDir, 'mut.txt'); + fs.writeFileSync(file, 'one'); + const cache = new FileReadCache(); + cache.recordRead(file, fs.statSync(file), { + full: true, + cacheable: true, + }); + + // Bump mtime explicitly; on some filesystems a same-second rewrite + // would not change mtime, which would mask the test. + const future = new Date(Date.now() + 60_000); + fs.writeFileSync(file, 'one-plus-more'); + fs.utimesSync(file, future, future); + + expect(cache.check(fs.statSync(file)).state).toBe('stale'); + }); + }); +}); diff --git a/packages/core/src/services/fileReadCache.ts b/packages/core/src/services/fileReadCache.ts new file mode 100644 index 000000000..f3ddda490 --- /dev/null +++ b/packages/core/src/services/fileReadCache.ts @@ -0,0 +1,188 @@ +/** + * @license + * Copyright 2026 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import type { Stats } from 'node:fs'; + +/** + * Session-scoped cache that tracks which files the model has Read or + * written in the current conversation, plus the (mtime, size) snapshot at + * the time of that operation. 
It exists so that Edit / WriteFile can + * verify the model is editing a file it has actually seen, and so that + * repeated full Reads of an unchanged file can be short-circuited. + * + * This is a pure in-memory data structure. Callers are responsible for + * `fs.stat`-ing the file and passing the resulting Stats in — the cache + * never touches the filesystem itself, which keeps it trivially testable + * and avoids double-stat overhead at the call sites (Read / Edit / + * WriteFile already stat for their own reasons). + * + * Identity: entries are keyed by `${dev}:${ino}`, not by path. This is + * deliberate: it makes symlinks, hardlinks, and case-variant paths on + * case-insensitive filesystems all collapse onto the same entry, which + * is what we want — the cache is reasoning about *files*, not strings. + * + * Platform note: on Windows, `Stats.ino` is documented as not guaranteed + * unique (Node returns it from `_BY_HANDLE_FILE_INFORMATION.nFileIndex`, + * which can collide across volumes and ReFS). Callers that target + * Windows should consider falling back to a path-based key; the POSIX + * platforms qwen-code primarily runs on (macOS / Linux) are unaffected. + * + * Lifecycle: one instance is created per `Config` via the field + * initializer, so any code that constructs its own Config — notably + * subagents — automatically gets an independent cache. The cache itself + * does not enforce isolation; it relies on the Config-per-session + * invariant maintained by the surrounding code. + */ + +/** A single tracked file. Mutated in place by {@link FileReadCache}. */ +export interface FileReadEntry { + /** `${stats.dev}:${stats.ino}` — the canonical identity. */ + readonly inodeKey: string; + /** + * Last absolute path we observed pointing at this inode. Diagnostic + * only — it is *not* used for lookup, since multiple paths can resolve + * to the same inode (symlinks, case variants). 
+ */ + realPath: string; + /** mtime in ms at the time of the most recent record(). */ + mtimeMs: number; + /** Size in bytes at the time of the most recent record(). */ + sizeBytes: number; + /** ms epoch of the last successful Read. Undefined if never read. */ + lastReadAt?: number; + /** ms epoch of the last successful write. Undefined if never written. */ + lastWriteAt?: number; + /** + * True iff the most recent Read consumed the whole file (no offset / + * limit / pages). Used by the Read fast-path to decide whether a + * follow-up "no-args" Read can return a `file_unchanged` placeholder + * instead of re-emitting the full content. Range-scoped Reads never + * trigger the placeholder, since the model may legitimately ask for a + * different range next time. + */ + lastReadWasFull: boolean; + /** + * True iff the content the most recent Read produced is one we are + * willing to substitute with a `file_unchanged` placeholder. Plain + * text reads set this to true; binary, image, audio, video, PDF, and + * notebook reads set it to false, because the model will likely need + * the structured / multi-modal payload again rather than a stub. The + * cache itself does not interpret this flag — it is a hint produced + * and consumed by the Read tool. + */ + lastReadCacheable: boolean; +} + +/** Result of {@link FileReadCache.check}. */ +export type FileReadCheckResult = + | { state: 'fresh'; entry: FileReadEntry } + | { state: 'stale'; entry: FileReadEntry } + | { state: 'unknown' }; + +export class FileReadCache { + private readonly byInode = new Map(); + + /** Build the canonical key for a file from its Stats. */ + static inodeKey(stats: Stats): string { + return `${stats.dev}:${stats.ino}`; + } + + /** + * Record a successful Read of `absPath`. + * + * - `full` — the Read covered the entire file (no offset / limit + * / pages). Only full Reads enable the `file_unchanged` fast-path + * on subsequent reads. 
+ * - `cacheable` — the produced content is suitable for substitution + * with a `file_unchanged` placeholder. Set true for plain text, + * false for binary / image / audio / video / PDF / notebook. + */ + recordRead( + absPath: string, + stats: Stats, + opts: { full: boolean; cacheable: boolean }, + ): FileReadEntry { + const entry = this.upsert(absPath, stats); + entry.lastReadAt = Date.now(); + entry.lastReadWasFull = opts.full; + entry.lastReadCacheable = opts.cacheable; + return entry; + } + + /** + * Record a successful write (Edit, WriteFile, or any other tool that + * mutates the file's bytes). After a write the on-disk mtime/size will + * differ from any prior Read snapshot, so we refresh the cached + * fingerprint to the post-write Stats; otherwise the next Edit would + * see its own write as a "stale" external change. + */ + recordWrite(absPath: string, stats: Stats): FileReadEntry { + const entry = this.upsert(absPath, stats); + entry.lastWriteAt = Date.now(); + return entry; + } + + /** + * Compare the cached fingerprint against `stats` for the same inode. + * + * - `unknown` — no entry. The file has never been Read or written in + * this session. + * - `stale` — entry exists but mtime or size differs. The file has + * been changed by something outside our control (or by us, before + * this stats call was taken). + * - `fresh` — entry exists and mtime + size match. Safe to assume + * the bytes are what we last saw. + * + * Note: mtime + size is a best-effort fingerprint, not a hash. A file + * rewritten with identical mtime *and* identical size will read as + * `fresh`. In practice the Edit path catches this via the + * `0 occurrences` failure mode, which prompts the model to re-read. 
+ */ + check(stats: Stats): FileReadCheckResult { + const entry = this.byInode.get(FileReadCache.inodeKey(stats)); + if (!entry) return { state: 'unknown' }; + if (entry.mtimeMs !== stats.mtimeMs || entry.sizeBytes !== stats.size) { + return { state: 'stale', entry }; + } + return { state: 'fresh', entry }; + } + + /** Remove the entry for the given Stats, if any. */ + invalidate(stats: Stats): void { + this.byInode.delete(FileReadCache.inodeKey(stats)); + } + + /** Drop every entry. Used by tests and on Config shutdown. */ + clear(): void { + this.byInode.clear(); + } + + /** Number of tracked entries. Diagnostic / test use only. */ + size(): number { + return this.byInode.size; + } + + private upsert(absPath: string, stats: Stats): FileReadEntry { + const key = FileReadCache.inodeKey(stats); + const existing = this.byInode.get(key); + if (existing) { + existing.realPath = absPath; + existing.mtimeMs = stats.mtimeMs; + existing.sizeBytes = stats.size; + return existing; + } + const entry: FileReadEntry = { + inodeKey: key, + realPath: absPath, + mtimeMs: stats.mtimeMs, + sizeBytes: stats.size, + lastReadWasFull: false, + lastReadCacheable: false, + }; + this.byInode.set(key, entry); + return entry; + } +} diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index 179cdfae7..2b78956d9 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -20,7 +20,7 @@ import type { Mock } from 'vitest'; import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import type { EditToolParams } from './edit.js'; import { applyReplacement, EditTool } from './edit.js'; -import type { FileDiff } from './tools.js'; +import type { FileDiff, ToolInvocation, ToolResult } from './tools.js'; import { ToolErrorType } from './tool-error.js'; import path from 'node:path'; import fs from 'node:fs'; @@ -28,6 +28,7 @@ import os from 'node:os'; import type { Config } from '../config/config.js'; import { 
ApprovalMode } from '../config/config.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; +import { FileReadCache } from '../services/fileReadCache.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; describe('EditTool', () => { @@ -37,12 +38,14 @@ describe('EditTool', () => { let mockConfig: Config; let geminiClient: any; let baseLlmClient: any; + let fileReadCache: FileReadCache; beforeEach(() => { vi.restoreAllMocks(); tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'edit-tool-test-')); rootDir = path.join(tempDir, 'root'); fs.mkdirSync(rootDir); + fileReadCache = new FileReadCache(); geminiClient = { generateJson: mockGenerateJson, // mockGenerateJson is already defined and hoisted @@ -78,6 +81,7 @@ describe('EditTool', () => { setGeminiMdFileCount: vi.fn(), getToolRegistry: () => ({}) as any, // Minimal mock for ToolRegistry getDefaultFileEncoding: vi.fn().mockReturnValue('utf-8'), + getFileReadCache: () => fileReadCache, } as unknown as Config; // Reset mocks before each test @@ -850,6 +854,57 @@ describe('EditTool', () => { }); }); + describe('FileReadCache integration', () => { + it('records a write into the cache so a follow-up Read sees lastWriteAt', async () => { + // Without this hook, ReadFile's `(lastWriteAt === undefined || + // lastReadAt > lastWriteAt)` guard would let a post-edit Read + // return the pre-edit placeholder when the filesystem's mtime + // resolution is too coarse to detect the edit. + const filePath = path.join(rootDir, 'cached.txt'); + fs.writeFileSync(filePath, 'old content'); + + // Simulate the model having Read the file before Edit fires. 
+ const preEditStats = fs.statSync(filePath); + fileReadCache.recordRead(filePath, preEditStats, { + full: true, + cacheable: true, + }); + const beforeRead = fileReadCache.check(preEditStats); + expect(beforeRead.state).toBe('fresh'); + if (beforeRead.state === 'fresh') { + expect(beforeRead.entry.lastWriteAt).toBeUndefined(); + } + + const params: EditToolParams = { + file_path: filePath, + old_string: 'old content', + new_string: 'new content', + }; + const invocation = tool.build(params) as ToolInvocation< + EditToolParams, + ToolResult + >; + const abortSignal = new AbortController().signal; + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + + const postEditStats = fs.statSync(filePath); + const after = fileReadCache.check(postEditStats); + // After the edit, the cache entry's mtime+size match the new + // file state and lastWriteAt has been stamped. + expect(after.state).toBe('fresh'); + if (after.state === 'fresh') { + expect(after.entry.lastWriteAt).toBeDefined(); + // lastReadAt was set by the simulated pre-edit Read; the + // post-write timestamp must dominate it so subsequent Reads + // do not return the placeholder. + expect(after.entry.lastWriteAt!).toBeGreaterThanOrEqual( + after.entry.lastReadAt!, + ); + } + }); + }); + describe('workspace boundary validation', () => { it('should validate paths are within workspace root', () => { const validPath = { diff --git a/packages/core/src/tools/edit.ts b/packages/core/src/tools/edit.ts index 2e78a4149..fef091bf6 100644 --- a/packages/core/src/tools/edit.ts +++ b/packages/core/src/tools/edit.ts @@ -420,6 +420,25 @@ class EditToolInvocation implements ToolInvocation { }); } + // Mark the cache entry written, capturing the post-write stats + // so a follow-up Read sees `lastReadAt < lastWriteAt` and falls + // through to the full pipeline instead of returning the + // pre-edit placeholder. 
Best-effort: a stat failure here does + // not undo the successful write — the next Read will simply + // re-stat and treat the cache entry as stale. + try { + const postWriteStats = fs.statSync(this.params.file_path); + this.config + .getFileReadCache() + .recordWrite(this.params.file_path, postWriteStats); + } catch { + // Non-fatal: leaving a stale entry is preferable to failing + // the user-visible Edit on a transient stat failure. The + // entry's mtime/size still does not match the on-disk bytes + // post-write, so the next ReadFile will report stale and + // refresh the entry. + } + const fileName = path.basename(this.params.file_path); const originallyProposedContent = this.params.ai_proposed_content || editData.newContent; diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts index dc2d4f831..19bacd3e2 100644 --- a/packages/core/src/tools/read-file.test.ts +++ b/packages/core/src/tools/read-file.test.ts @@ -14,6 +14,7 @@ import fs from 'node:fs'; import fsp from 'node:fs/promises'; import type { Config } from '../config/config.js'; import { FileDiscoveryService } from '../services/fileDiscoveryService.js'; +import { FileReadCache } from '../services/fileReadCache.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; import type { ToolInvocation, ToolResult } from './tools.js'; @@ -25,6 +26,7 @@ vi.mock('../telemetry/loggers.js', () => ({ describe('ReadFileTool', () => { let tempRootDir: string; let tool: ReadFileTool; + let fileReadCache: FileReadCache; const abortSignal = new AbortController().signal; beforeEach(async () => { @@ -32,6 +34,7 @@ describe('ReadFileTool', () => { tempRootDir = await fsp.mkdtemp( path.join(os.tmpdir(), 'read-file-tool-root-'), ); + fileReadCache = new FileReadCache(); const mockConfigInstance = { getFileService: () => new FileDiscoveryService(tempRootDir), @@ -48,6 +51,8 
@@ describe('ReadFileTool', () => { getContentGeneratorConfig: () => ({ modalities: { image: true, pdf: true, audio: true, video: true }, }), + getFileReadCache: () => fileReadCache, + getFileReadCacheDisabled: () => false, } as unknown as Config; tool = new ReadFileTool(mockConfigInstance); }); @@ -572,6 +577,196 @@ describe('ReadFileTool', () => { } }); + describe('with FileReadCache', () => { + // Helper to build + execute a Read in one shot. + async function read( + params: ReadFileToolParams, + toolOverride: ReadFileTool = tool, + ): Promise { + const invocation = toolOverride.build(params) as ToolInvocation< + ReadFileToolParams, + ToolResult + >; + return invocation.execute(abortSignal); + } + + it('returns the file_unchanged placeholder on a second full Read of an unchanged text file', async () => { + const filePath = path.join(tempRootDir, 'note.txt'); + await fsp.writeFile(filePath, 'hello world', 'utf-8'); + + const first = await read({ file_path: filePath }); + expect(first.llmContent).toBe('hello world'); + + const second = await read({ file_path: filePath }); + expect(typeof second.llmContent).toBe('string'); + expect(second.llmContent).toMatch( + /unchanged since last read in this session/, + ); + // Placeholder must not echo the original content. + expect(second.llmContent).not.toContain('hello world'); + expect(second.returnDisplay).toMatch(/^Unchanged: /); + }); + + it('serves a fresh full Read after an external modification (stale)', async () => { + const filePath = path.join(tempRootDir, 'mut.txt'); + await fsp.writeFile(filePath, 'one', 'utf-8'); + await read({ file_path: filePath }); + + // Bump mtime well into the future to defeat low-precision filesystems + // that share the second across rapid writes. 
+ await fsp.writeFile(filePath, 'two', 'utf-8'); + const future = new Date(Date.now() + 60_000); + await fsp.utimes(filePath, future, future); + + const after = await read({ file_path: filePath }); + expect(after.llmContent).toBe('two'); + }); + + it('forces a full Read after recordWrite even if mtime/size still match', async () => { + // Models that mix Read with Edit / Write should see the post-write + // bytes on their next Read, not a placeholder pointing at the + // pre-write content. The lastReadAt < lastWriteAt branch enforces + // this even when the file's stats happen to match (which can + // happen when an Edit is a no-op or filesystems coalesce mtime). + const filePath = path.join(tempRootDir, 'edited.txt'); + await fsp.writeFile(filePath, 'before', 'utf-8'); + await read({ file_path: filePath }); + + const stats = fs.statSync(filePath); + fileReadCache.recordWrite(filePath, stats); + + const after = await read({ file_path: filePath }); + expect(after.llmContent).toBe('before'); + expect(after.llmContent).not.toMatch(/unchanged since/); + }); + + it('never short-circuits a ranged Read (offset/limit set)', async () => { + const filePath = path.join(tempRootDir, 'multi.txt'); + await fsp.writeFile(filePath, 'a\nb\nc\nd\ne', 'utf-8'); + await read({ file_path: filePath }); + + const ranged = await read({ + file_path: filePath, + offset: 1, + limit: 2, + }); + expect(typeof ranged.llmContent).toBe('string'); + expect(ranged.llmContent).not.toMatch(/unchanged since/); + expect(ranged.llmContent).toContain('b'); + }); + + it('does not arm the placeholder if the first Read was truncated', async () => { + // Truncation means the model has not seen the full file even + // though no offset/limit was passed. A follow-up no-args Read + // must therefore re-emit the truncated window rather than + // claiming "you've already seen this file". 
+ const filePath = path.join(tempRootDir, 'long.txt'); + // Write more lines than the mock Config's truncate-lines limit + // (500) so the read pipeline reports isTruncated = true. + const bigContent = Array.from( + { length: 700 }, + (_, i) => `line ${i + 1}`, + ).join('\n'); + await fsp.writeFile(filePath, bigContent, 'utf-8'); + + const first = await read({ file_path: filePath }); + expect(typeof first.llmContent).toBe('string'); + // Truncation kicks in (either by line or character cap depending + // on Config); we only need the read to actually be truncated, + // not match a specific line count. + expect(first.returnDisplay).toMatch(/Read lines .* of 700/); + + const second = await read({ file_path: filePath }); + expect(typeof second.llmContent).toBe('string'); + expect(second.llmContent).not.toMatch(/unchanged since/); + expect(second.returnDisplay).toMatch(/Read lines .* of 700/); + }); + + it('does not arm the placeholder if the first Read was ranged', async () => { + // First Read covers only a slice — lastReadWasFull = false. A + // follow-up no-args Read must therefore go through the full + // pipeline, since the cache cannot prove the model has already + // seen the entire file. 
+ const filePath = path.join(tempRootDir, 'big.txt'); + await fsp.writeFile(filePath, 'a\nb\nc\nd\ne', 'utf-8'); + + await read({ file_path: filePath, offset: 0, limit: 2 }); + const followUp = await read({ file_path: filePath }); + expect(typeof followUp.llmContent).toBe('string'); + expect(followUp.llmContent).not.toMatch(/unchanged since/); + expect(followUp.llmContent).toContain('e'); + }); + + it('does not return the placeholder for binary files', async () => { + const binPath = path.join(tempRootDir, 'blob.bin'); + await fsp.writeFile(binPath, Buffer.from([0x00, 0xff, 0x00, 0xff])); + const first = await read({ file_path: binPath }); + expect(typeof first.llmContent).toBe('string'); + expect(first.llmContent).toMatch(/Cannot display content of binary/); + + const second = await read({ file_path: binPath }); + expect(second.llmContent).not.toMatch(/unchanged since/); + expect(second.llmContent).toMatch(/Cannot display content of binary/); + }); + + it('does not return the placeholder for image files', async () => { + const imagePath = path.join(tempRootDir, 'pic.png'); + const pngHeader = Buffer.from([ + 0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a, 0x1a, 0x0a, + ]); + await fsp.writeFile(imagePath, pngHeader); + + const first = await read({ file_path: imagePath }); + // Image returns a Part, not a string. + expect(typeof first.llmContent).not.toBe('string'); + + const second = await read({ file_path: imagePath }); + // Must remain a Part — never collapsed to a string placeholder. + expect(typeof second.llmContent).not.toBe('string'); + }); + + it('completely bypasses the cache when getFileReadCacheDisabled() is true', async () => { + // Build a fresh ReadFileTool with a Config whose cache is + // disabled. Two consecutive full Reads must both return the + // file content — never the placeholder, and the cache itself + // must remain empty so prior-read enforcement (added in a + // follow-up) cannot accidentally trip on a recorded entry. 
+ const isolatedCache = new FileReadCache(); + const disabledConfig = { + getFileService: () => new FileDiscoveryService(tempRootDir), + getFileSystemService: () => new StandardFileSystemService(), + getTargetDir: () => tempRootDir, + getWorkspaceContext: () => createMockWorkspaceContext(tempRootDir), + storage: { + getProjectTempDir: () => path.join(tempRootDir, '.temp'), + getProjectDir: () => path.join(tempRootDir, '.project'), + getUserSkillsDirs: () => [ + path.join(os.homedir(), '.qwen', 'skills'), + ], + }, + getTruncateToolOutputThreshold: () => 2500, + getTruncateToolOutputLines: () => 500, + getContentGeneratorConfig: () => ({ + modalities: { image: true, pdf: true, audio: true, video: true }, + }), + getFileReadCache: () => isolatedCache, + getFileReadCacheDisabled: () => true, + } as unknown as Config; + const disabledTool = new ReadFileTool(disabledConfig); + + const filePath = path.join(tempRootDir, 'bypass.txt'); + await fsp.writeFile(filePath, 'plain text', 'utf-8'); + + const first = await read({ file_path: filePath }, disabledTool); + const second = await read({ file_path: filePath }, disabledTool); + + expect(first.llmContent).toBe('plain text'); + expect(second.llmContent).toBe('plain text'); + expect(second.llmContent).not.toMatch(/unchanged since/); + expect(isolatedCache.size()).toBe(0); + }); + }); + describe('with .qwenignore', () => { beforeEach(async () => { await fsp.writeFile( diff --git a/packages/core/src/tools/read-file.ts b/packages/core/src/tools/read-file.ts index 805ffe786..bfb4aeabc 100644 --- a/packages/core/src/tools/read-file.ts +++ b/packages/core/src/tools/read-file.ts @@ -7,6 +7,7 @@ import os from 'node:os'; import path from 'node:path'; import fs from 'node:fs/promises'; +import type { Stats } from 'node:fs'; import { makeRelative, shortenPath } from '../utils/paths.js'; import type { ToolInvocation, ToolLocation, ToolResult } from './tools.js'; import { BaseDeclarativeTool, BaseToolInvocation, Kind } from './tools.js'; 
@@ -28,6 +29,9 @@ import { isSubpaths } from '../utils/paths.js'; import { Storage } from '../config/storage.js'; import { isAutoMemPath } from '../memory/paths.js'; import { memoryFreshnessNote } from '../memory/memoryAge.js'; +import { createDebugLogger } from '../utils/debugLogger.js'; + +const debugLogger = createDebugLogger('READ_FILE_CACHE'); /** * Parameters for the ReadFile tool @@ -127,6 +131,63 @@ class ReadFileToolInvocation extends BaseToolInvocation< } async execute(): Promise { + const absPath = path.resolve(this.params.file_path); + const projectRoot = this.config.getTargetDir(); + // Auto-memory files (AGENTS.md and friends under the auto-memory + // root) get a per-read freshness `` prepended in + // the slow path — the signal that tells the model to treat the + // contents as a point-in-time snapshot. Returning the + // file_unchanged placeholder would skip that prepend, silently + // dropping the staleness warning for the rest of the session. + // These files are small; re-emit them on every read. + const isAutoMem = isAutoMemPath(absPath, projectRoot); + // The cache can be disabled at the Config level (escape hatch for + // sessions where the "model has already seen the prior tool result" + // assumption breaks down — e.g. after context compaction or + // transcript transformation). When disabled we bypass both the + // fast-path lookup and the post-read record so behaviour matches + // the pre-cache implementation byte-for-byte. + const cacheEnabled = !this.config.getFileReadCacheDisabled() && !isAutoMem; + const cache = this.config.getFileReadCache(); + // A "full" Read consumes the whole file: no offset, no limit, no PDF + // page range. Only full Reads are eligible for the file_unchanged + // fast-path; range-scoped Reads always go through, since the model + // may legitimately ask for a different slice next time. 
+ const isFullRead = + this.params.offset === undefined && + this.params.limit === undefined && + this.params.pages === undefined; + + // Stat up front so we can consult the cache before doing any heavy + // work. processSingleFileContent re-stats anyway; the extra syscall + // here is microseconds. If stat fails we fall through to the normal + // pipeline so its error handling stays the single source of truth. + let stats: Stats | undefined; + try { + stats = await fs.stat(absPath); + } catch (err) { + debugLogger.debug('stat-failed', { + path: absPath, + code: (err as NodeJS.ErrnoException).code, + }); + } + + if (cacheEnabled && stats && isFullRead) { + const status = cache.check(stats); + if ( + status.state === 'fresh' && + status.entry.lastReadAt !== undefined && + status.entry.lastReadWasFull && + status.entry.lastReadCacheable && + (status.entry.lastWriteAt === undefined || + status.entry.lastReadAt > status.entry.lastWriteAt) + ) { + debugLogger.debug('hit', { path: absPath }); + return this.unchangedResult(absPath); + } + debugLogger.debug('miss', { path: absPath, state: status.state }); + } + const result = await processSingleFileContent( this.params.file_path, this.config, @@ -146,6 +207,43 @@ class ReadFileToolInvocation extends BaseToolInvocation< }; } + // Record a cache entry so that subsequent identical Reads can hit + // the file_unchanged fast-path. An entry is "cacheable" only when + // - the content is plain text (not binary / image / audio / video + // / PDF / notebook — those need their structured payload), and + // - the read was not truncated. A truncated full Read means the + // model only saw the head of the file; returning a placeholder + // on the next call would falsely imply "you've already seen + // everything", so we force the next call back through the full + // pipeline. + // + // The stat we record is the one taken **after** the read pipeline, + // not the pre-read `stats` captured above. 
processSingleFileContent does its own stat + // internally; if the file mutated between our up-front stat and that + // internal stat, the bytes that landed in `result.llmContent` correspond to + // the post-read fingerprint, not the pre-read one. Recording + // `stats` would store a fingerprint that does not match the bytes + // we just emitted, so a later `check()` could report `fresh` and + // serve a placeholder pointing at content the model never saw. + if (cacheEnabled && stats) { + const cacheable = + typeof result.llmContent === 'string' && + result.originalLineCount !== undefined && + !result.isTruncated; + let recordStats: Stats = stats; + try { + recordStats = await fs.stat(absPath); + } catch { + // Stat after read failed — fall back to the pre-read stat. + // The fingerprint may not exactly match the bytes emitted, + // but it is the best we have without a second strategy. + } + cache.recordRead(absPath, recordStats, { + full: isFullRead, + cacheable, + }); + } + let llmContent: PartUnion; if (result.isTruncated) { const [start, end] = result.linesShown!; @@ -157,16 +255,13 @@ class ReadFileToolInvocation extends BaseToolInvocation< // For memory files, prepend a per-file staleness caveat so the model knows // the content is a point-in-time snapshot and may be stale. - const projectRoot = this.config.getTargetDir(); - if ( - typeof llmContent === 'string' && - isAutoMemPath(path.resolve(this.params.file_path), projectRoot) - ) { - // Only compute mtime when we actually need the note (avoids extra stat on - // every non-memory file read). + if (typeof llmContent === 'string' && isAutoMem) { + // Reuse the stat from above when we have it; only re-stat as a + // fallback so memory-file behavior survives a stat failure earlier + // (which would leave `stats` undefined). try { - const stat = await fs.stat(path.resolve(this.params.file_path)); - const note = memoryFreshnessNote(stat.mtimeMs); + const memStat = stats ?? 
(await fs.stat(absPath)); + const note = memoryFreshnessNote(memStat.mtimeMs); if (note) { llmContent = note + llmContent; } @@ -200,6 +295,45 @@ class ReadFileToolInvocation extends BaseToolInvocation< returnDisplay: result.returnDisplay || '', }; } + + /** + * Build the placeholder ToolResult returned when the cache indicates + * the file has not changed since the model last fully read it. The + * placeholder is intentionally explicit about its assumptions so the + * model can decide whether to trust it: + * + * 1. The full content was emitted *earlier in this conversation*. + * If the conversation has been compacted, summarised, or the + * model is a subagent receiving a transformed transcript, the + * prior content may no longer be retrievable — the model should + * re-read with explicit offset/limit in that case. + * 2. External mutations the cache cannot observe (shell writes via + * run_shell_command, MCP tool writes, other processes touching + * the file) will not appear here as `stale`. If the model + * suspects drift, it should re-read with explicit offset/limit. + * + * No `logFileOperation` is emitted on this path: the file_unchanged + * fast-path bypasses the read pipeline entirely, and the existing + * `FileOperationEvent` schema has no representation for "served from + * cache". A dedicated cache-hit metric can be added when telemetry + * needs visibility into the fast-path's effectiveness. + */ + private unchangedResult(absPath: string): ToolResult { + const relativePath = shortenPath( + makeRelative(absPath, this.config.getTargetDir()), + ); + const llmContent = + `[File ${relativePath} unchanged since last read in this session — ` + + `the full content was provided earlier in this conversation. ` + + `If you cannot retrieve that prior content (e.g. 
after context ` + + `compaction) or you suspect the file was modified outside the read/edit ` + + `tools (shell command, MCP tool, another process), re-read with ` + + `explicit offset/limit to fetch current content.]`; + return { + llmContent, + returnDisplay: `Unchanged: ${relativePath}`, + }; + } } /** diff --git a/packages/core/src/tools/write-file.test.ts b/packages/core/src/tools/write-file.test.ts index 0ed9a2050..e5f972b00 100644 --- a/packages/core/src/tools/write-file.test.ts +++ b/packages/core/src/tools/write-file.test.ts @@ -26,6 +26,7 @@ import fs from 'node:fs'; import os from 'node:os'; import { GeminiClient } from '../core/client.js'; import { createMockWorkspaceContext } from '../test-utils/mockWorkspaceContext.js'; +import { FileReadCache } from '../services/fileReadCache.js'; import { StandardFileSystemService } from '../services/fileSystemService.js'; const rootDir = path.resolve(os.tmpdir(), 'qwen-code-test-root'); @@ -37,6 +38,7 @@ let mockGeminiClientInstance: Mocked; // Mock Config const fsService = new StandardFileSystemService(); +const fileReadCache = new FileReadCache(); const mockConfigInternal = { getTargetDir: () => rootDir, getProjectRoot: () => rootDir, @@ -67,6 +69,7 @@ const mockConfigInternal = { discoverTools: vi.fn(), }) as unknown as ToolRegistry, getDefaultFileEncoding: () => 'utf-8', + getFileReadCache: () => fileReadCache, }; const mockConfig = mockConfigInternal as unknown as Config; @@ -721,5 +724,30 @@ describe('WriteFileTool', () => { fs.unlinkSync(filePath); } }); + + it('records a write into the FileReadCache', async () => { + // Symmetric with EditTool's "records a write" test: ensures + // ReadFile's post-write guard observes lastWriteAt and skips + // the file_unchanged placeholder for files this PR's tools just + // mutated. 
+ fileReadCache.clear(); + const filePath = path.join(rootDir, 'cache-marker.txt'); + const params = { file_path: filePath, content: 'fresh bytes' }; + + const invocation = tool.build(params); + const result = await invocation.execute(abortSignal); + expect(result.error).toBeUndefined(); + + const stats = fs.statSync(filePath); + const status = fileReadCache.check(stats); + expect(status.state).toBe('fresh'); + if (status.state === 'fresh') { + expect(status.entry.lastWriteAt).toBeDefined(); + } + + if (fs.existsSync(filePath)) { + fs.unlinkSync(filePath); + } + }); }); }); diff --git a/packages/core/src/tools/write-file.ts b/packages/core/src/tools/write-file.ts index f434bdd95..0e7c979f7 100644 --- a/packages/core/src/tools/write-file.ts +++ b/packages/core/src/tools/write-file.ts @@ -241,6 +241,20 @@ class WriteFileToolInvocation extends BaseToolInvocation< }, }); + // Mark the cache entry written, capturing the post-write stats + // so a follow-up Read sees `lastReadAt < lastWriteAt` and falls + // through to the full pipeline instead of returning the + // pre-write placeholder. Best-effort: a stat failure here does + // not undo the successful write — the next Read will re-stat + // and either see fresh content or treat the entry as stale. + try { + const postWriteStats = fs.statSync(file_path); + this.config.getFileReadCache().recordWrite(file_path, postWriteStats); + } catch { + // Non-fatal: leaving a stale entry is preferable to failing + // the user-visible Write on a transient stat failure. + } + // Generate diff for display result const fileName = path.basename(file_path); // If there was a readError, originalContent in correctedContentResult is '',