From de8ddf5303a39c15ec7fca46e8903a486d1d95a4 Mon Sep 17 00:00:00 2001 From: wenshao Date: Wed, 6 May 2026 11:16:06 +0800 Subject: [PATCH] fix(core): close 2 correctness gaps from maintainer review #4232751470 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both tracked back to the cache's "track most recent read shape" model diverging from prior-read enforcement's "model has seen these bytes" model. 1. SVG (and similar string-content fallbacks) recorded as non-cacheable, blocking subsequent Edit / WriteFile. `read-file.ts` derives `cacheable` from `originalLineCount !== undefined && !isTruncated`. The SVG branch in `fileUtils.ts` returned content without `originalLineCount`, so `cacheable` collapsed to false and a follow-up Edit hit the dead-end "non-text payload — use shell" rejection — telling the model to use shell to mutate a file it had just successfully read as text. This was a real regression vs pre-PR behaviour where SVG-as-text editing worked. Fix: SVG-as-text branch now sets `originalLineCount` (split on '\n') and `isTruncated: false`, so ReadFile records it as a full cacheable read. The binary-fallback string and over-1MB SVG branches are deliberately left non-cacheable — they return placeholder strings ("Cannot display content of ...") rather than file content, so blocking edits there is correct. New regression test in `read-file.test.ts`: `records SVG-as-text reads with cacheable=true so a follow-up Edit passes enforcement`. 2. recordRead unconditionally overwriting lastReadWasFull / lastReadCacheable, revoking prior write-author or full-read rights. The `WriteFile(create) → ReadFile(offset/limit) → Edit` sequence rejected the Edit because the partial read clobbered the `lastReadWasFull = true` that `recordWrite` had stamped at create time. Same shape applies to a full text read followed by a partial one of the same inode. Fix: `recordRead` is now sticky-on-true for the read flags — `if (opts.full) entry.lastReadWasFull = true;` and the matching guard for `cacheable`. Prior `true` survives a later partial / non-cacheable read. The fast-path `file_unchanged` check still gates on the incoming request's own `isFullRead` in `read-file.ts`, so a partial read still does not get a placeholder it shouldn't. Updated the existing "overwrites earlier lastReadWasFull" test to assert the new sticky semantics, and added a `lastReadCacheable` symmetric test plus a `Write → partial-Read → Edit` end-to-end test in `edit.test.ts`. Reported by tanzhenxin via independent maintainer review on 2026-05-06. --- .../core/src/services/fileReadCache.test.ts | 27 +++++++++++- packages/core/src/services/fileReadCache.ts | 22 +++++++++- packages/core/src/tools/edit.test.ts | 41 +++++++++++++++++++ packages/core/src/tools/read-file.test.ts | 30 ++++++++++++++ packages/core/src/utils/fileUtils.ts | 11 +++++ 5 files changed, 127 insertions(+), 4 deletions(-) diff --git a/packages/core/src/services/fileReadCache.test.ts b/packages/core/src/services/fileReadCache.test.ts index 6b3af18fd..02d6153a5 100644 --- a/packages/core/src/services/fileReadCache.test.ts +++ b/packages/core/src/services/fileReadCache.test.ts @@ -136,7 +136,13 @@ describe('FileReadCache', () => { } }); - it('overwrites earlier lastReadWasFull on subsequent reads', () => { + it('preserves earlier lastReadWasFull on a subsequent partial read (sticky-on-true)', () => { + // Prior-read enforcement asks "has the model seen these + // bytes (or fully authored them)?" — once a full read has + // happened, a follow-up offset/limit read should not revoke + // the model's right to mutate the file. Pre-fix this was + // the cause of the `WriteFile(create) → ReadFile(offset/limit) + // → Edit` regression flagged by the maintainer review. const cache = new FileReadCache(); const stats = makeStats(); cache.recordRead('/x/foo.ts', stats, { full: true, cacheable: true }); @@ -144,7 +150,24 @@ describe('FileReadCache', () => { const result = cache.check(stats); expect(result.state).toBe('fresh'); if (result.state === 'fresh') { - expect(result.entry.lastReadWasFull).toBe(false); + expect(result.entry.lastReadWasFull).toBe(true); + } + }); + + it('preserves earlier lastReadCacheable on a subsequent non-cacheable read', () => { + // Symmetric to lastReadWasFull. A model that once read a + // file as text and later read it as a structured payload + // (image, PDF, etc. — though uncommon in practice for a + // single inode) keeps the right to edit the bytes it saw + // as text. + const cache = new FileReadCache(); + const stats = makeStats(); + cache.recordRead('/x/foo.ts', stats, { full: true, cacheable: true }); + cache.recordRead('/x/foo.ts', stats, { full: true, cacheable: false }); + const result = cache.check(stats); + expect(result.state).toBe('fresh'); + if (result.state === 'fresh') { + expect(result.entry.lastReadCacheable).toBe(true); } }); diff --git a/packages/core/src/services/fileReadCache.ts b/packages/core/src/services/fileReadCache.ts index 3a39696dc..8c4bdf2bf 100644 --- a/packages/core/src/services/fileReadCache.ts +++ b/packages/core/src/services/fileReadCache.ts @@ -99,6 +99,20 @@ export class FileReadCache { * - `cacheable` — the produced content is suitable for substitution * with a `file_unchanged` placeholder. Set true for plain text, * false for binary / image / audio / video / PDF / notebook. + * + * The `lastReadWasFull` and `lastReadCacheable` flags are + * **sticky-on-true**: a partial / non-cacheable read records the + * timestamp but does not flip a previously-`true` flag back to + * `false`. The reason is that prior-read enforcement on Edit / + * WriteFile asks "has the model seen these bytes (or fully + * authored them)?", not "what was the *most recent* read shape?" + * — pre-fix, a `WriteFile(create) → ReadFile(offset/limit) → Edit` + * sequence would reject the Edit because the partial read clobbered + * the `lastReadWasFull = true` that `recordWrite` had stamped, and + * the same shape applies to a full text read followed by a partial + * one. The fast-path file_unchanged check still gates on the + * incoming request's own `isFullRead` (in `read-file.ts`), so a + * partial read does not get a placeholder it shouldn't. */ recordRead( absPath: string, @@ -107,8 +121,12 @@ export class FileReadCache { ): FileReadEntry { const entry = this.upsert(absPath, stats); entry.lastReadAt = Date.now(); - entry.lastReadWasFull = opts.full; - entry.lastReadCacheable = opts.cacheable; + if (opts.full) { + entry.lastReadWasFull = true; + } + if (opts.cacheable) { + entry.lastReadCacheable = true; + } return entry; } diff --git a/packages/core/src/tools/edit.test.ts b/packages/core/src/tools/edit.test.ts index ffea84a62..e1cb47335 100644 --- a/packages/core/src/tools/edit.test.ts +++ b/packages/core/src/tools/edit.test.ts @@ -1239,6 +1239,47 @@ describe('EditTool', () => { expect(fs.readFileSync(newPath, 'utf8')).toBe('second content\n'); }); + it('allows Edit after Write→partial-Read (sticky-on-true preserves write-author rights)', async () => { + // Reproduction for the maintainer-review regression: pre-fix, + // a partial read recorded `lastReadWasFull = false` and + // clobbered the `true` that recordWrite had stamped at + // create time, so this Edit would then be rejected with + // EDIT_REQUIRES_PRIOR_READ even though the model had + // authored the file's full content. + const newPath = path.join(rootDir, 'write-then-partial-read.txt'); + (mockConfig.getApprovalMode as Mock).mockReturnValue( + ApprovalMode.AUTO_EDIT, + ); + + const created = await tool + .build({ + file_path: newPath, + old_string: '', + new_string: 'line one\nline two\nline three\n', + }) + .execute(abortSignal); + expect(created.error).toBeUndefined(); + + // Simulate a partial follow-up Read (offset/limit). Pre-fix + // this overwrote lastReadWasFull/lastReadCacheable to false. + fileReadCache.recordRead(newPath, fs.statSync(newPath), { + full: false, + cacheable: true, + }); + + const edited = await tool + .build({ + file_path: newPath, + old_string: 'line two', + new_string: 'second line', + }) + .execute(abortSignal); + expect(edited.error).toBeUndefined(); + expect(fs.readFileSync(newPath, 'utf8')).toBe( + 'line one\nsecond line\nline three\n', + ); + }); + it('allows a chain of edits without re-reading between them', async () => { // After the first Edit, recordWrite stamps `lastWriteAt`. The // second Edit's stat will still match the cache entry (because diff --git a/packages/core/src/tools/read-file.test.ts b/packages/core/src/tools/read-file.test.ts index ca5be9885..2b2916f50 100644 --- a/packages/core/src/tools/read-file.test.ts +++ b/packages/core/src/tools/read-file.test.ts @@ -795,6 +795,36 @@ describe('ReadFileTool', () => { } }); + it('records SVG-as-text reads with cacheable=true so a follow-up Edit passes enforcement', async () => { + // Pre-fix the SVG branch in fileUtils.ts returned content + // without `originalLineCount`, which collapsed + // ReadFileToolInvocation's `cacheable` derivation to + // false. EditTool's prior-read enforcement then mistook + // the just-read SVG for a "non-text payload" and rejected + // a subsequent in-place edit. + const svgPath = path.join(tempRootDir, 'icon.svg'); + await fsp.writeFile( + svgPath, + '\n', + 'utf-8', + ); + + const result = await read({ file_path: svgPath }); + expect(typeof result.llmContent).toBe('string'); + expect(result.returnDisplay).toMatch(/^Read SVG as text:/); + + // The cache must record this as a full, cacheable read so + // that prior-read enforcement on Edit / WriteFile would + // recognise it as the model having seen the bytes. + const status = fileReadCache.check(fs.statSync(svgPath)); + expect(status.state).toBe('fresh'); + if (status.state === 'fresh') { + expect(status.entry.lastReadAt).toBeDefined(); + expect(status.entry.lastReadWasFull).toBe(true); + expect(status.entry.lastReadCacheable).toBe(true); + } + }); + it('does not return the placeholder for image files', async () => { const imagePath = path.join(tempRootDir, 'pic.png'); const pngHeader = Buffer.from([ diff --git a/packages/core/src/utils/fileUtils.ts b/packages/core/src/utils/fileUtils.ts index 79467fb0e..3606dbea8 100644 --- a/packages/core/src/utils/fileUtils.ts +++ b/packages/core/src/utils/fileUtils.ts @@ -698,9 +698,20 @@ export async function processSingleFileContent( }; } const content = await readFileWithEncoding(filePath); + // Populate `originalLineCount` and `isTruncated` so the + // ReadFile cache treats this exactly like a successful text + // read: ReadFileToolInvocation derives `cacheable` from + // those two fields, and an SVG-as-text read needs to be + // cacheable to keep working as an editable text file. Pre-fix, + // the absent `originalLineCount` collapsed cacheable to false + // and a follow-up Edit on the just-read SVG would be rejected + // as a "non-text payload" — a regression flagged by the + // independent maintainer review. return { llmContent: content, returnDisplay: `Read SVG as text: ${relativePathForDisplay}`, + originalLineCount: content.split('\n').length, + isTruncated: false, }; } case 'text': {