fix: tighten source cache validation

This commit is contained in:
Sharada Mohanty 2026-04-20 16:30:53 +02:00
parent a0bad07c19
commit 0d4d103627
2 changed files with 75 additions and 6 deletions

View file

@ -37,6 +37,31 @@ export type SourceCacheManifest = {
entries: Record<string, { file: string; provider: string; logicalPath: string }>
}
/**
 * Type guard for values that are objects but neither `null` nor arrays.
 *
 * Lets callers narrow an `unknown` value to a string-keyed record before
 * reading its properties. Only `typeof` and `Array.isArray` are consulted, so
 * any non-array object instance passes the check.
 *
 * @param value - The value to inspect.
 * @returns `true` when `value` is a non-null, non-array object.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (typeof value !== 'object' || value === null) return false
  return !Array.isArray(value)
}
/**
 * Type guard for a single manifest entry record.
 *
 * An entry is accepted only when it is a non-array object whose `file`,
 * `provider` and `logicalPath` properties are all strings. Extra properties
 * are ignored.
 *
 * @param value - The candidate entry.
 * @returns `true` when all three required string fields are present.
 */
function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } {
  if (!isPlainObject(value)) return false
  // Checked in the same order as the declared shape; stop at the first mismatch.
  for (const key of ['file', 'provider', 'logicalPath']) {
    if (typeof value[key] !== 'string') return false
  }
  return true
}
/**
 * Type guard for a cached source entry.
 *
 * Checks the primitive type of each top-level field, that `cacheStrategy` is
 * one of the two known literals, that `fingerprint` is an object with numeric
 * `mtimeMs` and `sizeBytes`, and that `sessions` is an array. The elements of
 * `sessions` are not inspected, and the value of `version` is not compared
 * against any expected number here.
 *
 * @param value - The candidate entry.
 * @returns `true` when every checked field has the expected type.
 */
function isSourceCacheEntry(value: unknown): value is SourceCacheEntry {
  if (!isPlainObject(value)) return false
  if (typeof value.version !== 'number') return false

  for (const key of ['provider', 'logicalPath', 'fingerprintPath']) {
    if (typeof value[key] !== 'string') return false
  }

  const strategy = value.cacheStrategy
  if (strategy !== 'full-reparse' && strategy !== 'append-jsonl') return false
  if (typeof value.parserVersion !== 'string') return false

  const fingerprint = value.fingerprint
  if (!isPlainObject(fingerprint)) return false
  if (typeof fingerprint.mtimeMs !== 'number') return false
  if (typeof fingerprint.sizeBytes !== 'number') return false

  return Array.isArray(value.sessions)
}
function cacheRoot(): string {
const base = process.env['CODEBURN_CACHE_DIR'] ?? join(homedir(), '.cache', 'codeburn')
return join(base, 'source-cache-v1')
@ -72,11 +97,18 @@ export async function loadSourceCacheManifest(): Promise<SourceCacheManifest> {
try {
const raw = await readFile(manifestPath(), 'utf-8')
const parsed = JSON.parse(raw) as Partial<SourceCacheManifest>
if (parsed.version !== SOURCE_CACHE_VERSION || !parsed.entries || typeof parsed.entries !== 'object') {
const parsed: unknown = JSON.parse(raw)
if (!isPlainObject(parsed) || parsed.version !== SOURCE_CACHE_VERSION || !isPlainObject(parsed.entries)) {
return emptySourceCacheManifest()
}
return { version: SOURCE_CACHE_VERSION, entries: parsed.entries as SourceCacheManifest['entries'] }
const entries: SourceCacheManifest['entries'] = {}
for (const [key, value] of Object.entries(parsed.entries)) {
if (!isManifestEntry(value)) return emptySourceCacheManifest()
entries[key] = value
}
return { version: SOURCE_CACHE_VERSION, entries }
} catch {
return emptySourceCacheManifest()
}
@ -120,8 +152,8 @@ export async function readSourceCacheEntry(
try {
const raw = await readFile(join(entryDir(), meta.file), 'utf-8')
const entry = JSON.parse(raw) as SourceCacheEntry
if (entry.version !== SOURCE_CACHE_VERSION) return null
const entry: unknown = JSON.parse(raw)
if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null
const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath)
if (

View file

@ -1,6 +1,6 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
import { existsSync } from 'fs'
import { mkdtemp, readFile, rm, writeFile } from 'fs/promises'
import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'fs/promises'
import { tmpdir } from 'os'
import { join } from 'path'
@ -32,6 +32,16 @@ describe('source cache manifest', () => {
await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest())
})
it('returns an empty manifest when the manifest shape is invalid', async () => {
  // Arrange: a manifest with the right version whose only entry has a numeric
  // `file` and no `logicalPath`.
  const cacheDir = join(root, 'source-cache-v1')
  const malformedManifest = {
    version: SOURCE_CACHE_VERSION,
    entries: { bad: { file: 123, provider: 'fake' } },
  }
  await mkdir(cacheDir, { recursive: true })
  await writeFile(join(cacheDir, 'manifest.json'), JSON.stringify(malformedManifest), 'utf-8')

  // Act + assert: loading yields the empty manifest.
  const pending = loadSourceCacheManifest()
  await expect(pending).resolves.toEqual(emptySourceCacheManifest())
})
it('round-trips a manifest and entry', async () => {
const sourcePath = join(root, 'source.jsonl')
await writeFile(sourcePath, '{"ok":true}\n', 'utf-8')
@ -80,6 +90,29 @@ describe('source cache manifest', () => {
expect(loaded).toBeNull()
})
it('returns null when the cached entry shape is invalid', async () => {
  // Arrange: a source file plus a manifest that points at `broken.json`.
  const sourcePath = join(root, 'source.jsonl')
  await writeFile(sourcePath, 'one\n', 'utf-8')

  const manifest = await loadSourceCacheManifest()
  const entryFile = 'broken.json'
  manifest.entries[`fake:${sourcePath}`] = { file: entryFile, provider: 'fake', logicalPath: sourcePath }
  await saveSourceCacheManifest(manifest)

  // Only `fingerprint.mtimeMs` has the wrong type (a string instead of a number).
  const malformedEntry = {
    version: SOURCE_CACHE_VERSION,
    provider: 'fake',
    logicalPath: sourcePath,
    fingerprintPath: sourcePath,
    cacheStrategy: 'full-reparse',
    parserVersion: 'fake-v1',
    fingerprint: { mtimeMs: 'nope', sizeBytes: 4 },
    sessions: [],
  }
  const entriesDir = join(root, 'source-cache-v1', 'entries')
  await mkdir(entriesDir, { recursive: true })
  await writeFile(join(entriesDir, entryFile), JSON.stringify(malformedEntry), 'utf-8')

  // Act: reload the manifest from disk and read the entry through it.
  const reloadedManifest = await loadSourceCacheManifest()
  const loaded = await readSourceCacheEntry(reloadedManifest, 'fake', sourcePath)

  // Assert
  expect(loaded).toBeNull()
})
it('writes atomically without leaving temp files behind', async () => {
const sourcePath = join(root, 'source.jsonl')
await writeFile(sourcePath, 'x\n', 'utf-8')
@ -99,5 +132,9 @@ describe('source cache manifest', () => {
const files = JSON.parse(await readFile(join(root, 'source-cache-v1', 'manifest.json'), 'utf-8'))
expect(files.version).toBe(SOURCE_CACHE_VERSION)
expect(existsSync(join(root, 'source-cache-v1', 'entries'))).toBe(true)
const cacheFiles = await readdir(join(root, 'source-cache-v1'))
const entryFiles = await readdir(join(root, 'source-cache-v1', 'entries'))
expect(cacheFiles.some(f => f.endsWith('.tmp'))).toBe(false)
expect(entryFiles.some(f => f.endsWith('.tmp'))).toBe(false)
})
})