From ac5dd8c3e958769665cd70ab0aaacbd86f88081b Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:35:48 +0200 Subject: [PATCH] fix: tighten source cache validation --- src/source-cache.ts | 40 +++++++++++++++++++++++- tests/source-cache.test.ts | 64 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/src/source-cache.ts b/src/source-cache.ts index a771010..83ece56 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -44,10 +44,44 @@ function isPlainObject(value: unknown): value is Record { function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } { return isPlainObject(value) && typeof value.file === 'string' + && /^[a-f0-9]{40}\.json$/.test(value.file) && typeof value.provider === 'string' && typeof value.logicalPath === 'string' } +function isSessionSummary(value: unknown): value is SessionSummary { + return isPlainObject(value) + && typeof value.sessionId === 'string' + && typeof value.project === 'string' + && typeof value.firstTimestamp === 'string' + && typeof value.lastTimestamp === 'string' + && typeof value.totalCostUSD === 'number' + && Number.isFinite(value.totalCostUSD) + && typeof value.totalInputTokens === 'number' + && Number.isFinite(value.totalInputTokens) + && typeof value.totalOutputTokens === 'number' + && Number.isFinite(value.totalOutputTokens) + && typeof value.totalCacheReadTokens === 'number' + && Number.isFinite(value.totalCacheReadTokens) + && typeof value.totalCacheWriteTokens === 'number' + && Number.isFinite(value.totalCacheWriteTokens) + && typeof value.apiCalls === 'number' + && Number.isFinite(value.apiCalls) + && Array.isArray(value.turns) + && isPlainObject(value.modelBreakdown) + && isPlainObject(value.toolBreakdown) + && isPlainObject(value.mcpBreakdown) + && isPlainObject(value.bashBreakdown) + && isPlainObject(value.categoryBreakdown) +} + +function isAppendState(value: unknown): value is AppendState { + return isPlainObject(value) + && typeof value.endOffset === 'number' + && Number.isFinite(value.endOffset) + && typeof value.tailHash === 'string' +} + function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { return isPlainObject(value) && typeof value.version === 'number' @@ -57,9 +91,13 @@ function isSourceCacheEntry(value: unknown): value is SourceCacheEntry { && (value.cacheStrategy === 'full-reparse' || value.cacheStrategy === 'append-jsonl') && typeof value.parserVersion === 'string' && isPlainObject(value.fingerprint) + && Number.isFinite(value.fingerprint.mtimeMs) && typeof value.fingerprint.mtimeMs === 'number' + && Number.isFinite(value.fingerprint.sizeBytes) && typeof value.fingerprint.sizeBytes === 'number' && Array.isArray(value.sessions) + && value.sessions.every(isSessionSummary) + && (value.appendState === undefined || isAppendState(value.appendState)) } function cacheRoot(): string { @@ -172,10 +210,10 @@ export async function readSourceCacheEntry( export async function writeSourceCacheEntry(manifest: SourceCacheManifest, entry: SourceCacheEntry): Promise { await mkdir(entryDir(), { recursive: true }) const file = entryFilename(entry.provider, entry.logicalPath) + await atomicWriteJson(join(entryDir(), file), entry) manifest.entries[sourceKey(entry.provider, entry.logicalPath)] = { file, provider: entry.provider, logicalPath: entry.logicalPath, } - await atomicWriteJson(join(entryDir(), file), entry) } diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts index 581b66a..8707f85 100644 --- a/tests/source-cache.test.ts +++ b/tests/source-cache.test.ts @@ -1,4 +1,5 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { createHash } from 'crypto' import { existsSync } from 'fs' import { mkdir, mkdtemp, readFile, readdir, rm, writeFile } from 'fs/promises' import { tmpdir } from 'os' @@ -42,6 +43,22 @@ describe('source cache manifest', () => { await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) }) + it('returns an empty manifest when an entry filename is unsafe', async () => { + await mkdir(join(root, 'source-cache-v1'), { recursive: true }) + await writeFile(join(root, 'source-cache-v1', 'manifest.json'), JSON.stringify({ + version: SOURCE_CACHE_VERSION, + entries: { + bad: { + file: '../escape.json', + provider: 'fake', + logicalPath: join(root, 'source.jsonl'), + }, + }, + }), 'utf-8') + + await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest()) + }) + it('round-trips a manifest and entry', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, '{"ok":true}\n', 'utf-8') @@ -113,6 +130,30 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when append state is malformed', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'append-jsonl' as const, + parserVersion: 'fake-v1', + fingerprint, + sessions: [], + appendState: { endOffset: 'bad', tailHash: 'abc' }, + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry as SourceCacheEntry) + await saveSourceCacheManifest(manifest) + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('writes atomically without leaving temp files behind', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'x\n', 'utf-8') @@ -137,4 +178,27 @@ describe('source cache manifest', () => { expect(cacheFiles.some(f => f.endsWith('.tmp'))).toBe(false) expect(entryFiles.some(f => f.endsWith('.tmp'))).toBe(false) }) + + it('does not mutate the manifest when the entry write fails', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'x\n', 'utf-8') + const manifest = await loadSourceCacheManifest() + const provider = 'fake' + const logicalPath = sourcePath + const file = `${createHash('sha1').update(`${provider}:${logicalPath}`).digest('hex')}.json` + await mkdir(join(root, 'source-cache-v1', 'entries', file), { recursive: true }) + + await expect(writeSourceCacheEntry(manifest, { + version: SOURCE_CACHE_VERSION, + provider, + logicalPath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint: await computeFileFingerprint(sourcePath), + sessions: [], + })).rejects.toBeTruthy() + + expect(manifest.entries[`fake:${sourcePath}`]).toBeUndefined() + }) })