From a2593ceb1ed7ddfa3e509df497b202679b18d25e Mon Sep 17 00:00:00 2001 From: Sharada Mohanty Date: Mon, 20 Apr 2026 16:39:19 +0200 Subject: [PATCH] fix: harden source cache validation --- src/source-cache.ts | 101 +++++++++++++++++++---- tests/source-cache.test.ts | 161 ++++++++++++++++++++++++++++++++++++- 2 files changed, 244 insertions(+), 18 deletions(-) diff --git a/src/source-cache.ts b/src/source-cache.ts index 83ece56..bd65dcf 100644 --- a/src/source-cache.ts +++ b/src/source-cache.ts @@ -41,6 +41,10 @@ function isPlainObject(value: unknown): value is Record { return !!value && typeof value === 'object' && !Array.isArray(value) } +function isFiniteNumber(value: unknown): value is number { + return typeof value === 'number' && Number.isFinite(value) +} + function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } { return isPlainObject(value) && typeof value.file === 'string' @@ -55,24 +59,82 @@ function isSessionSummary(value: unknown): value is SessionSummary { && typeof value.project === 'string' && typeof value.firstTimestamp === 'string' && typeof value.lastTimestamp === 'string' - && typeof value.totalCostUSD === 'number' - && Number.isFinite(value.totalCostUSD) - && typeof value.totalInputTokens === 'number' - && Number.isFinite(value.totalInputTokens) - && typeof value.totalOutputTokens === 'number' - && Number.isFinite(value.totalOutputTokens) - && typeof value.totalCacheReadTokens === 'number' - && Number.isFinite(value.totalCacheReadTokens) - && typeof value.totalCacheWriteTokens === 'number' - && Number.isFinite(value.totalCacheWriteTokens) - && typeof value.apiCalls === 'number' - && Number.isFinite(value.apiCalls) + && isFiniteNumber(value.totalCostUSD) + && isFiniteNumber(value.totalInputTokens) + && isFiniteNumber(value.totalOutputTokens) + && isFiniteNumber(value.totalCacheReadTokens) + && isFiniteNumber(value.totalCacheWriteTokens) + && isFiniteNumber(value.apiCalls) && Array.isArray(value.turns) - && isPlainObject(value.modelBreakdown) - && isPlainObject(value.toolBreakdown) - && isPlainObject(value.mcpBreakdown) - && isPlainObject(value.bashBreakdown) - && isPlainObject(value.categoryBreakdown) + && value.turns.every(isParsedTurn) + && isBreakdownMap(value.modelBreakdown, isModelBreakdownEntry) + && isBreakdownMap(value.toolBreakdown, isCallsBreakdownEntry) + && isBreakdownMap(value.mcpBreakdown, isCallsBreakdownEntry) + && isBreakdownMap(value.bashBreakdown, isCallsBreakdownEntry) + && isBreakdownMap(value.categoryBreakdown, isCategoryBreakdownEntry) +} + +function isTokenUsage(value: unknown): value is { inputTokens: number; outputTokens: number; cacheCreationInputTokens: number; cacheReadInputTokens: number; cachedInputTokens: number; reasoningTokens: number; webSearchRequests: number } { + return isPlainObject(value) + && isFiniteNumber(value.inputTokens) + && isFiniteNumber(value.outputTokens) + && isFiniteNumber(value.cacheCreationInputTokens) + && isFiniteNumber(value.cacheReadInputTokens) + && isFiniteNumber(value.cachedInputTokens) + && isFiniteNumber(value.reasoningTokens) + && isFiniteNumber(value.webSearchRequests) +} + +function isParsedApiCall(value: unknown): boolean { + return isPlainObject(value) + && typeof value.provider === 'string' + && typeof value.model === 'string' + && isTokenUsage(value.usage) + && isFiniteNumber(value.costUSD) + && Array.isArray(value.tools) + && value.tools.every(tool => typeof tool === 'string') + && Array.isArray(value.mcpTools) + && value.mcpTools.every(tool => typeof tool === 'string') + && typeof value.hasAgentSpawn === 'boolean' + && typeof value.hasPlanMode === 'boolean' + && (value.speed === 'standard' || value.speed === 'fast') + && typeof value.timestamp === 'string' + && Array.isArray(value.bashCommands) + && value.bashCommands.every(command => typeof command === 'string') + && typeof value.deduplicationKey === 'string' +} + +function isParsedTurn(value: unknown): boolean { + return isPlainObject(value) + && typeof value.userMessage === 'string' + && Array.isArray(value.assistantCalls) + && value.assistantCalls.every(isParsedApiCall) + && typeof value.timestamp === 'string' + && typeof value.sessionId === 'string' +} + +function isModelBreakdownEntry(value: unknown): boolean { + return isPlainObject(value) + && isFiniteNumber(value.calls) + && isFiniteNumber(value.costUSD) + && isTokenUsage(value.tokens) +} + +function isCallsBreakdownEntry(value: unknown): boolean { + return isPlainObject(value) && isFiniteNumber(value.calls) +} + +function isCategoryBreakdownEntry(value: unknown): boolean { + return isPlainObject(value) + && isFiniteNumber(value.turns) + && isFiniteNumber(value.costUSD) + && isFiniteNumber(value.retries) + && isFiniteNumber(value.editTurns) + && isFiniteNumber(value.oneShotTurns) +} + +function isBreakdownMap(value: unknown, predicate: (entry: unknown) => entry is T): value is Record { + return isPlainObject(value) && Object.values(value).every(predicate) } function isAppendState(value: unknown): value is AppendState { @@ -187,11 +249,16 @@ export async function readSourceCacheEntry( ): Promise { const meta = manifest.entries[sourceKey(provider, logicalPath)] if (!meta) return null + if (meta.provider !== provider || meta.logicalPath !== logicalPath) return null + + const expectedFile = entryFilename(provider, logicalPath) + if (meta.file !== expectedFile) return null try { const raw = await readFile(join(entryDir(), meta.file), 'utf-8') const entry: unknown = JSON.parse(raw) if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null + if (entry.provider !== provider || entry.logicalPath !== logicalPath) return null const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath) if ( diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts index 8707f85..3af4818 100644 --- a/tests/source-cache.test.ts +++ b/tests/source-cache.test.ts @@ -15,9 +15,32 @@ import { computeFileFingerprint, type SourceCacheEntry, } from '../src/source-cache.js' +import type { SessionSummary } from '../src/types.js' let root = '' +function emptySession(sessionId: string, overrides: Partial = {}): SessionSummary { + return { + sessionId, + project: 'project', + firstTimestamp: '2026-04-10T00:00:00Z', + lastTimestamp: '2026-04-10T00:00:00Z', + totalCostUSD: 0, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, + apiCalls: 0, + turns: [], + modelBreakdown: {}, + toolBreakdown: {}, + mcpBreakdown: {}, + bashBreakdown: {}, + categoryBreakdown: {}, + ...overrides, + } +} + beforeEach(async () => { root = await mkdtemp(join(tmpdir(), 'codeburn-source-cache-')) process.env['CODEBURN_CACHE_DIR'] = root @@ -111,7 +134,7 @@ describe('source cache manifest', () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'one\n', 'utf-8') const manifest = await loadSourceCacheManifest() - const file = 'broken.json' + const file = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json` manifest.entries[`fake:${sourcePath}`] = { file, provider: 'fake', logicalPath: sourcePath } await saveSourceCacheManifest(manifest) await mkdir(join(root, 'source-cache-v1', 'entries'), { recursive: true }) @@ -130,6 +153,101 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when the manifest metadata does not match the lookup request', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const file = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json` + const manifest = await loadSourceCacheManifest() + manifest.entries[`fake:${sourcePath}`] = { + file, + provider: 'other', + logicalPath: sourcePath, + } + await saveSourceCacheManifest(manifest) + await mkdir(join(root, 'source-cache-v1', 'entries'), { recursive: true }) + await writeFile(join(root, 'source-cache-v1', 'entries', file), JSON.stringify({ + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [], + }), 'utf-8') + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + + it('returns null when a nested assistant call is malformed', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry: SourceCacheEntry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [ + emptySession('session-1', { + turns: [{ + userMessage: 'hello', + assistantCalls: [{ + provider: 'fake', + model: 'model', + usage: { + inputTokens: 1, + outputTokens: 1, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + }, + costUSD: 1, + tools: [], + mcpTools: [], + hasAgentSpawn: false, + hasPlanMode: false, + speed: 'standard', + timestamp: '2026-04-10T00:00:00Z', + bashCommands: [], + deduplicationKey: 'k', + }], + timestamp: '2026-04-10T00:00:00Z', + sessionId: 'session-1', + }], + }), + ], + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry) + await saveSourceCacheManifest(manifest) + + await writeFile(join(root, 'source-cache-v1', 'entries', `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json`), JSON.stringify({ + ...entry, + sessions: [{ + ...entry.sessions[0], + turns: [{ + ...entry.sessions[0].turns[0], + assistantCalls: [{ + ...entry.sessions[0].turns[0].assistantCalls[0], + usage: { ...entry.sessions[0].turns[0].assistantCalls[0].usage, inputTokens: 'bad' }, + }], + }], + }], + }), 'utf-8') + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('returns null when append state is malformed', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'one\n', 'utf-8') @@ -154,6 +272,47 @@ describe('source cache manifest', () => { expect(loaded).toBeNull() }) + it('returns null when a breakdown map contains malformed values', async () => { + const sourcePath = join(root, 'source.jsonl') + await writeFile(sourcePath, 'one\n', 'utf-8') + const fingerprint = await computeFileFingerprint(sourcePath) + const entry: SourceCacheEntry = { + version: SOURCE_CACHE_VERSION, + provider: 'fake', + logicalPath: sourcePath, + fingerprintPath: sourcePath, + cacheStrategy: 'full-reparse', + parserVersion: 'fake-v1', + fingerprint, + sessions: [ + emptySession('session-2', { + modelBreakdown: { + modelA: { + calls: 'bad', + costUSD: 0, + tokens: { + inputTokens: 0, + outputTokens: 0, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, + }, + }, + }, + }), + ], + } + + const manifest = await loadSourceCacheManifest() + await writeSourceCacheEntry(manifest, entry) + await saveSourceCacheManifest(manifest) + + const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath) + expect(loaded).toBeNull() + }) + it('writes atomically without leaving temp files behind', async () => { const sourcePath = join(root, 'source.jsonl') await writeFile(sourcePath, 'x\n', 'utf-8')