fix: harden source cache validation

This commit is contained in:
Sharada Mohanty 2026-04-20 16:39:19 +02:00
parent ac5dd8c3e9
commit a2593ceb1e
2 changed files with 244 additions and 18 deletions

View file

@ -41,6 +41,10 @@ function isPlainObject(value: unknown): value is Record<string, unknown> {
return !!value && typeof value === 'object' && !Array.isArray(value)
}
/**
 * Type guard for a usable numeric value.
 *
 * @param value - Any value, typically a field read from parsed JSON.
 * @returns `true` only when `value` is a primitive number that is neither
 *   `NaN` nor `±Infinity`; every non-number input yields `false`.
 */
function isFiniteNumber(value: unknown): value is number {
  if (typeof value !== 'number') return false
  return Number.isFinite(value)
}
function isManifestEntry(value: unknown): value is { file: string; provider: string; logicalPath: string } {
return isPlainObject(value)
&& typeof value.file === 'string'
@ -55,24 +59,82 @@ function isSessionSummary(value: unknown): value is SessionSummary {
&& typeof value.project === 'string'
&& typeof value.firstTimestamp === 'string'
&& typeof value.lastTimestamp === 'string'
&& typeof value.totalCostUSD === 'number'
&& Number.isFinite(value.totalCostUSD)
&& typeof value.totalInputTokens === 'number'
&& Number.isFinite(value.totalInputTokens)
&& typeof value.totalOutputTokens === 'number'
&& Number.isFinite(value.totalOutputTokens)
&& typeof value.totalCacheReadTokens === 'number'
&& Number.isFinite(value.totalCacheReadTokens)
&& typeof value.totalCacheWriteTokens === 'number'
&& Number.isFinite(value.totalCacheWriteTokens)
&& typeof value.apiCalls === 'number'
&& Number.isFinite(value.apiCalls)
&& isFiniteNumber(value.totalCostUSD)
&& isFiniteNumber(value.totalInputTokens)
&& isFiniteNumber(value.totalOutputTokens)
&& isFiniteNumber(value.totalCacheReadTokens)
&& isFiniteNumber(value.totalCacheWriteTokens)
&& isFiniteNumber(value.apiCalls)
&& Array.isArray(value.turns)
&& isPlainObject(value.modelBreakdown)
&& isPlainObject(value.toolBreakdown)
&& isPlainObject(value.mcpBreakdown)
&& isPlainObject(value.bashBreakdown)
&& isPlainObject(value.categoryBreakdown)
&& value.turns.every(isParsedTurn)
&& isBreakdownMap(value.modelBreakdown, isModelBreakdownEntry)
&& isBreakdownMap(value.toolBreakdown, isCallsBreakdownEntry)
&& isBreakdownMap(value.mcpBreakdown, isCallsBreakdownEntry)
&& isBreakdownMap(value.bashBreakdown, isCallsBreakdownEntry)
&& isBreakdownMap(value.categoryBreakdown, isCategoryBreakdownEntry)
}
/**
 * Type guard for a token-usage record.
 *
 * @param value - Any value, typically a field read from a parsed cache entry.
 * @returns `true` when `value` passes `isPlainObject` and each of the seven
 *   usage counters listed below passes `isFiniteNumber`.
 */
function isTokenUsage(value: unknown): value is { inputTokens: number; outputTokens: number; cacheCreationInputTokens: number; cacheReadInputTokens: number; cachedInputTokens: number; reasoningTokens: number; webSearchRequests: number } {
  if (!isPlainObject(value)) return false
  const record: Record<string, unknown> = value
  const counterFields = [
    'inputTokens',
    'outputTokens',
    'cacheCreationInputTokens',
    'cacheReadInputTokens',
    'cachedInputTokens',
    'reasoningTokens',
    'webSearchRequests',
  ]
  // `every` stops at the first counter that is missing or not a finite number.
  return counterFields.every(field => isFiniteNumber(record[field]))
}
/**
 * Validates the shape of a single cached assistant API call.
 *
 * Checks, in order: string `provider` and `model`, a valid `usage` record,
 * a finite `costUSD`, string arrays `tools` and `mcpTools`, boolean
 * `hasAgentSpawn` and `hasPlanMode`, a `speed` of `'standard'` or `'fast'`,
 * a string `timestamp`, a string array `bashCommands`, and a string
 * `deduplicationKey`.
 *
 * @param value - Any value, typically an element of a turn's `assistantCalls`.
 * @returns `true` only when every check above passes.
 */
function isParsedApiCall(value: unknown): boolean {
  if (!isPlainObject(value)) return false

  // An array whose elements are all strings (an empty array qualifies).
  const isStringList = (candidate: unknown): boolean =>
    Array.isArray(candidate) && candidate.every(item => typeof item === 'string')

  if (typeof value.provider !== 'string' || typeof value.model !== 'string') return false
  if (!isTokenUsage(value.usage) || !isFiniteNumber(value.costUSD)) return false
  if (!isStringList(value.tools) || !isStringList(value.mcpTools)) return false
  if (typeof value.hasAgentSpawn !== 'boolean' || typeof value.hasPlanMode !== 'boolean') return false
  if (value.speed !== 'standard' && value.speed !== 'fast') return false
  if (typeof value.timestamp !== 'string') return false
  if (!isStringList(value.bashCommands)) return false
  return typeof value.deduplicationKey === 'string'
}
/**
 * Validates the shape of a cached conversation turn.
 *
 * @param value - Any value, typically an element of a session's `turns` array.
 * @returns `true` when `value` is a plain object with string `userMessage`,
 *   `timestamp` and `sessionId`, and an `assistantCalls` array whose every
 *   element passes `isParsedApiCall`.
 */
function isParsedTurn(value: unknown): boolean {
  if (!isPlainObject(value)) return false

  const { userMessage, assistantCalls, timestamp, sessionId } = value
  if (typeof userMessage !== 'string') return false
  if (!Array.isArray(assistantCalls)) return false
  if (!assistantCalls.every(isParsedApiCall)) return false
  return typeof timestamp === 'string' && typeof sessionId === 'string'
}
/**
 * Validates one value of a session's `modelBreakdown` map.
 *
 * @param value - Any value taken from the breakdown map.
 * @returns `true` when `value` is a plain object with finite `calls` and
 *   `costUSD` and a `tokens` record accepted by `isTokenUsage`.
 */
function isModelBreakdownEntry(value: unknown): boolean {
  if (!isPlainObject(value)) return false

  const { calls, costUSD, tokens } = value
  return isFiniteNumber(calls) && isFiniteNumber(costUSD) && isTokenUsage(tokens)
}
/**
 * Validates a breakdown value that only carries a call counter.
 *
 * @param value - Any value taken from a breakdown map.
 * @returns `true` when `value` is a plain object whose `calls` is a finite number.
 */
function isCallsBreakdownEntry(value: unknown): boolean {
  if (!isPlainObject(value)) return false
  return isFiniteNumber(value.calls)
}
/**
 * Validates one value of a session's `categoryBreakdown` map.
 *
 * @param value - Any value taken from the breakdown map.
 * @returns `true` when `value` is a plain object whose `turns`, `costUSD`,
 *   `retries`, `editTurns` and `oneShotTurns` are all finite numbers.
 */
function isCategoryBreakdownEntry(value: unknown): boolean {
  if (!isPlainObject(value)) return false

  const entry: Record<string, unknown> = value
  const numericFields = ['turns', 'costUSD', 'retries', 'editTurns', 'oneShotTurns']
  // `every` stops at the first field that is missing or not a finite number.
  return numericFields.every(field => isFiniteNumber(entry[field]))
}
/**
 * Checks that `value` is a plain object and that every one of its own
 * enumerable property values satisfies `predicate`. An empty object passes.
 *
 * Two call shapes are supported:
 * - with a genuine type predicate (`entry is T`), the result narrows `value`
 *   to `Record<string, T>`;
 * - with an ordinary boolean-returning validator, the result narrows `value`
 *   to `Record<string, unknown>`.
 *
 * The second overload exists because the entry validators in this module are
 * declared as returning `boolean`, and TypeScript does not accept a plain
 * boolean function where a type-predicate signature is required.
 *
 * @param value - Any value, typically a breakdown field of a parsed session.
 * @param predicate - Validator applied to each property value.
 * @returns `true` when `value` is a plain object and all values pass `predicate`.
 */
function isBreakdownMap<T>(value: unknown, predicate: (entry: unknown) => entry is T): value is Record<string, T>
function isBreakdownMap(value: unknown, predicate: (entry: unknown) => boolean): value is Record<string, unknown>
function isBreakdownMap(value: unknown, predicate: (entry: unknown) => boolean): boolean {
  // Wrap the call so the validator receives only the entry, not the index/array
  // arguments that `Array.prototype.every` also supplies.
  return isPlainObject(value) && Object.values(value).every(entry => predicate(entry))
}
function isAppendState(value: unknown): value is AppendState {
@ -187,11 +249,16 @@ export async function readSourceCacheEntry(
): Promise<SourceCacheEntry | null> {
const meta = manifest.entries[sourceKey(provider, logicalPath)]
if (!meta) return null
if (meta.provider !== provider || meta.logicalPath !== logicalPath) return null
const expectedFile = entryFilename(provider, logicalPath)
if (meta.file !== expectedFile) return null
try {
const raw = await readFile(join(entryDir(), meta.file), 'utf-8')
const entry: unknown = JSON.parse(raw)
if (!isSourceCacheEntry(entry) || entry.version !== SOURCE_CACHE_VERSION) return null
if (entry.provider !== provider || entry.logicalPath !== logicalPath) return null
const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath)
if (

View file

@ -15,9 +15,32 @@ import {
computeFileFingerprint,
type SourceCacheEntry,
} from '../src/source-cache.js'
import type { SessionSummary } from '../src/types.js'
let root = ''
/**
 * Test fixture: builds a session summary with zeroed totals, no turns and
 * empty breakdown maps, then applies any caller-supplied overrides on top.
 *
 * @param sessionId - Identifier stored in the returned summary.
 * @param overrides - Fields that replace the defaults (shallow merge).
 * @returns A freshly allocated summary object.
 */
function emptySession(sessionId: string, overrides: Partial<SessionSummary> = {}): SessionSummary {
  const fixedTimestamp = '2026-04-10T00:00:00Z'
  const defaults: SessionSummary = {
    sessionId,
    project: 'project',
    firstTimestamp: fixedTimestamp,
    lastTimestamp: fixedTimestamp,
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheReadTokens: 0,
    totalCacheWriteTokens: 0,
    apiCalls: 0,
    turns: [],
    modelBreakdown: {},
    toolBreakdown: {},
    mcpBreakdown: {},
    bashBreakdown: {},
    categoryBreakdown: {},
  }
  // Later spread wins, so overrides take precedence over the defaults.
  return { ...defaults, ...overrides }
}
beforeEach(async () => {
root = await mkdtemp(join(tmpdir(), 'codeburn-source-cache-'))
process.env['CODEBURN_CACHE_DIR'] = root
@ -111,7 +134,7 @@ describe('source cache manifest', () => {
const sourcePath = join(root, 'source.jsonl')
await writeFile(sourcePath, 'one\n', 'utf-8')
const manifest = await loadSourceCacheManifest()
const file = 'broken.json'
const file = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json`
manifest.entries[`fake:${sourcePath}`] = { file, provider: 'fake', logicalPath: sourcePath }
await saveSourceCacheManifest(manifest)
await mkdir(join(root, 'source-cache-v1', 'entries'), { recursive: true })
@ -130,6 +153,101 @@ describe('source cache manifest', () => {
expect(loaded).toBeNull()
})
it('returns null when the manifest metadata does not match the lookup request', async () => {
  // A real source file on disk, so the stored fingerprint is computed from actual content.
  const sourcePath = join(root, 'source.jsonl')
  await writeFile(sourcePath, 'one\n', 'utf-8')
  const fingerprint = await computeFileFingerprint(sourcePath)

  const key = `fake:${sourcePath}`
  const file = `${createHash('sha1').update(key).digest('hex')}.json`
  const entriesDir = join(root, 'source-cache-v1', 'entries')

  // The manifest record sits under the 'fake' key but names provider 'other'.
  const manifest = await loadSourceCacheManifest()
  manifest.entries[key] = { file, provider: 'other', logicalPath: sourcePath }
  await saveSourceCacheManifest(manifest)

  // The entry file itself names provider 'fake' and carries the real fingerprint;
  // only the manifest record's provider differs from the lookup below.
  const payload = JSON.stringify({
    version: SOURCE_CACHE_VERSION,
    provider: 'fake',
    logicalPath: sourcePath,
    fingerprintPath: sourcePath,
    cacheStrategy: 'full-reparse',
    parserVersion: 'fake-v1',
    fingerprint,
    sessions: [],
  })
  await mkdir(entriesDir, { recursive: true })
  await writeFile(join(entriesDir, file), payload, 'utf-8')

  const reloadedManifest = await loadSourceCacheManifest()
  const loaded = await readSourceCacheEntry(reloadedManifest, 'fake', sourcePath)
  expect(loaded).toBeNull()
})
it('returns null when a nested assistant call is malformed', async () => {
  const sourcePath = join(root, 'source.jsonl')
  await writeFile(sourcePath, 'one\n', 'utf-8')
  const fingerprint = await computeFileFingerprint(sourcePath)

  // Well-formed entry with a single session, a single turn and a single assistant call.
  const entry: SourceCacheEntry = {
    version: SOURCE_CACHE_VERSION,
    provider: 'fake',
    logicalPath: sourcePath,
    fingerprintPath: sourcePath,
    cacheStrategy: 'full-reparse',
    parserVersion: 'fake-v1',
    fingerprint,
    sessions: [
      emptySession('session-1', {
        turns: [{
          userMessage: 'hello',
          assistantCalls: [{
            provider: 'fake',
            model: 'model',
            usage: {
              inputTokens: 1,
              outputTokens: 1,
              cacheCreationInputTokens: 0,
              cacheReadInputTokens: 0,
              cachedInputTokens: 0,
              reasoningTokens: 0,
              webSearchRequests: 0,
            },
            costUSD: 1,
            tools: [],
            mcpTools: [],
            hasAgentSpawn: false,
            hasPlanMode: false,
            speed: 'standard',
            timestamp: '2026-04-10T00:00:00Z',
            bashCommands: [],
            deduplicationKey: 'k',
          }],
          timestamp: '2026-04-10T00:00:00Z',
          sessionId: 'session-1',
        }],
      }),
    ],
  }

  // Register the entry through the normal write path first.
  const manifest = await loadSourceCacheManifest()
  await writeSourceCacheEntry(manifest, entry)
  await saveSourceCacheManifest(manifest)

  // Then overwrite the entry file with a copy whose nested `usage.inputTokens` is a string.
  const entryFileName = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json`
  const entryFilePath = join(root, 'source-cache-v1', 'entries', entryFileName)
  const session = entry.sessions[0]
  const turn = session.turns[0]
  const call = turn.assistantCalls[0]
  const corrupted = {
    ...entry,
    sessions: [{
      ...session,
      turns: [{
        ...turn,
        assistantCalls: [{
          ...call,
          usage: { ...call.usage, inputTokens: 'bad' },
        }],
      }],
    }],
  }
  await writeFile(entryFilePath, JSON.stringify(corrupted), 'utf-8')

  const reloadedManifest = await loadSourceCacheManifest()
  const loaded = await readSourceCacheEntry(reloadedManifest, 'fake', sourcePath)
  expect(loaded).toBeNull()
})
it('returns null when append state is malformed', async () => {
const sourcePath = join(root, 'source.jsonl')
await writeFile(sourcePath, 'one\n', 'utf-8')
@ -154,6 +272,47 @@ describe('source cache manifest', () => {
expect(loaded).toBeNull()
})
it('returns null when a breakdown map contains malformed values', async () => {
  const sourcePath = join(root, 'source.jsonl')
  await writeFile(sourcePath, 'one\n', 'utf-8')
  const fingerprint = await computeFileFingerprint(sourcePath)

  // Start from a well-formed entry so the typed literal stays type-correct and
  // the test does not depend on the writer persisting invalid data.
  const entry: SourceCacheEntry = {
    version: SOURCE_CACHE_VERSION,
    provider: 'fake',
    logicalPath: sourcePath,
    fingerprintPath: sourcePath,
    cacheStrategy: 'full-reparse',
    parserVersion: 'fake-v1',
    fingerprint,
    sessions: [
      emptySession('session-2', {
        modelBreakdown: {
          modelA: {
            calls: 0,
            costUSD: 0,
            tokens: {
              inputTokens: 0,
              outputTokens: 0,
              cacheCreationInputTokens: 0,
              cacheReadInputTokens: 0,
              cachedInputTokens: 0,
              reasoningTokens: 0,
              webSearchRequests: 0,
            },
          },
        },
      }),
    ],
  }

  const manifest = await loadSourceCacheManifest()
  await writeSourceCacheEntry(manifest, entry)
  await saveSourceCacheManifest(manifest)

  // Corrupt the entry on disk: `modelBreakdown.modelA.calls` becomes a string.
  // The path follows the same naming used by the nested-assistant-call test in this file.
  const entryFileName = `${createHash('sha1').update(`fake:${sourcePath}`).digest('hex')}.json`
  const entryFilePath = join(root, 'source-cache-v1', 'entries', entryFileName)
  const session = entry.sessions[0]
  const corrupted = {
    ...entry,
    sessions: [{
      ...session,
      modelBreakdown: {
        modelA: { ...session.modelBreakdown['modelA'], calls: 'bad' },
      },
    }],
  }
  await writeFile(entryFilePath, JSON.stringify(corrupted), 'utf-8')

  const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath)
  expect(loaded).toBeNull()
})
it('writes atomically without leaving temp files behind', async () => {
const sourcePath = join(root, 'source.jsonl')
await writeFile(sourcePath, 'x\n', 'utf-8')