feat: add persistent source cache storage

This commit is contained in:
Sharada Mohanty 2026-04-20 16:27:40 +02:00
parent e25922030d
commit a0bad07c19
2 changed files with 252 additions and 0 deletions

149
src/source-cache.ts Normal file
View file

@ -0,0 +1,149 @@
import { createHash, randomBytes } from 'crypto'
import { existsSync } from 'fs'
import { mkdir, open, readFile, rename, stat, unlink } from 'fs/promises'
import { homedir } from 'os'
import { dirname, join } from 'path'
import type { SessionSummary } from './types.js'
/**
 * On-disk format version. It is stamped into new manifests and compared on
 * load: manifests and entries carrying any other version are treated as absent.
 */
export const SOURCE_CACHE_VERSION = 1
/**
 * Strategy tag stored on each cache entry.
 * NOTE(review): nothing in this module branches on the value — presumably it
 * tells the caller how to refresh a changed source; confirm against the parser.
 */
export type SourceCacheStrategy = 'full-reparse' | 'append-jsonl'
/** Cheap change detector for a source file, taken from `stat()`. */
export type SourceFingerprint = {
// Modification time in milliseconds, as reported by `stat().mtimeMs`.
mtimeMs: number
// File size in bytes, as reported by `stat().size`.
sizeBytes: number
}
/**
 * Optional bookkeeping attached to an entry.
 * NOTE(review): not read or written by this module — presumably the byte offset
 * already parsed and a hash of the trailing bytes for append-only sources; confirm.
 */
export type AppendState = {
endOffset: number
tailHash: string
}
/** One cached parse result, stored as its own JSON file in the entries directory. */
export type SourceCacheEntry = {
// Must equal SOURCE_CACHE_VERSION for the entry to be accepted when read.
version: number
// `provider` and `logicalPath` together form the manifest key for this entry.
provider: string
logicalPath: string
// File that is stat'ed on read to check the stored fingerprint is still current.
fingerprintPath: string
cacheStrategy: SourceCacheStrategy
parserVersion: string
// Fingerprint of `fingerprintPath`; a read is a miss when mtime or size differs.
fingerprint: SourceFingerprint
sessions: SessionSummary[]
appendState?: AppendState
}
/** Index of cached entries, persisted as `manifest.json` in the cache root. */
export type SourceCacheManifest = {
version: number
// Keyed by `${provider}:${logicalPath}`; `file` is the entry's file name inside the entries directory.
entries: Record<string, { file: string; provider: string; logicalPath: string }>
}
/**
 * Resolve the directory that holds every source-cache file.
 *
 * Uses `CODEBURN_CACHE_DIR` when the variable is set, otherwise
 * `~/.cache/codeburn`; the `source-cache-v1` segment is always appended.
 */
function cacheRoot(): string {
  const override = process.env['CODEBURN_CACHE_DIR']
  const base = override === undefined ? join(homedir(), '.cache', 'codeburn') : override
  return join(base, 'source-cache-v1')
}
/** Absolute location of `manifest.json` inside the cache root. */
function manifestPath(): string {
  const root = cacheRoot()
  return join(root, 'manifest.json')
}
/** Directory inside the cache root where per-source entry files are stored. */
function entryDir(): string {
  const root = cacheRoot()
  return join(root, 'entries')
}
/** Build the manifest key for a source: provider and logical path joined by a colon. */
function sourceKey(provider: string, logicalPath: string): string {
  return [provider, logicalPath].join(':')
}
/**
 * Derive the entry file name for a source: the hex SHA-1 of its manifest key
 * followed by `.json`.
 */
function entryFilename(provider: string, logicalPath: string): string {
  const hasher = createHash('sha1')
  hasher.update(sourceKey(provider, logicalPath))
  const hex = hasher.digest('hex')
  return `${hex}.json`
}
/** Create a fresh manifest at the current format version with no entries. */
export function emptySourceCacheManifest(): SourceCacheManifest {
  const manifest: SourceCacheManifest = {
    version: SOURCE_CACHE_VERSION,
    entries: {},
  }
  return manifest
}
/**
 * Stat a file and return its modification time and size.
 *
 * @param filePath - File to inspect.
 * @returns The file's `mtimeMs` and its size as `sizeBytes`.
 * @throws Rejects with the underlying `stat` error (for example when the file is missing).
 */
export async function computeFileFingerprint(filePath: string): Promise<SourceFingerprint> {
  const { mtimeMs, size } = await stat(filePath)
  return { mtimeMs, sizeBytes: size }
}
/**
 * Load the manifest from disk.
 *
 * Never rejects: a missing file, unreadable file, invalid JSON, a version
 * mismatch, or a malformed `entries` value all yield an empty manifest.
 *
 * @returns The stored manifest, or a fresh empty one when nothing usable exists.
 */
export async function loadSourceCacheManifest(): Promise<SourceCacheManifest> {
  try {
    // No existence pre-check: a missing file makes readFile reject, which the
    // catch below already maps to an empty manifest without blocking the event loop.
    const raw = await readFile(manifestPath(), 'utf-8')
    const parsed: unknown = JSON.parse(raw)
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return emptySourceCacheManifest()
    }
    const { version, entries } = parsed as Partial<SourceCacheManifest>
    // Arrays are objects too; accepting one would let later writes attach string
    // keys that JSON.stringify silently drops when the manifest is saved.
    if (
      version !== SOURCE_CACHE_VERSION
      || typeof entries !== 'object'
      || entries === null
      || Array.isArray(entries)
    ) {
      return emptySourceCacheManifest()
    }
    return { version: SOURCE_CACHE_VERSION, entries }
  } catch {
    return emptySourceCacheManifest()
  }
}
/**
 * Serialize `value` as JSON and write it to `path` via a temp file and rename,
 * so readers never observe a partially written file.
 *
 * The parent directory is created if needed. The temp file is created with
 * mode 0o600 and is removed again if any step fails.
 *
 * @param path - Destination file.
 * @param value - Value to serialize with `JSON.stringify`.
 * @throws Rejects with the serialization or filesystem error that caused the failure.
 */
async function atomicWriteJson(path: string, value: unknown): Promise<void> {
  // Serialize up front so an unserializable value fails before any file exists.
  const payload = JSON.stringify(value)
  await mkdir(dirname(path), { recursive: true })
  // A random suffix keeps concurrent writers from sharing a temp file.
  const temp = `${path}.${randomBytes(8).toString('hex')}.tmp`
  try {
    const handle = await open(temp, 'w', 0o600)
    try {
      await handle.writeFile(payload, { encoding: 'utf-8' })
      // Flush to disk before the rename makes the new content visible.
      await handle.sync()
    } finally {
      await handle.close()
    }
    await rename(temp, path)
  } catch (err) {
    try {
      await unlink(temp)
    } catch {
      // The temp file may not exist (open failed); cleanup is best-effort.
    }
    throw err
  }
}
/**
 * Persist the manifest to `manifest.json` in the cache root, creating the
 * cache root first when it does not exist.
 *
 * @param manifest - Manifest to write.
 * @throws Rejects when the directory cannot be created or the file cannot be written.
 */
export async function saveSourceCacheManifest(manifest: SourceCacheManifest): Promise<void> {
  const root = cacheRoot()
  await mkdir(root, { recursive: true })
  const target = manifestPath()
  await atomicWriteJson(target, manifest)
}
/**
 * Look up the cached entry for a source and return it only if it is still valid.
 *
 * An entry is a hit when the manifest lists it, its file parses as JSON with
 * the expected shape and version, it belongs to the requested source, and the
 * current mtime and size of `fingerprintPath` equal the stored fingerprint.
 * Any failure along the way (missing file, bad JSON, stat error) is a miss.
 *
 * @param manifest - Manifest used to locate the entry file.
 * @param provider - Provider half of the source key.
 * @param logicalPath - Path half of the source key.
 * @returns The cached entry, or `null` on a miss.
 */
export async function readSourceCacheEntry(
  manifest: SourceCacheManifest,
  provider: string,
  logicalPath: string,
): Promise<SourceCacheEntry | null> {
  const meta = manifest.entries[sourceKey(provider, logicalPath)]
  if (!meta) return null
  try {
    const raw = await readFile(join(entryDir(), meta.file), 'utf-8')
    const parsed: unknown = JSON.parse(raw)
    if (typeof parsed !== 'object' || parsed === null) return null
    const entry = parsed as Partial<SourceCacheEntry>
    // Validate the fields this function and its callers rely on, so a damaged
    // or mismatched entry file is reported as a miss instead of a bogus hit.
    if (
      entry.version !== SOURCE_CACHE_VERSION
      || entry.provider !== provider
      || entry.logicalPath !== logicalPath
      || typeof entry.fingerprintPath !== 'string'
      || typeof entry.fingerprint !== 'object'
      || entry.fingerprint === null
      || !Array.isArray(entry.sessions)
    ) {
      return null
    }
    const current = await computeFileFingerprint(entry.fingerprintPath)
    const unchanged =
      current.mtimeMs === entry.fingerprint.mtimeMs
      && current.sizeBytes === entry.fingerprint.sizeBytes
    return unchanged ? (entry as SourceCacheEntry) : null
  } catch {
    return null
  }
}
/**
 * Store an entry on disk and register it in the in-memory manifest.
 *
 * The manifest object is mutated but not saved; callers persist it separately
 * with `saveSourceCacheManifest`.
 *
 * @param manifest - Manifest to update in place.
 * @param entry - Entry to write to the entries directory.
 * @throws Rejects when the directory cannot be created or the file cannot be written.
 */
export async function writeSourceCacheEntry(manifest: SourceCacheManifest, entry: SourceCacheEntry): Promise<void> {
  await mkdir(entryDir(), { recursive: true })

  const { provider, logicalPath } = entry
  const file = entryFilename(provider, logicalPath)
  const key = sourceKey(provider, logicalPath)
  manifest.entries[key] = { file, provider, logicalPath }

  const target = join(entryDir(), file)
  await atomicWriteJson(target, entry)
}

103
tests/source-cache.test.ts Normal file
View file

@ -0,0 +1,103 @@
import { afterEach, beforeEach, describe, expect, it } from 'vitest'
import { existsSync } from 'fs'
import { mkdtemp, readdir, readFile, rm, writeFile } from 'fs/promises'
import { tmpdir } from 'os'
import { join } from 'path'
import {
SOURCE_CACHE_VERSION,
emptySourceCacheManifest,
loadSourceCacheManifest,
saveSourceCacheManifest,
readSourceCacheEntry,
writeSourceCacheEntry,
computeFileFingerprint,
type SourceCacheEntry,
} from '../src/source-cache.js'
// Scratch directory for the current test; CODEBURN_CACHE_DIR points at it so
// the cache under test never touches the real user cache.
let root = ''

beforeEach(async () => {
  const scratch = await mkdtemp(join(tmpdir(), 'codeburn-source-cache-'))
  root = scratch
  process.env['CODEBURN_CACHE_DIR'] = scratch
})

afterEach(async () => {
  delete process.env['CODEBURN_CACHE_DIR']
  if (!root) return
  await rm(root, { recursive: true, force: true })
})
describe('source cache manifest', () => {
  /** Build a minimal valid entry whose fingerprint matches the file currently at `sourcePath`. */
  async function makeEntry(sourcePath: string): Promise<SourceCacheEntry> {
    return {
      version: SOURCE_CACHE_VERSION,
      provider: 'fake',
      logicalPath: sourcePath,
      fingerprintPath: sourcePath,
      cacheStrategy: 'full-reparse',
      parserVersion: 'fake-v1',
      fingerprint: await computeFileFingerprint(sourcePath),
      sessions: [],
    }
  }

  it('returns an empty manifest when no file exists', async () => {
    await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest())
  })

  it('round-trips a manifest and entry', async () => {
    const sourcePath = join(root, 'source.jsonl')
    await writeFile(sourcePath, '{"ok":true}\n', 'utf-8')
    const entry = await makeEntry(sourcePath)

    const manifest = await loadSourceCacheManifest()
    await writeSourceCacheEntry(manifest, entry)
    await saveSourceCacheManifest(manifest)

    const loadedManifest = await loadSourceCacheManifest()
    const loadedEntry = await readSourceCacheEntry(loadedManifest, 'fake', sourcePath)
    expect(loadedEntry).toEqual(entry)
  })

  it('returns null when the fingerprint no longer matches', async () => {
    const sourcePath = join(root, 'source.jsonl')
    await writeFile(sourcePath, 'one\n', 'utf-8')
    const entry = await makeEntry(sourcePath)

    const manifest = await loadSourceCacheManifest()
    await writeSourceCacheEntry(manifest, entry)
    await saveSourceCacheManifest(manifest)

    // Growing the file changes its size, so the stored fingerprint is stale.
    await writeFile(sourcePath, 'one\ntwo\n', 'utf-8')
    const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath)
    expect(loaded).toBeNull()
  })

  it('writes atomically without leaving temp files behind', async () => {
    const sourcePath = join(root, 'source.jsonl')
    await writeFile(sourcePath, 'x\n', 'utf-8')

    const manifest = await loadSourceCacheManifest()
    await writeSourceCacheEntry(manifest, await makeEntry(sourcePath))
    await saveSourceCacheManifest(manifest)

    const cacheDir = join(root, 'source-cache-v1')
    const entriesDir = join(cacheDir, 'entries')
    const saved = JSON.parse(await readFile(join(cacheDir, 'manifest.json'), 'utf-8'))
    expect(saved.version).toBe(SOURCE_CACHE_VERSION)
    expect(existsSync(entriesDir)).toBe(true)

    // The point of this test: no `*.tmp` staging file may survive a successful write.
    const written = [...(await readdir(cacheDir)), ...(await readdir(entriesDir))]
    expect(written.filter((name) => name.endsWith('.tmp'))).toEqual([])
  })
})