diff --git a/src/source-cache.ts b/src/source-cache.ts
new file mode 100644
index 0000000..4c7efa2
--- /dev/null
+++ b/src/source-cache.ts
@@ -0,0 +1,182 @@
+import { createHash, randomBytes } from 'crypto'
+import { existsSync } from 'fs'
+import { mkdir, open, readFile, rename, stat, unlink } from 'fs/promises'
+import { homedir } from 'os'
+import { dirname, join } from 'path'
+
+import type { SessionSummary } from './types.js'
+
+/** On-disk schema version; manifests and entries with any other version are ignored. */
+export const SOURCE_CACHE_VERSION = 1
+
+export type SourceCacheStrategy = 'full-reparse' | 'append-jsonl'
+
+/** File metadata compared against the live file to decide whether a cached entry is stale. */
+export type SourceFingerprint = {
+  mtimeMs: number
+  sizeBytes: number
+}
+
+export type AppendState = {
+  endOffset: number
+  tailHash: string
+}
+
+/** Cached data for one source, stored as its own JSON file in the entries directory. */
+export type SourceCacheEntry = {
+  version: number
+  provider: string
+  logicalPath: string
+  fingerprintPath: string
+  cacheStrategy: SourceCacheStrategy
+  parserVersion: string
+  fingerprint: SourceFingerprint
+  sessions: SessionSummary[]
+  appendState?: AppendState
+}
+
+/** Index mapping a `provider:logicalPath` key to the entry file holding its cached data. */
+export type SourceCacheManifest = {
+  version: number
+  entries: Record<string, { file: string; provider: string; logicalPath: string }>
+}
+
+/** Cache root: `$CODEBURN_CACHE_DIR` (or `~/.cache/codeburn`) plus a versioned subdirectory. */
+function cacheRoot(): string {
+  const base = process.env['CODEBURN_CACHE_DIR'] ?? join(homedir(), '.cache', 'codeburn')
+  return join(base, 'source-cache-v1')
+}
+
+function manifestPath(): string {
+  return join(cacheRoot(), 'manifest.json')
+}
+
+function entryDir(): string {
+  return join(cacheRoot(), 'entries')
+}
+
+/** Manifest key identifying one source. */
+function sourceKey(provider: string, logicalPath: string): string {
+  return `${provider}:${logicalPath}`
+}
+
+/** Entry file name: the SHA-1 hex digest of the source key plus `.json`. */
+function entryFilename(provider: string, logicalPath: string): string {
+  return `${createHash('sha1').update(sourceKey(provider, logicalPath)).digest('hex')}.json`
+}
+
+/** Returns a new manifest at the current version with no entries. */
+export function emptySourceCacheManifest(): SourceCacheManifest {
+  return { version: SOURCE_CACHE_VERSION, entries: {} }
+}
+
+/** Stats `filePath` and returns its mtime and size; rejects if the file cannot be stat'ed. */
+export async function computeFileFingerprint(filePath: string): Promise<SourceFingerprint> {
+  const meta = await stat(filePath)
+  return { mtimeMs: meta.mtimeMs, sizeBytes: meta.size }
+}
+
+/**
+ * Loads the manifest from disk. A missing, unreadable, malformed, or
+ * version-mismatched manifest yields an empty manifest instead of an error.
+ */
+export async function loadSourceCacheManifest(): Promise<SourceCacheManifest> {
+  if (!existsSync(manifestPath())) return emptySourceCacheManifest()
+
+  try {
+    const raw = await readFile(manifestPath(), 'utf-8')
+    const parsed = JSON.parse(raw) as Partial<SourceCacheManifest>
+    if (
+      parsed.version !== SOURCE_CACHE_VERSION
+      || !parsed.entries
+      || typeof parsed.entries !== 'object'
+      // Arrays are objects too, but can never be a key -> entry map.
+      || Array.isArray(parsed.entries)
+    ) {
+      return emptySourceCacheManifest()
+    }
+    return { version: SOURCE_CACHE_VERSION, entries: parsed.entries as SourceCacheManifest['entries'] }
+  } catch {
+    return emptySourceCacheManifest()
+  }
+}
+
+/**
+ * Serializes `value` as JSON into a uniquely named temp file next to `path`,
+ * syncs it, then renames it over `path`. The temp file is removed if any step
+ * fails, and the original error is rethrown.
+ */
+async function atomicWriteJson(path: string, value: unknown): Promise<void> {
+  await mkdir(dirname(path), { recursive: true })
+  const temp = `${path}.${randomBytes(8).toString('hex')}.tmp`
+  try {
+    const handle = await open(temp, 'w', 0o600)
+    try {
+      await handle.writeFile(JSON.stringify(value), { encoding: 'utf-8' })
+      await handle.sync()
+    } finally {
+      await handle.close()
+    }
+    await rename(temp, path)
+  } catch (err) {
+    // Covers write/sync/close failures as well as rename, so no *.tmp is orphaned.
+    try {
+      await unlink(temp)
+    } catch {
+      // ignore cleanup failures
+    }
+    throw err
+  }
+}
+
+/** Persists `manifest` atomically; the cache root is created if needed. Rejects on write failure. */
+export async function saveSourceCacheManifest(manifest: SourceCacheManifest): Promise<void> {
+  await atomicWriteJson(manifestPath(), manifest)
+}
+
+/**
+ * Returns the cached entry for a source, or `null` when the manifest has no
+ * record of it, the entry file cannot be read or parsed, its version differs,
+ * or the file at `fingerprintPath` no longer matches the recorded mtime/size.
+ */
+export async function readSourceCacheEntry(
+  manifest: SourceCacheManifest,
+  provider: string,
+  logicalPath: string,
+): Promise<SourceCacheEntry | null> {
+  const meta = manifest.entries[sourceKey(provider, logicalPath)]
+  if (!meta) return null
+
+  try {
+    const raw = await readFile(join(entryDir(), meta.file), 'utf-8')
+    const entry = JSON.parse(raw) as SourceCacheEntry
+    if (entry.version !== SOURCE_CACHE_VERSION) return null
+
+    const currentFingerprint = await computeFileFingerprint(entry.fingerprintPath)
+    if (
+      currentFingerprint.mtimeMs !== entry.fingerprint.mtimeMs
+      || currentFingerprint.sizeBytes !== entry.fingerprint.sizeBytes
+    ) {
+      return null
+    }
+
+    return entry
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Writes `entry` to its entry file, then records it in `manifest`. The manifest
+ * is only updated in memory; call `saveSourceCacheManifest` to persist it.
+ */
+export async function writeSourceCacheEntry(manifest: SourceCacheManifest, entry: SourceCacheEntry): Promise<void> {
+  const file = entryFilename(entry.provider, entry.logicalPath)
+  // Write first so the manifest never references an entry file that failed to land.
+  await atomicWriteJson(join(entryDir(), file), entry)
+  manifest.entries[sourceKey(entry.provider, entry.logicalPath)] = {
+    file,
+    provider: entry.provider,
+    logicalPath: entry.logicalPath,
+  }
+}
diff --git a/tests/source-cache.test.ts b/tests/source-cache.test.ts
new file mode 100644
index 0000000..3cc8340
--- /dev/null
+++ b/tests/source-cache.test.ts
@@ -0,0 +1,110 @@
+import { afterEach, beforeEach, describe, expect, it } from 'vitest'
+import { existsSync } from 'fs'
+import { mkdtemp, readFile, readdir, rm, writeFile } from 'fs/promises'
+import { tmpdir } from 'os'
+import { join } from 'path'
+
+import {
+  SOURCE_CACHE_VERSION,
+  emptySourceCacheManifest,
+  loadSourceCacheManifest,
+  saveSourceCacheManifest,
+  readSourceCacheEntry,
+  writeSourceCacheEntry,
+  computeFileFingerprint,
+  type SourceCacheEntry,
+} from '../src/source-cache.js'
+
+let root = ''
+
+// Each test gets its own cache directory so nothing touches the real user cache.
+beforeEach(async () => {
+  root = await mkdtemp(join(tmpdir(), 'codeburn-source-cache-'))
+  process.env['CODEBURN_CACHE_DIR'] = root
+})
+
+afterEach(async () => {
+  delete process.env['CODEBURN_CACHE_DIR']
+  if (root) await rm(root, { recursive: true, force: true })
+})
+
+describe('source cache manifest', () => {
+  it('returns an empty manifest when no file exists', async () => {
+    await expect(loadSourceCacheManifest()).resolves.toEqual(emptySourceCacheManifest())
+  })
+
+  it('round-trips a manifest and entry', async () => {
+    const sourcePath = join(root, 'source.jsonl')
+    await writeFile(sourcePath, '{"ok":true}\n', 'utf-8')
+    const fingerprint = await computeFileFingerprint(sourcePath)
+    const entry: SourceCacheEntry = {
+      version: SOURCE_CACHE_VERSION,
+      provider: 'fake',
+      logicalPath: sourcePath,
+      fingerprintPath: sourcePath,
+      cacheStrategy: 'full-reparse',
+      parserVersion: 'fake-v1',
+      fingerprint,
+      sessions: [],
+    }
+
+    const manifest = await loadSourceCacheManifest()
+    await writeSourceCacheEntry(manifest, entry)
+    await saveSourceCacheManifest(manifest)
+
+    const loadedManifest = await loadSourceCacheManifest()
+    const loadedEntry = await readSourceCacheEntry(loadedManifest, 'fake', sourcePath)
+    expect(loadedEntry).toEqual(entry)
+  })
+
+  it('returns null when the fingerprint no longer matches', async () => {
+    const sourcePath = join(root, 'source.jsonl')
+    await writeFile(sourcePath, 'one\n', 'utf-8')
+    const fingerprint = await computeFileFingerprint(sourcePath)
+    const entry: SourceCacheEntry = {
+      version: SOURCE_CACHE_VERSION,
+      provider: 'fake',
+      logicalPath: sourcePath,
+      fingerprintPath: sourcePath,
+      cacheStrategy: 'full-reparse',
+      parserVersion: 'fake-v1',
+      fingerprint,
+      sessions: [],
+    }
+
+    const manifest = await loadSourceCacheManifest()
+    await writeSourceCacheEntry(manifest, entry)
+    await saveSourceCacheManifest(manifest)
+
+    // The rewrite changes the size, so the stored fingerprint must stop matching.
+    await writeFile(sourcePath, 'one\ntwo\n', 'utf-8')
+    const loaded = await readSourceCacheEntry(await loadSourceCacheManifest(), 'fake', sourcePath)
+    expect(loaded).toBeNull()
+  })
+
+  it('writes atomically without leaving temp files behind', async () => {
+    const sourcePath = join(root, 'source.jsonl')
+    await writeFile(sourcePath, 'x\n', 'utf-8')
+    const manifest = await loadSourceCacheManifest()
+    await writeSourceCacheEntry(manifest, {
+      version: SOURCE_CACHE_VERSION,
+      provider: 'fake',
+      logicalPath: sourcePath,
+      fingerprintPath: sourcePath,
+      cacheStrategy: 'full-reparse',
+      parserVersion: 'fake-v1',
+      fingerprint: await computeFileFingerprint(sourcePath),
+      sessions: [],
+    })
+    await saveSourceCacheManifest(manifest)
+
+    const cacheDir = join(root, 'source-cache-v1')
+    const savedManifest = JSON.parse(await readFile(join(cacheDir, 'manifest.json'), 'utf-8'))
+    expect(savedManifest.version).toBe(SOURCE_CACHE_VERSION)
+    expect(existsSync(join(cacheDir, 'entries'))).toBe(true)
+
+    // Temp files are named `<target>.<hex>.tmp`; none may survive a successful write.
+    const written = [...(await readdir(cacheDir)), ...(await readdir(join(cacheDir, 'entries')))]
+    expect(written.filter((name) => name.endsWith('.tmp'))).toEqual([])
+  })
+})