Add per-file result cache for Codex provider

Fixes #183. Users with large Codex session directories (45 GB, 10K+
files) experienced CPU pegging because every 30-second refresh re-parsed
all session files from scratch.

Three optimizations:

1. readFirstLine now reads at most the first 16 KB of a session file via
   fs.open() instead of loading the whole file through readSessionFile.
   This cuts discovery I/O from ~45 GB to ~160 MB for 10K files.

2. Per-file result cache (codex-results.json) with mtime+size
   fingerprinting. Parsed results are cached on the first run; subsequent
   runs return the cached data for unchanged files without re-parsing
   (an example manifest is sketched after this list).

3. Cache-accelerated discovery skips header validation for cached files,
   pulling the project name directly from the cache manifest.
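
For reference, the on-disk manifest from item 2 follows the ResultCache /
FileEntry shape defined in the new src/codex-cache.ts. A minimal illustrative
entry (the path and the numeric values here are made up):

  {
    "version": 1,
    "files": {
      "/home/user/.codex/sessions/example.jsonl": {
        "mtimeMs": 1767225600123,
        "sizeBytes": 482133,
        "project": "my-project",
        "calls": []
      }
    }
  }

"calls" holds the ParsedProviderCall objects produced for that file; it is
left empty here only to keep the example short.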

Cache safety:
- fingerprint is captured before the file is read (no TOCTOU window)
- atomic write via temp file + fsync + rename
- cache file created with 0o600 permissions
- Object.hasOwn lookups defend against prototype pollution
- entries for deleted files are evicted on flush
- try/finally ensures the cache is flushed even when parsing throws
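
How the pieces fit together, as a minimal sketch (loadCalls and
parseSessionLines are hypothetical names for illustration only; the real
wiring is in createParser and parseProviderSources in the diffs below):

  async function loadCalls(path: string, project: string): Promise<ParsedProviderCall[]> {
    const cached = await readCachedCodexResults(path)  // fast path: mtime+size unchanged
    if (cached) return cached
    const fp = await fingerprintFile(path)              // fingerprint before reading the file
    const calls = await parseSessionLines(path)         // hypothetical stand-in for the real parse loop
    if (fp) await writeCachedCodexResults(path, project, calls, fp)  // in-memory manifest update only
    await flushCodexCache()  // atomic temp+fsync+rename; in the real change this runs once, in a finally
    return calls
  }
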
iamtoruk 2026-04-30 16:43:41 -07:00
parent f35400f199
commit 8ab9ea916b
3 changed files with 213 additions and 31 deletions

src/codex-cache.ts (new file, +143)

@@ -0,0 +1,143 @@
import { readFile, mkdir, stat, open, rename, unlink } from 'fs/promises'
import { existsSync } from 'fs'
import { randomBytes } from 'crypto'
import { join } from 'path'
import { homedir } from 'os'
import type { ParsedProviderCall } from './providers/types.js'

const CODEX_CACHE_VERSION = 1
const CACHE_FILE = 'codex-results.json'

type FileFingerprint = { mtimeMs: number; sizeBytes: number }

type FileEntry = {
  mtimeMs: number
  sizeBytes: number
  project: string
  calls: ParsedProviderCall[]
}

type ResultCache = {
  version: number
  files: Record<string, FileEntry>
}

function getCacheDir(): string {
  return process.env['CODEBURN_CACHE_DIR'] ?? join(homedir(), '.cache', 'codeburn')
}

function getCachePath(): string {
  return join(getCacheDir(), CACHE_FILE)
}

let memCache: ResultCache | null = null

async function loadCache(): Promise<ResultCache> {
  if (memCache) return memCache
  try {
    const raw = await readFile(getCachePath(), 'utf-8')
    const cache = JSON.parse(raw) as ResultCache
    if (cache.version === CODEX_CACHE_VERSION && cache.files && typeof cache.files === 'object') {
      memCache = cache
      return cache
    }
  } catch {}
  memCache = { version: CODEX_CACHE_VERSION, files: {} }
  return memCache
}

function getEntry(cache: ResultCache, filePath: string, fp: FileFingerprint): FileEntry | null {
  if (!Object.hasOwn(cache.files, filePath)) return null
  const entry = cache.files[filePath]
  if (entry && entry.mtimeMs === fp.mtimeMs && entry.sizeBytes === fp.sizeBytes) {
    return entry
  }
  return null
}

export async function readCachedCodexResults(
  filePath: string,
): Promise<ParsedProviderCall[] | null> {
  try {
    const s = await stat(filePath)
    const cache = await loadCache()
    const entry = getEntry(cache, filePath, { mtimeMs: s.mtimeMs, sizeBytes: s.size })
    return entry?.calls ?? null
  } catch {}
  return null
}

export async function getCachedCodexProject(
  filePath: string,
): Promise<string | null> {
  try {
    const s = await stat(filePath)
    const cache = await loadCache()
    const entry = getEntry(cache, filePath, { mtimeMs: s.mtimeMs, sizeBytes: s.size })
    return entry?.project ?? null
  } catch {}
  return null
}

export async function fingerprintFile(
  filePath: string,
): Promise<FileFingerprint | null> {
  try {
    const s = await stat(filePath)
    return { mtimeMs: s.mtimeMs, sizeBytes: s.size }
  } catch {
    return null
  }
}

export async function writeCachedCodexResults(
  filePath: string,
  project: string,
  calls: ParsedProviderCall[],
  fingerprint: FileFingerprint,
): Promise<void> {
  try {
    const cache = await loadCache()
    cache.files[filePath] = {
      mtimeMs: fingerprint.mtimeMs,
      sizeBytes: fingerprint.sizeBytes,
      project,
      calls,
    }
  } catch {}
}

export async function flushCodexCache(): Promise<void> {
  if (!memCache) return
  try {
    // Evict entries for files that no longer exist on disk
    const paths = Object.keys(memCache.files)
    for (const p of paths) {
      try {
        await stat(p)
      } catch {
        delete memCache.files[p]
      }
    }
    const dir = getCacheDir()
    if (!existsSync(dir)) await mkdir(dir, { recursive: true })
    const finalPath = getCachePath()
    const tempPath = `${finalPath}.${randomBytes(8).toString('hex')}.tmp`
    const payload = JSON.stringify(memCache)
    const handle = await open(tempPath, 'w', 0o600)
    try {
      await handle.writeFile(payload, { encoding: 'utf-8' })
      await handle.sync()
    } finally {
      await handle.close()
    }
    try {
      await rename(tempPath, finalPath)
    } catch (err) {
      try { await unlink(tempPath) } catch {}
      throw err
    }
  } catch {}
}

(second changed file: the session-parsing module)

@@ -3,6 +3,7 @@ import { basename, join } from 'path'
 import { readSessionLines } from './fs-utils.js'
 import { calculateCost, getShortModelName } from './models.js'
 import { discoverAllSessions, getProvider } from './providers/index.js'
+import { flushCodexCache } from './codex-cache.js'
 import type { ParsedProviderCall } from './providers/types.js'
 import type {
   AssistantMessageContent,
@@ -402,36 +403,40 @@ async function parseProviderSources(
   const sessionMap = new Map<string, { project: string; turns: ClassifiedTurn[] }>()
-  for (const source of sources) {
-    if (dateRange) {
-      try {
-        const s = await stat(source.path)
-        if (s.mtimeMs < dateRange.start.getTime()) continue
-      } catch { /* fall through; treat unknown stat as "may contain data" */ }
-    }
-    const parser = provider.createSessionParser(
-      { path: source.path, project: source.project, provider: providerName },
-      seenKeys,
-    )
-    for await (const call of parser.parse()) {
-      if (dateRange) {
-        if (!call.timestamp) continue
-        const ts = new Date(call.timestamp)
-        if (ts < dateRange.start || ts > dateRange.end) continue
-      }
-      const turn = providerCallToTurn(call)
-      const classified = classifyTurn(turn)
-      const key = `${providerName}:${call.sessionId}:${source.project}`
-      const existing = sessionMap.get(key)
-      if (existing) {
-        existing.turns.push(classified)
-      } else {
-        sessionMap.set(key, { project: source.project, turns: [classified] })
-      }
-    }
-  }
+  try {
+    for (const source of sources) {
+      if (dateRange) {
+        try {
+          const s = await stat(source.path)
+          if (s.mtimeMs < dateRange.start.getTime()) continue
+        } catch { /* fall through; treat unknown stat as "may contain data" */ }
+      }
+      const parser = provider.createSessionParser(
+        { path: source.path, project: source.project, provider: providerName },
+        seenKeys,
+      )
+      for await (const call of parser.parse()) {
+        if (dateRange) {
+          if (!call.timestamp) continue
+          const ts = new Date(call.timestamp)
+          if (ts < dateRange.start || ts > dateRange.end) continue
+        }
+        const turn = providerCallToTurn(call)
+        const classified = classifyTurn(turn)
+        const key = `${providerName}:${call.sessionId}:${source.project}`
+        const existing = sessionMap.get(key)
+        if (existing) {
+          existing.turns.push(classified)
+        } else {
+          sessionMap.set(key, { project: source.project, turns: [classified] })
+        }
+      }
+    }
+  } finally {
+    if (providerName === 'codex') await flushCodexCache()
+  }
   const projectMap = new Map<string, SessionSummary[]>()

(third changed file: the Codex provider module)

@@ -1,9 +1,10 @@
-import { readdir, stat } from 'fs/promises'
+import { readdir, stat, open } from 'fs/promises'
 import { basename, join } from 'path'
 import { homedir } from 'os'
 import { readSessionFile } from '../fs-utils.js'
 import { calculateCost } from '../models.js'
+import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js'
 import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
 const modelDisplayNames: Record<string, string> = {
@@ -69,14 +70,21 @@ function sanitizeProject(cwd: string): string {
 }
 async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
-  const content = await readSessionFile(filePath)
-  if (content === null) return null
-  const line = content.split('\n')[0]
-  if (!line?.trim()) return null
+  let fh
   try {
+    fh = await open(filePath, 'r')
+    const buf = Buffer.alloc(16384)
+    const { bytesRead } = await fh.read(buf, 0, 16384, 0)
+    if (bytesRead === 0) return null
+    const text = buf.toString('utf-8', 0, bytesRead)
+    const nl = text.indexOf('\n')
+    const line = nl >= 0 ? text.slice(0, nl) : text
+    if (!line.trim()) return null
     return JSON.parse(line) as CodexEntry
   } catch {
     return null
+  } finally {
+    await fh?.close()
   }
 }
@@ -121,6 +129,12 @@ async function discoverSessionsInDir(codexDir: string): Promise<SessionSource[]>
     const s = await stat(filePath).catch(() => null)
     if (!s?.isFile()) continue
+    const cachedProject = await getCachedCodexProject(filePath)
+    if (cachedProject) {
+      sources.push({ path: filePath, project: cachedProject, provider: 'codex' })
+      continue
+    }
     const { valid, meta } = await isValidCodexSession(filePath)
     if (!valid || !meta) continue
@@ -145,6 +159,19 @@ function resolveModel(info: CodexEntry['payload'], sessionModel?: string): string {
 function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
   return {
     async *parse(): AsyncGenerator<ParsedProviderCall> {
+      const cached = await readCachedCodexResults(source.path)
+      if (cached) {
+        for (const call of cached) {
+          if (seenKeys.has(call.deduplicationKey)) continue
+          seenKeys.add(call.deduplicationKey)
+          yield call
+        }
+        return
+      }
+      const fp = await fingerprintFile(source.path)
+      if (!fp) return
       const content = await readSessionFile(source.path)
       if (content === null) return
       const lines = content.split('\n').filter(l => l.trim())
@@ -157,6 +184,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
       let prevReasoning = 0
       let pendingTools: string[] = []
      let pendingUserMessage = ''
+      const results: ParsedProviderCall[] = []
       for (const line of lines) {
         let entry: CodexEntry
@@ -258,7 +286,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
             0,
           )
-          yield {
+          results.push({
             provider: 'codex',
             model,
             inputTokens: uncachedInputTokens,
@@ -276,12 +304,18 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
             deduplicationKey: dedupKey,
             userMessage: pendingUserMessage,
             sessionId,
-          }
+          })
           pendingTools = []
           pendingUserMessage = ''
         }
       }
+      await writeCachedCodexResults(source.path, source.project, results, fp)
+      for (const call of results) {
+        yield call
+      }
     },
   }
 }