Mirror of https://github.com/AgentSeal/codeburn.git (synced 2026-05-17)
Add per-file result cache for Codex provider
Fixes #183. Users with large Codex session directories (45 GB, 10K+ files)
saw CPU pegging because every 30-second refresh re-parsed all session files
from scratch.

Three optimizations:

1. readFirstLine now reads 16 KB via fs.open() instead of loading the
   entire file through readSessionFile. Cuts discovery I/O from ~45 GB to
   ~160 MB for 10K files (10,000 files x 16 KB).

2. Per-file result cache (codex-results.json) with mtime+size
   fingerprinting. Parsed results are cached on first run; subsequent runs
   return cached data instantly for unchanged files.

3. Cache-accelerated discovery skips header validation for cached files,
   pulling the project name directly from the cache manifest.

Cache safety: fingerprint captured before read (no TOCTOU), atomic write
via temp+fsync+rename, 0o600 permissions, Object.hasOwn for prototype
pollution defense, eviction of deleted files on flush, try/finally ensures
flush even on parse errors.
parent f35400f199
commit 8ab9ea916b

3 changed files with 213 additions and 31 deletions
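The three pieces compose into a single read-through, write-back path. Below is a sketch of the assumed call sequence, reconstructed from the hunks that follow; parseAllCalls is a hypothetical stand-in for the provider's streaming parser, not a function in this commit.

import { readCachedCodexResults, writeCachedCodexResults, fingerprintFile, flushCodexCache } from './codex-cache.js'
import type { ParsedProviderCall } from './providers/types.js'

async function parseWithCache(
  path: string,
  project: string,
  parseAllCalls: (p: string) => Promise<ParsedProviderCall[]>, // hypothetical
): Promise<ParsedProviderCall[]> {
  // Unchanged file (same mtime + size): return the cached parse verbatim.
  const cached = await readCachedCodexResults(path)
  if (cached) return cached

  // Fingerprint BEFORE reading: if the file grows while we parse, the stored
  // mtime/size no longer match on the next refresh, the entry misses, and the
  // appended data is re-parsed rather than silently trusted (no TOCTOU).
  const fp = await fingerprintFile(path)
  if (!fp) return []

  const calls = await parseAllCalls(path)
  await writeCachedCodexResults(path, project, calls, fp) // in-memory until flush
  await flushCodexCache() // temp file + fsync + atomic rename, 0o600
  return calls
}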
src/codex-cache.ts (new file)
@@ -0,0 +1,143 @@
import { readFile, mkdir, stat, open, rename, unlink } from 'fs/promises'
import { existsSync } from 'fs'
import { randomBytes } from 'crypto'
import { join } from 'path'
import { homedir } from 'os'

import type { ParsedProviderCall } from './providers/types.js'

const CODEX_CACHE_VERSION = 1
const CACHE_FILE = 'codex-results.json'

type FileFingerprint = { mtimeMs: number; sizeBytes: number }

type FileEntry = {
  mtimeMs: number
  sizeBytes: number
  project: string
  calls: ParsedProviderCall[]
}

type ResultCache = {
  version: number
  files: Record<string, FileEntry>
}

function getCacheDir(): string {
  return process.env['CODEBURN_CACHE_DIR'] ?? join(homedir(), '.cache', 'codeburn')
}

function getCachePath(): string {
  return join(getCacheDir(), CACHE_FILE)
}

let memCache: ResultCache | null = null

async function loadCache(): Promise<ResultCache> {
  if (memCache) return memCache
  try {
    const raw = await readFile(getCachePath(), 'utf-8')
    const cache = JSON.parse(raw) as ResultCache
    if (cache.version === CODEX_CACHE_VERSION && cache.files && typeof cache.files === 'object') {
      memCache = cache
      return cache
    }
  } catch {}
  // Missing, unreadable, or version-mismatched cache: start fresh.
  memCache = { version: CODEX_CACHE_VERSION, files: {} }
  return memCache
}

function getEntry(cache: ResultCache, filePath: string, fp: FileFingerprint): FileEntry | null {
  // Object.hasOwn guards against prototype-pollution keys such as '__proto__'.
  if (!Object.hasOwn(cache.files, filePath)) return null
  const entry = cache.files[filePath]
  if (entry && entry.mtimeMs === fp.mtimeMs && entry.sizeBytes === fp.sizeBytes) {
    return entry
  }
  return null
}

export async function readCachedCodexResults(
  filePath: string,
): Promise<ParsedProviderCall[] | null> {
  try {
    const s = await stat(filePath)
    const cache = await loadCache()
    const entry = getEntry(cache, filePath, { mtimeMs: s.mtimeMs, sizeBytes: s.size })
    return entry?.calls ?? null
  } catch {}
  return null
}

export async function getCachedCodexProject(
  filePath: string,
): Promise<string | null> {
  try {
    const s = await stat(filePath)
    const cache = await loadCache()
    const entry = getEntry(cache, filePath, { mtimeMs: s.mtimeMs, sizeBytes: s.size })
    return entry?.project ?? null
  } catch {}
  return null
}

export async function fingerprintFile(
  filePath: string,
): Promise<FileFingerprint | null> {
  try {
    const s = await stat(filePath)
    return { mtimeMs: s.mtimeMs, sizeBytes: s.size }
  } catch {
    return null
  }
}

export async function writeCachedCodexResults(
  filePath: string,
  project: string,
  calls: ParsedProviderCall[],
  fingerprint: FileFingerprint,
): Promise<void> {
  try {
    const cache = await loadCache()
    cache.files[filePath] = {
      mtimeMs: fingerprint.mtimeMs,
      sizeBytes: fingerprint.sizeBytes,
      project,
      calls,
    }
  } catch {}
}

export async function flushCodexCache(): Promise<void> {
  if (!memCache) return
  try {
    // Evict entries for files that no longer exist on disk
    const paths = Object.keys(memCache.files)
    for (const p of paths) {
      try {
        await stat(p)
      } catch {
        delete memCache.files[p]
      }
    }

    const dir = getCacheDir()
    if (!existsSync(dir)) await mkdir(dir, { recursive: true })
    const finalPath = getCachePath()
    const tempPath = `${finalPath}.${randomBytes(8).toString('hex')}.tmp`
    const payload = JSON.stringify(memCache)
    // Temp file + fsync + rename: readers never observe a torn cache file.
    const handle = await open(tempPath, 'w', 0o600)
    try {
      await handle.writeFile(payload, { encoding: 'utf-8' })
      await handle.sync()
    } finally {
      await handle.close()
    }
    try {
      await rename(tempPath, finalPath)
    } catch (err) {
      try { await unlink(tempPath) } catch {}
      throw err
    }
  } catch {}
}
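Since getCacheDir() consults CODEBURN_CACHE_DIR on every call, a test or script can point the cache at a throwaway directory. A minimal sketch, assuming an ESM context with top-level await:

import { mkdtemp, rm } from 'fs/promises'
import { tmpdir } from 'os'
import { join } from 'path'

// Redirect the cache to an isolated directory for the duration of a test run.
const dir = await mkdtemp(join(tmpdir(), 'codeburn-cache-'))
process.env['CODEBURN_CACHE_DIR'] = dir
// ... exercise readCachedCodexResults / writeCachedCodexResults / flushCodexCache ...
await rm(dir, { recursive: true, force: true })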
@@ -3,6 +3,7 @@ import { basename, join } from 'path'
 import { readSessionLines } from './fs-utils.js'
 import { calculateCost, getShortModelName } from './models.js'
 import { discoverAllSessions, getProvider } from './providers/index.js'
+import { flushCodexCache } from './codex-cache.js'
 import type { ParsedProviderCall } from './providers/types.js'
 import type {
   AssistantMessageContent,
@@ -402,36 +403,40 @@ async function parseProviderSources(
   const sessionMap = new Map<string, { project: string; turns: ClassifiedTurn[] }>()
 
-  for (const source of sources) {
-    if (dateRange) {
-      try {
-        const s = await stat(source.path)
-        if (s.mtimeMs < dateRange.start.getTime()) continue
-      } catch { /* fall through; treat unknown stat as "may contain data" */ }
-    }
-    const parser = provider.createSessionParser(
-      { path: source.path, project: source.project, provider: providerName },
-      seenKeys,
-    )
-
-    for await (const call of parser.parse()) {
-      if (dateRange) {
-        if (!call.timestamp) continue
-        const ts = new Date(call.timestamp)
-        if (ts < dateRange.start || ts > dateRange.end) continue
-      }
-
-      const turn = providerCallToTurn(call)
-      const classified = classifyTurn(turn)
-      const key = `${providerName}:${call.sessionId}:${source.project}`
-
-      const existing = sessionMap.get(key)
-      if (existing) {
-        existing.turns.push(classified)
-      } else {
-        sessionMap.set(key, { project: source.project, turns: [classified] })
-      }
-    }
-  }
+  try {
+    for (const source of sources) {
+      if (dateRange) {
+        try {
+          const s = await stat(source.path)
+          if (s.mtimeMs < dateRange.start.getTime()) continue
+        } catch { /* fall through; treat unknown stat as "may contain data" */ }
+      }
+      const parser = provider.createSessionParser(
+        { path: source.path, project: source.project, provider: providerName },
+        seenKeys,
+      )
+
+      for await (const call of parser.parse()) {
+        if (dateRange) {
+          if (!call.timestamp) continue
+          const ts = new Date(call.timestamp)
+          if (ts < dateRange.start || ts > dateRange.end) continue
+        }
+
+        const turn = providerCallToTurn(call)
+        const classified = classifyTurn(turn)
+        const key = `${providerName}:${call.sessionId}:${source.project}`
+
+        const existing = sessionMap.get(key)
+        if (existing) {
+          existing.turns.push(classified)
+        } else {
+          sessionMap.set(key, { project: source.project, turns: [classified] })
+        }
+      }
+    }
+  } finally {
+    if (providerName === 'codex') await flushCodexCache()
+  }
 
   const projectMap = new Map<string, SessionSummary[]>()
@@ -1,9 +1,10 @@
-import { readdir, stat } from 'fs/promises'
+import { readdir, stat, open } from 'fs/promises'
 import { basename, join } from 'path'
 import { homedir } from 'os'
 
 import { readSessionFile } from '../fs-utils.js'
 import { calculateCost } from '../models.js'
+import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js'
 import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
 
 const modelDisplayNames: Record<string, string> = {
@@ -69,14 +70,21 @@ function sanitizeProject(cwd: string): string {
 }
 
 async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
-  const content = await readSessionFile(filePath)
-  if (content === null) return null
-  const line = content.split('\n')[0]
-  if (!line?.trim()) return null
+  let fh
   try {
+    // Read at most 16 KB at offset 0: the session header sits on line one,
+    // so the rest of the file never needs to be loaded.
+    fh = await open(filePath, 'r')
+    const buf = Buffer.alloc(16384)
+    const { bytesRead } = await fh.read(buf, 0, 16384, 0)
+    if (bytesRead === 0) return null
+    const text = buf.toString('utf-8', 0, bytesRead)
+    const nl = text.indexOf('\n')
+    const line = nl >= 0 ? text.slice(0, nl) : text
+    if (!line.trim()) return null
     return JSON.parse(line) as CodexEntry
   } catch {
     return null
+  } finally {
+    await fh?.close()
   }
 }
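A quick way to see the bounded read, assuming readFirstLine were exported for testing (it is module-private in this commit):

import { writeFile } from 'fs/promises'
import { join } from 'path'
import { tmpdir } from 'os'

// A ~1 MB session file whose header sits on line one: only 16 KB is ever read.
const p = join(tmpdir(), 'session.jsonl')
await writeFile(p, JSON.stringify({ type: 'session_meta' }) + '\n' + 'x'.repeat(1_000_000))
console.log(await readFirstLine(p)) // -> { type: 'session_meta' }

One implicit assumption: a first line longer than 16 KB is truncated at the buffer boundary, fails JSON.parse, and the session is treated as invalid, which is presumably safe for Codex metadata lines.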
@@ -121,6 +129,12 @@ async function discoverSessionsInDir(codexDir: string): Promise<SessionSource[]>
     const s = await stat(filePath).catch(() => null)
     if (!s?.isFile()) continue
 
+    // Cache hit: reuse the cached project name and skip header validation.
+    const cachedProject = await getCachedCodexProject(filePath)
+    if (cachedProject) {
+      sources.push({ path: filePath, project: cachedProject, provider: 'codex' })
+      continue
+    }
+
     const { valid, meta } = await isValidCodexSession(filePath)
     if (!valid || !meta) continue
@@ -145,6 +159,19 @@ function resolveModel(info: CodexEntry['payload'], sessionModel?: string): string
 function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
   return {
     async *parse(): AsyncGenerator<ParsedProviderCall> {
+      // Fast path: replay cached calls, still honoring cross-run dedup.
+      const cached = await readCachedCodexResults(source.path)
+      if (cached) {
+        for (const call of cached) {
+          if (seenKeys.has(call.deduplicationKey)) continue
+          seenKeys.add(call.deduplicationKey)
+          yield call
+        }
+        return
+      }
+
+      // Fingerprint before reading so a concurrent append invalidates the entry.
+      const fp = await fingerprintFile(source.path)
+      if (!fp) return
+
       const content = await readSessionFile(source.path)
       if (content === null) return
       const lines = content.split('\n').filter(l => l.trim())
@@ -157,6 +184,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser
       let prevReasoning = 0
       let pendingTools: string[] = []
       let pendingUserMessage = ''
+      const results: ParsedProviderCall[] = []
 
       for (const line of lines) {
         let entry: CodexEntry
@@ -258,7 +286,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser
             0,
           )
 
-          yield {
+          results.push({
             provider: 'codex',
             model,
             inputTokens: uncachedInputTokens,
@@ -276,12 +304,18 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser
             deduplicationKey: dedupKey,
             userMessage: pendingUserMessage,
             sessionId,
-          }
+          })
 
           pendingTools = []
           pendingUserMessage = ''
         }
       }
+
+      await writeCachedCodexResults(source.path, source.project, results, fp)
+
+      for (const call of results) {
+        yield call
+      }
     },
   }
 }
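Invalidation needs no explicit expiry: any change to a session file's mtime or size makes getEntry miss, so the file is re-parsed in full and its entry overwritten on the next flush. To force a re-parse by hand (a sketch; sessionPath is whichever file you want to invalidate):

import { utimes } from 'fs/promises'

// Bumping mtime changes the fingerprint, so the next run bypasses the cache.
await utimes(sessionPath, new Date(), new Date())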