Merge pull request #207 from ozymandiashh/fix/codex-stream-large-sessions

Stream-parse Codex session files to fix oversize-cap drops on heavy users
This commit is contained in:
Resham Joshi 2026-05-03 16:35:30 -07:00 committed by GitHub
commit ac8081bb08
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 27 additions and 7 deletions

View file

@ -8,6 +8,13 @@ import { createInterface } from 'readline'
// Cap for reading a whole session file into memory at once; per the skip
// logic in readSessionLines, files beyond a size cap are warned about and
// skipped rather than parsed.
export const MAX_SESSION_FILE_BYTES = 128 * 1024 * 1024
// NOTE(review): presumably the size at which parsing switches from a
// whole-file read to line streaming — its use site is not visible in this
// hunk; confirm against callers.
export const STREAM_THRESHOLD_BYTES = 8 * 1024 * 1024
// Line-by-line streaming has bounded memory (one line at a time) and is not
// constrained by V8's string limit, so it can safely handle multi-GB session
// files. The cap here is purely a sanity check against pathological inputs;
// real Codex sessions for heavy users have been observed at 250+ MB and will
// continue to grow as context windows expand.
export const MAX_STREAM_SESSION_FILE_BYTES = 2 * 1024 * 1024 * 1024
// True only when the CODEBURN_VERBOSE environment flag is set to exactly '1'.
function verbose(): boolean {
  const flag = process.env.CODEBURN_VERBOSE
  return flag === '1'
}
@ -78,8 +85,10 @@ export async function* readSessionLines(filePath: string): AsyncGenerator<string
return
}
if (size > MAX_SESSION_FILE_BYTES) {
warn(`skipped oversize file ${filePath} (${size} bytes > cap ${MAX_SESSION_FILE_BYTES})`)
if (size > MAX_STREAM_SESSION_FILE_BYTES) {
warn(
`skipped oversize file ${filePath} (${size} bytes > stream cap ${MAX_STREAM_SESSION_FILE_BYTES})`,
)
return
}

View file

@ -4,7 +4,7 @@ import { createInterface } from 'readline'
import { basename, join } from 'path'
import { homedir } from 'os'
import { readSessionFile } from '../fs-utils.js'
import { readSessionLines } from '../fs-utils.js'
import { calculateCost } from '../models.js'
import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js'
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
@ -201,9 +201,6 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
const fp = await fingerprintFile(source.path)
if (!fp) return
const content = await readSessionFile(source.path)
if (content === null) return
const lines = content.split('\n').filter(l => l.trim())
let sessionModel: string | undefined
let sessionId = ''
let prevCumulativeTotal = 0
@ -215,9 +212,18 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
let pendingUserMessage = ''
let pendingOutputChars = 0
let estCounter = 0
let sawAnyLine = false
const results: ParsedProviderCall[] = []
for (const line of lines) {
// Stream the session file line by line. Heavy Codex sessions can exceed
// 250 MB on disk; reading the entire file into a string would either hit
// the readSessionFile cap or push V8 toward its 512 MB string limit
// after split('\n'). readSessionLines streams via readline so memory
// stays bounded to the longest line.
for await (const rawLine of readSessionLines(source.path)) {
sawAnyLine = true
const line = rawLine.trim()
if (!line) continue
let entry: CodexEntry
try {
entry = JSON.parse(line) as CodexEntry
@ -391,6 +397,11 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
}
}
// If the stream yielded nothing the file was unreadable, oversized, or
// empty. Skip cache write so a transient failure can't pin an empty
// result set against a fingerprint that would otherwise be re-parsed.
if (!sawAnyLine) return
await writeCachedCodexResults(source.path, source.project, results, fp)
for (const call of results) {