Merge pull request #207 from ozymandiashh/fix/codex-stream-large-sessions

Stream-parse Codex session files to fix oversize-cap drops on heavy users
This commit is contained in:
Resham Joshi 2026-05-03 16:35:30 -07:00 committed by GitHub
commit ac8081bb08
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 27 additions and 7 deletions

View file

@ -8,6 +8,13 @@ import { createInterface } from 'readline'
// Cap for reading a whole session file into memory at once; per the skip
// logic in readSessionLines, files beyond a size cap are warned about and
// skipped rather than parsed.
export const MAX_SESSION_FILE_BYTES = 128 * 1024 * 1024
// NOTE(review): presumably the size at which parsing switches from a
// whole-file read to line streaming — its use site is not visible in this
// hunk; confirm against callers.
export const STREAM_THRESHOLD_BYTES = 8 * 1024 * 1024
// Line-by-line streaming has bounded memory (one line at a time) and is not
// constrained by V8's string limit, so it can safely handle multi-GB session
// files. The cap here is purely a sanity check against pathological inputs;
// real Codex sessions for heavy users have been observed at 250+ MB and will
// continue to grow as context windows expand.
export const MAX_STREAM_SESSION_FILE_BYTES = 2 * 1024 * 1024 * 1024
// True only when the CODEBURN_VERBOSE environment flag is set to exactly '1'.
function verbose(): boolean {
  const flag = process.env.CODEBURN_VERBOSE
  return flag === '1'
}
@ -78,8 +85,10 @@ export async function* readSessionLines(filePath: string): AsyncGenerator<string
return
}
if (size > MAX_SESSION_FILE_BYTES) {
warn(`skipped oversize file ${filePath} (${size} bytes > cap ${MAX_SESSION_FILE_BYTES})`)
if (size > MAX_STREAM_SESSION_FILE_BYTES) {
warn(
`skipped oversize file ${filePath} (${size} bytes > stream cap ${MAX_STREAM_SESSION_FILE_BYTES})`,
)
return
}

View file

@ -4,7 +4,7 @@ import { createInterface } from 'readline'
import { basename, join } from 'path'
import { homedir } from 'os'
import { readSessionFile } from '../fs-utils.js'
import { readSessionLines } from '../fs-utils.js'
import { calculateCost } from '../models.js'
import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js'
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
@ -201,9 +201,6 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
const fp = await fingerprintFile(source.path)
if (!fp) return
const content = await readSessionFile(source.path)
if (content === null) return
const lines = content.split('\n').filter(l => l.trim())
let sessionModel: string | undefined
let sessionId = ''
let prevCumulativeTotal = 0
@ -215,9 +212,18 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
let pendingUserMessage = ''
let pendingOutputChars = 0
let estCounter = 0
let sawAnyLine = false
const results: ParsedProviderCall[] = []
for (const line of lines) {
// Stream the session file line by line. Heavy Codex sessions can exceed
// 250 MB on disk; reading the entire file into a string would either hit
// the readSessionFile cap or push V8 toward its 512 MB string limit
// after split('\n'). readSessionLines streams via readline so memory
// stays bounded to the longest line.
for await (const rawLine of readSessionLines(source.path)) {
sawAnyLine = true
const line = rawLine.trim()
if (!line) continue
let entry: CodexEntry
try {
entry = JSON.parse(line) as CodexEntry
@ -391,6 +397,11 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
}
}
// If the stream yielded nothing the file was unreadable, oversized, or
// empty. Skip cache write so a transient failure can't pin an empty
// result set against a fingerprint that would otherwise be re-parsed.
if (!sawAnyLine) return
await writeCachedCodexResults(source.path, source.project, results, fp)
for (const call of results) {