Mirror of https://github.com/AgentSeal/codeburn.git (synced 2026-05-17 03:56:45 +00:00)
Merge pull request #207 from ozymandiashh/fix/codex-stream-large-sessions

Stream-parse Codex session files to fix oversize-cap drops on heavy users

Commit ac8081bb08
2 changed files with 27 additions and 7 deletions
First changed file, fs-utils (the module that exports readSessionLines):

```diff
@@ -8,6 +8,13 @@ import { createInterface } from 'readline'
 export const MAX_SESSION_FILE_BYTES = 128 * 1024 * 1024
 export const STREAM_THRESHOLD_BYTES = 8 * 1024 * 1024
+
+// Line-by-line streaming has bounded memory (one line at a time) and is not
+// constrained by V8's string limit, so it can safely handle multi-GB session
+// files. The cap here is purely a sanity check against pathological inputs;
+// real Codex sessions for heavy users have been observed at 250+ MB and will
+// continue to grow as context windows expand.
+export const MAX_STREAM_SESSION_FILE_BYTES = 2 * 1024 * 1024 * 1024
 
 function verbose(): boolean {
   return process.env.CODEBURN_VERBOSE === '1'
 }
```
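For scale, the three caps work out to 128 MiB, 8 MiB, and 2 GiB. The sketch below shows one way a caller might use STREAM_THRESHOLD_BYTES; this diff does not show its actual call site, so the role given to it here, and the chooseReadPath helper, are assumptions for illustration only.

```ts
// Hypothetical call site (not shown in this diff): pick between a whole-file
// read and line-by-line streaming based on file size. chooseReadPath is an
// invented name, not code from the repo.
import { stat } from 'fs/promises'

const MiB = 1024 * 1024
const STREAM_THRESHOLD_BYTES = 8 * MiB // 8 MiB

async function chooseReadPath(filePath: string): Promise<'whole-file' | 'stream'> {
  const { size } = await stat(filePath)
  // Small files: one read plus split('\n') is cheap. Large files: stream line
  // by line so memory stays bounded and V8's string limit is never in play.
  return size <= STREAM_THRESHOLD_BYTES ? 'whole-file' : 'stream'
}
```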
```diff
@@ -78,8 +85,10 @@ export async function* readSessionLines(filePath: string): AsyncGenerator<string
     return
   }
 
-  if (size > MAX_SESSION_FILE_BYTES) {
-    warn(`skipped oversize file ${filePath} (${size} bytes > cap ${MAX_SESSION_FILE_BYTES})`)
+  if (size > MAX_STREAM_SESSION_FILE_BYTES) {
+    warn(
+      `skipped oversize file ${filePath} (${size} bytes > stream cap ${MAX_STREAM_SESSION_FILE_BYTES})`,
+    )
     return
   }
 
```
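For orientation, here is a minimal sketch of the streaming reader these two hunks describe: readline over a file stream, with the new 2 GB sanity cap applied up front. Only the overall shape (async generator, createInterface, size check) is taken from the diff; the name streamSessionLines, the console.warn call, and the fs/promises stat usage are illustrative assumptions rather than the repo's readSessionLines.

```ts
// A minimal sketch, not the repo's code: stream a session file line by line
// so memory stays bounded to the longest single line.
import { createReadStream } from 'fs'
import { stat } from 'fs/promises'
import { createInterface } from 'readline'

const MAX_STREAM_SESSION_FILE_BYTES = 2 * 1024 * 1024 * 1024 // 2 GiB sanity cap

async function* streamSessionLines(filePath: string): AsyncGenerator<string> {
  const { size } = await stat(filePath)
  if (size > MAX_STREAM_SESSION_FILE_BYTES) {
    // hypothetical logging; the real code calls a local warn() helper
    console.warn(`skipped oversize file ${filePath} (${size} bytes > stream cap)`)
    return
  }
  const rl = createInterface({
    input: createReadStream(filePath),
    crlfDelay: Infinity, // treat \r\n as a single line break
  })
  try {
    for await (const line of rl) {
      yield line // only one line is buffered at a time
    }
  } finally {
    rl.close()
  }
}
```

Because readline's async iterator only ever holds one line in memory, the 512 MB V8 string limit called out in the parser comments below never comes into play, even for multi-GB files.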
Second changed file, the Codex session parser:

```diff
@@ -4,7 +4,7 @@ import { createInterface } from 'readline'
 import { basename, join } from 'path'
 import { homedir } from 'os'
 
-import { readSessionFile } from '../fs-utils.js'
+import { readSessionLines } from '../fs-utils.js'
 import { calculateCost } from '../models.js'
 import { readCachedCodexResults, writeCachedCodexResults, getCachedCodexProject, fingerprintFile } from '../codex-cache.js'
 import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
```
```diff
@@ -201,9 +201,6 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
   const fp = await fingerprintFile(source.path)
   if (!fp) return
 
-  const content = await readSessionFile(source.path)
-  if (content === null) return
-  const lines = content.split('\n').filter(l => l.trim())
   let sessionModel: string | undefined
   let sessionId = ''
   let prevCumulativeTotal = 0
```
```diff
@@ -215,9 +212,18 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
   let pendingUserMessage = ''
   let pendingOutputChars = 0
   let estCounter = 0
+  let sawAnyLine = false
   const results: ParsedProviderCall[] = []
 
-  for (const line of lines) {
+  // Stream the session file line by line. Heavy Codex sessions can exceed
+  // 250 MB on disk; reading the entire file into a string would either hit
+  // the readSessionFile cap or push V8 toward its 512 MB string limit
+  // after split('\n'). readSessionLines streams via readline so memory
+  // stays bounded to the longest line.
+  for await (const rawLine of readSessionLines(source.path)) {
+    sawAnyLine = true
+    const line = rawLine.trim()
+    if (!line) continue
     let entry: CodexEntry
     try {
       entry = JSON.parse(line) as CodexEntry
```
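The consuming side is a plain NDJSON loop: every non-blank line is an independent JSON record. The sketch below mirrors the pattern visible in this hunk; the CodexEntry placeholder type, the handleEntry callback, and the catch-and-continue behavior (the hunk cuts off before the catch block) are assumptions, not the repo's definitions.

```ts
// Hedged sketch of the per-line parse loop: parse each non-blank line on its
// own so one malformed line does not abandon the rest of the stream.
type CodexEntry = Record<string, unknown> // placeholder; the real type is richer

async function forEachEntry(
  lines: AsyncIterable<string>,
  handleEntry: (entry: CodexEntry) => void,
): Promise<void> {
  for await (const rawLine of lines) {
    const line = rawLine.trim()
    if (!line) continue
    let entry: CodexEntry
    try {
      entry = JSON.parse(line) as CodexEntry
    } catch {
      continue // assumed behavior: tolerate a corrupt line and keep going
    }
    handleEntry(entry)
  }
}
```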
```diff
@@ -391,6 +397,11 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
     }
   }
 
+  // If the stream yielded nothing the file was unreadable, oversized, or
+  // empty. Skip cache write so a transient failure can't pin an empty
+  // result set against a fingerprint that would otherwise be re-parsed.
+  if (!sawAnyLine) return
+
   await writeCachedCodexResults(source.path, source.project, results, fp)
 
   for (const call of results) {
```
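The sawAnyLine guard matters because cached results are keyed by file fingerprint: once a fingerprint is stored, the file is not re-parsed until its content changes, so a transient read failure written to the cache would be remembered as "this session has no usage". The sketch below shows that failure mode with invented types; it does not use the repo's codex-cache API, and parseWithCache/ResultCache are hypothetical names.

```ts
// Minimal sketch (not the repo's code) of why empty results must not be cached
// against a fingerprint that would otherwise be re-parsed on the next run.
type Call = { cost: number }
interface ResultCache {
  get(fingerprint: string): Call[] | undefined
  set(fingerprint: string, calls: Call[]): void
}

async function parseWithCache(
  fingerprint: string,
  cache: ResultCache,
  lines: AsyncIterable<string>,
): Promise<Call[]> {
  const hit = cache.get(fingerprint)
  if (hit) return hit // a pinned empty [] would short-circuit here forever

  const calls: Call[] = []
  let sawAnyLine = false
  for await (const rawLine of lines) {
    sawAnyLine = true
    if (!rawLine.trim()) continue
    // ...parse rawLine and push into calls...
  }

  if (!sawAnyLine) return calls // nothing was read, possibly transient: do not cache
  cache.set(fingerprint, calls) // safe to pin: the file really was parsed
  return calls
}
```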