review: cap first-line read size and add edge-case tests

- Cap createReadStream at 1 MiB so a malformed file with no newline
  cannot make readline buffer indefinitely (real session_meta lines
  are 22-27 KB).
- Capture stream errors explicitly: per the Node.js docs, readline's async
  iterator does not always re-throw errors from the underlying stream.
- Test: assert project is extracted from the >16 KB session_meta to
  prove the line was actually parsed, not just discovered.
- Test: session_meta line with no trailing newline is still accepted.
- Test: empty rollout file is silently skipped.
This commit is contained in:
ozymandiashh 2026-05-02 02:30:17 +03:00
parent 945da9f0ba
commit 98bbe5b678
2 changed files with 52 additions and 3 deletions

View file

@ -71,14 +71,26 @@ function sanitizeProject(cwd: string): string {
return cwd.replace(/^\//, '').replace(/\//g, '-')
}
// Upper bound, in bytes, on how much of a file is read while searching for the
// first newline. Real Codex session_meta lines are ~22-27 KB, so 1 MiB leaves
// ample headroom while keeping memory bounded when a corrupt file contains no
// newline at all.
const FIRST_LINE_READ_CAP = 1 << 20 // 1 MiB
async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
// Codex CLI 0.128+ writes a session_meta line that can exceed 20 KB because
// it embeds the full base_instructions / system prompt. A fixed-size buffer
// would miss the trailing newline and reject the session as invalid.
// Stream the file via readline to read the first line regardless of length.
const stream = createReadStream(filePath, { encoding: 'utf-8' })
// Stream the file via readline so we can read the first line regardless of
// length, with `end` capping the read to keep memory bounded.
const stream = createReadStream(filePath, {
encoding: 'utf-8',
start: 0,
end: FIRST_LINE_READ_CAP - 1,
})
const rl = createInterface({ input: stream, crlfDelay: Infinity })
let firstLine: string | undefined
let streamError: unknown
stream.once('error', (err) => { streamError = err })
try {
for await (const line of rl) {
firstLine = line
@ -90,7 +102,7 @@ async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
rl.close()
stream.destroy()
}
if (!firstLine || !firstLine.trim()) return null
if (streamError || !firstLine || !firstLine.trim()) return null
try {
return JSON.parse(firstLine) as CodexEntry
} catch {

View file

@ -147,6 +147,43 @@ describe('codex provider - session discovery', () => {
const sessions = await provider.discoverSessions()
expect(sessions).toHaveLength(1)
expect(sessions[0]!.path).toContain('rollout-big.jsonl')
// Confirm the large meta line was actually parsed (cwd extracted),
// not just that some path was registered.
expect(sessions[0]!.project).toBe('Users-test-big')
})
// A lone session_meta line that is not terminated by "\n" must still be read,
// parsed, and reported as a session.
it('handles a session_meta line without trailing newline', async () => {
  // sessions/<year>/<month>/<day> under the temp root.
  const dayDir = join(tmpDir, 'sessions', ...'2026-05-02'.split('-'))
  await mkdir(dayDir, { recursive: true })

  // Serialize up front so it is evident that nothing — in particular no
  // trailing newline — is appended to the file contents.
  const metaLine = JSON.stringify({
    type: 'session_meta',
    timestamp: '2026-05-02T00:00:00Z',
    payload: {
      cwd: '/Users/test/nonl',
      originator: 'codex-tui',
      session_id: 'sess-nonl',
      model: 'gpt-5.5',
    },
  })
  await writeFile(join(dayDir, 'rollout-no-nl.jsonl'), metaLine)

  const discovered = await createCodexProvider(tmpDir).discoverSessions()

  expect(discovered).toHaveLength(1)
  // The project name comes from payload.cwd, so this shows the line was parsed.
  expect(discovered[0]!.project).toBe('Users-test-nonl')
})
// A zero-byte rollout file has no session_meta line, so discovery is expected
// to report no sessions for it.
it('returns no sessions for an empty rollout file', async () => {
  // sessions/<year>/<month>/<day> under the temp root.
  const dayDir = join(tmpDir, 'sessions', ...'2026-05-02'.split('-'))
  await mkdir(dayDir, { recursive: true })

  const emptyRollout = join(dayDir, 'rollout-empty.jsonl')
  await writeFile(emptyRollout, '')

  const discovered = await createCodexProvider(tmpDir).discoverSessions()

  expect(discovered).toHaveLength(0)
})
it('skips files without codex session_meta', async () => {