review: drop streamError flag, add multi-chunk and torn-write tests

- Stop tracking a separate streamError flag. createReadStream's default
  64 KiB highWaterMark means the stream may already be reading chunk 2
  when we break out of the loop after yielding the first line; if that
  later chunk errors, the flag could reject an otherwise-valid line.
  readline's async iterator already re-throws stream errors on Node 16+,
  which the existing catch handles (see the sketch below).
- Test: 120 KB session_meta line forces multi-chunk line assembly.
- Test: truncated mid-write first line is rejected, not parsed as half
  an object.
ozymandiashh 2026-05-02 02:34:41 +03:00
parent 98bbe5b678
commit ff8b20a79e
2 changed files with 44 additions and 3 deletions
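
A quick way to see the Node 16+ behavior the first bullet relies on — a
minimal standalone sketch, not code from this repo; the nonexistent path just
forces an ENOENT from the underlying stream:

import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'

async function demo(): Promise<void> {
  const stream = createReadStream('/no/such/file', { encoding: 'utf8' })
  const rl = createInterface({ input: stream, crlfDelay: Infinity })
  try {
    for await (const line of rl) {
      console.log(line) // never reached: the stream errors on its first read
    }
  } catch (err) {
    // On Node 16+ the stream error surfaces here, through the iterator;
    // older Node left this gap, which is the kind of hole a manual flag
    // papered over.
    console.log('caught:', (err as NodeJS.ErrnoException).code) // ENOENT
  } finally {
    rl.close()
    stream.destroy()
  }
}

demo()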


@@ -89,8 +89,10 @@ async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
   })
   const rl = createInterface({ input: stream, crlfDelay: Infinity })
   let firstLine: string | undefined
-  let streamError: unknown
-  stream.once('error', (err) => { streamError = err })
+  // readline's async iterator re-throws underlying stream errors (ENOENT,
+  // EACCES, etc.) on Node 16+, which the catch below handles. Don't track a
+  // separate streamError flag: it can race with the read-ahead and reject a
+  // valid first line if a *later* chunk errors after we've already broken.
   try {
     for await (const line of rl) {
       firstLine = line
@@ -102,7 +104,7 @@ async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
     rl.close()
     stream.destroy()
   }
-  if (streamError || !firstLine || !firstLine.trim()) return null
+  if (!firstLine || !firstLine.trim()) return null
   try {
     return JSON.parse(firstLine) as CodexEntry
   } catch {
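
Pieced together from the two hunks above, the function after this change reads
roughly as follows. A sketch only: the createReadStream options, the catch
around the loop, and the CodexEntry import are assumed from unshown context.

import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'
import type { CodexEntry } from './types' // assumed path

async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
  const stream = createReadStream(filePath, {
    encoding: 'utf8', // assumed; the hunk only shows the closing `})`
  })
  const rl = createInterface({ input: stream, crlfDelay: Infinity })
  let firstLine: string | undefined
  // readline's async iterator re-throws underlying stream errors (ENOENT,
  // EACCES, etc.) on Node 16+, which the catch below handles.
  try {
    for await (const line of rl) {
      firstLine = line
      break // only the first line matters; later chunks may still be in flight
    }
  } catch {
    return null // stream error surfaced through the iterator
  } finally {
    rl.close()
    stream.destroy()
  }
  if (!firstLine || !firstLine.trim()) return null
  try {
    return JSON.parse(firstLine) as CodexEntry
  } catch {
    return null // torn/truncated first line is not valid JSON yet
  }
}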


@@ -176,6 +176,45 @@ describe('codex provider - session discovery', () => {
     expect(sessions[0]!.project).toBe('Users-test-nonl')
   })
+
+  it('handles a session_meta line that spans multiple stream chunks', async () => {
+    // createReadStream defaults to a 64 KiB highWaterMark, so a >64 KiB first
+    // line forces readline to assemble the line across chunk boundaries.
+    const bigPayload = JSON.stringify({
+      type: 'session_meta',
+      timestamp: '2026-05-02T00:00:00Z',
+      payload: {
+        cwd: '/Users/test/multichunk',
+        originator: 'codex-tui',
+        session_id: 'sess-multichunk',
+        model: 'gpt-5.5',
+        base_instructions: { text: 'y'.repeat(120_000) },
+      },
+    })
+    await writeSession(tmpDir, '2026-05-02', 'rollout-multichunk.jsonl', [
+      bigPayload,
+      tokenCount({ last: { input: 100, output: 50 }, total: { total: 150 } }),
+    ])
+    const provider = createCodexProvider(tmpDir)
+    const sessions = await provider.discoverSessions()
+    expect(sessions).toHaveLength(1)
+    expect(sessions[0]!.project).toBe('Users-test-multichunk')
+  })
+
+  it('rejects truncated/torn first-line writes without throwing', async () => {
+    // Simulate a partial write where Codex started the session_meta object
+    // but hasn't flushed the rest yet (no closing brace, no newline).
+    const [year, month, day] = '2026-05-02'.split('-')
+    const sessionDir = join(tmpDir, 'sessions', year!, month!, day!)
+    await mkdir(sessionDir, { recursive: true })
+    await writeFile(
+      join(sessionDir, 'rollout-torn.jsonl'),
+      '{"type":"session_meta","timestamp":"2026-05-02T00:00:00Z","payload":{"cwd":"/x","originator":"codex-tui","session_id":"s","model":"gpt',
+    )
+    const provider = createCodexProvider(tmpDir)
+    const sessions = await provider.discoverSessions()
+    expect(sessions).toHaveLength(0)
+  })
 
   it('returns no sessions for an empty rollout file', async () => {
     const [year, month, day] = '2026-05-02'.split('-')
     const sessionDir = join(tmpDir, 'sessions', year!, month!, day!)