review: drop streamError flag, add multi-chunk and torn-write tests

- Stop tracking a separate streamError flag. createReadStream's default
  64 KiB highWaterMark means the stream may already be reading chunk 2
  when we break out of the loop after yielding the first line; if that
  later chunk errors, the flag could reject an otherwise-valid line.
  readline's async iterator already re-throws stream errors on Node 16+,
  which the existing catch handles (see the sketch below).
- Test: 120 KB session_meta line forces multi-chunk line assembly.
- Test: truncated mid-write first line is rejected, not parsed as half
  an object.
ozymandiashh 2026-05-02 02:34:41 +03:00
parent 98bbe5b678
commit ff8b20a79e
2 changed files with 44 additions and 3 deletions
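
A quick way to see the Node 16+ behavior the first bullet relies on — a
minimal standalone sketch, not code from this repo; the nonexistent path just
forces an ENOENT from the underlying stream:

import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'

async function demo(): Promise<void> {
  const stream = createReadStream('/no/such/file', { encoding: 'utf8' })
  const rl = createInterface({ input: stream, crlfDelay: Infinity })
  try {
    for await (const line of rl) {
      console.log(line) // never reached: the stream errors on its first read
    }
  } catch (err) {
    // On Node 16+ the stream error surfaces here, through the iterator;
    // older Node left this gap, which is the kind of hole a manual flag
    // papered over.
    console.log('caught:', (err as NodeJS.ErrnoException).code) // ENOENT
  } finally {
    rl.close()
    stream.destroy()
  }
}

demo()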


@@ -89,8 +89,10 @@ async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
   })
   const rl = createInterface({ input: stream, crlfDelay: Infinity })
   let firstLine: string | undefined
-  let streamError: unknown
-  stream.once('error', (err) => { streamError = err })
+  // readline's async iterator re-throws underlying stream errors (ENOENT,
+  // EACCES, etc.) on Node 16+, which the catch below handles. Don't track a
+  // separate streamError flag: it can race with the read-ahead and reject a
+  // valid first line if a *later* chunk errors after we've already broken.
   try {
     for await (const line of rl) {
       firstLine = line
@@ -102,7 +104,7 @@ async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
     rl.close()
     stream.destroy()
   }
-  if (streamError || !firstLine || !firstLine.trim()) return null
+  if (!firstLine || !firstLine.trim()) return null
   try {
     return JSON.parse(firstLine) as CodexEntry
   } catch {
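
Pieced together from the two hunks above, the function after this change reads
roughly as follows. A sketch only: the createReadStream options, the catch
around the loop, and the CodexEntry import are assumed from unshown context.

import { createReadStream } from 'node:fs'
import { createInterface } from 'node:readline'
import type { CodexEntry } from './types' // assumed path

async function readFirstLine(filePath: string): Promise<CodexEntry | null> {
  const stream = createReadStream(filePath, {
    encoding: 'utf8', // assumed; the hunk only shows the closing `})`
  })
  const rl = createInterface({ input: stream, crlfDelay: Infinity })
  let firstLine: string | undefined
  // readline's async iterator re-throws underlying stream errors (ENOENT,
  // EACCES, etc.) on Node 16+, which the catch below handles.
  try {
    for await (const line of rl) {
      firstLine = line
      break // only the first line matters; later chunks may still be in flight
    }
  } catch {
    return null // stream error surfaced through the iterator
  } finally {
    rl.close()
    stream.destroy()
  }
  if (!firstLine || !firstLine.trim()) return null
  try {
    return JSON.parse(firstLine) as CodexEntry
  } catch {
    return null // torn/truncated first line is not valid JSON yet
  }
}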


@@ -176,6 +176,45 @@ describe('codex provider - session discovery', () => {
     expect(sessions[0]!.project).toBe('Users-test-nonl')
   })
+
+  it('handles a session_meta line that spans multiple stream chunks', async () => {
+    // createReadStream defaults to a 64 KiB highWaterMark, so a >64 KiB first
+    // line forces readline to assemble the line across chunk boundaries.
+    const bigPayload = JSON.stringify({
+      type: 'session_meta',
+      timestamp: '2026-05-02T00:00:00Z',
+      payload: {
+        cwd: '/Users/test/multichunk',
+        originator: 'codex-tui',
+        session_id: 'sess-multichunk',
+        model: 'gpt-5.5',
+        base_instructions: { text: 'y'.repeat(120_000) },
+      },
+    })
+    await writeSession(tmpDir, '2026-05-02', 'rollout-multichunk.jsonl', [
+      bigPayload,
+      tokenCount({ last: { input: 100, output: 50 }, total: { total: 150 } }),
+    ])
+    const provider = createCodexProvider(tmpDir)
+    const sessions = await provider.discoverSessions()
+    expect(sessions).toHaveLength(1)
+    expect(sessions[0]!.project).toBe('Users-test-multichunk')
+  })
+
+  it('rejects truncated/torn first-line writes without throwing', async () => {
+    // Simulate a partial write where Codex started the session_meta object
+    // but hasn't flushed the rest yet (no closing brace, no newline).
+    const [year, month, day] = '2026-05-02'.split('-')
+    const sessionDir = join(tmpDir, 'sessions', year!, month!, day!)
+    await mkdir(sessionDir, { recursive: true })
+    await writeFile(
+      join(sessionDir, 'rollout-torn.jsonl'),
+      '{"type":"session_meta","timestamp":"2026-05-02T00:00:00Z","payload":{"cwd":"/x","originator":"codex-tui","session_id":"s","model":"gpt',
+    )
+    const provider = createCodexProvider(tmpDir)
+    const sessions = await provider.discoverSessions()
+    expect(sessions).toHaveLength(0)
+  })
 
   it('returns no sessions for an empty rollout file', async () => {
     const [year, month, day] = '2026-05-02'.split('-')
     const sessionDir = join(tmpDir, 'sessions', year!, month!, day!)