fix: switch scanJsonlFile and parseSessionFile to readSessionLines to prevent OOM

readViaStream (used for files ≥8 MB) reconstructs the full file as a single string via chunks.join('\n'), so its peak allocation is the same as readFile's. Callers then call content.split('\n'), which creates a second copy of the data. With FILE_READ_CONCURRENCY=16 and files up to 128 MB, this can exhaust the V8 heap (~6 GB theoretical peak). readSessionLines already exists as a proper async generator that yields one line at a time. Switch both hot-path callers to iterate it directly so that the full file contents are never held in memory as a single string. Adds two tests: a spy test confirming that scanJsonlFile calls readSessionLines (not readSessionFile), and a 500-entry correctness test. Fixes #131
2026-05-19 16:13:56 +00:00 · 2026-04-22 10:11:13 +00:00 · 2026-04-22 10:11:13 +00:00 · 5e49f17e64
commit 5e49f17e64
parent d4e07de18f
3 changed files with 43 additions and 10 deletions
--- a/src/optimize.ts
+++ b/src/optimize.ts
@ -4,7 +4,7 @@ import { existsSync, statSync } from 'fs'
 import { basename, join } from 'path'
 import { homedir } from 'os'

-import { readSessionFile, readSessionFileSync } from './fs-utils.js'
+import { readSessionLines, readSessionFileSync } from './fs-utils.js'
 import { discoverAllSessions } from './providers/index.js'
 import type { DateRange, ProjectSummary } from './types.js'
 import { formatCost } from './currency.js'
@ -224,9 +224,6 @@ export async function scanJsonlFile(
  dateRange: DateRange | undefined,
  recentCutoffMs = Date.now() - RECENT_WINDOW_MS,
 ): Promise<ScanFileResult> {
-  const content = await readSessionFile(filePath)
-  if (content === null) return { calls: [], cwds: [], apiCalls: [], userMessages: [] }
-
  const calls: ToolCall[] = []
  const cwds: string[] = []
  const apiCalls: ApiCallMeta[] = []
@ -234,7 +231,7 @@ export async function scanJsonlFile(
  const sessionId = basename(filePath, '.jsonl')
  let lastVersion = ''

-  for (const line of content.split('\n')) {
+  for await (const line of readSessionLines(filePath)) {
    if (!line.trim()) continue
    let entry: Record<string, unknown>
    try { entry = JSON.parse(line) } catch { continue }
--- a/src/parser.ts
+++ b/src/parser.ts
@ -1,6 +1,6 @@
 import { readdir, stat } from 'fs/promises'
 import { basename, join } from 'path'
-import { readSessionFile } from './fs-utils.js'
+import { readSessionLines } from './fs-utils.js'
 import { calculateCost, getShortModelName } from './models.js'
 import { discoverAllSessions, getProvider } from './providers/index.js'
 import type { ParsedProviderCall } from './providers/types.js'
@ -275,16 +275,17 @@ async function parseSessionFile(
      if (s.mtimeMs < dateRange.start.getTime()) return null
    } catch { /* fall through to normal read; missing stat shouldn't break parsing */ }
  }
-  const content = await readSessionFile(filePath)
-  if (content === null) return null
-  const lines = content.split('\n').filter(l => l.trim())
  const entries: JournalEntry[] = []
+  let hasLines = false

-  for (const line of lines) {
+  for await (const line of readSessionLines(filePath)) {
+    hasLines = true
    const entry = parseJsonlLine(line)
    if (entry) entries.push(entry)
  }

+  if (!hasLines) return null
+
  if (entries.length === 0) return null

  const sessionId = basename(filePath, '.jsonl')
--- a/tests/optimize-fs.test.ts
+++ b/tests/optimize-fs.test.ts
@ -2,6 +2,7 @@ import { describe, it, expect, afterAll, beforeEach, vi } from 'vitest'
 import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'fs'
 import { tmpdir } from 'os'
 import { join } from 'path'
+import * as fsUtils from '../src/fs-utils.js'

 vi.mock('os', async () => {
  const actual = await vi.importActual<typeof import('os')>('os')
@ -313,6 +314,40 @@ describe('scanJsonlFile', () => {
    expect(result.calls).toEqual([])
  })

+  it('uses readSessionLines (streaming) rather than readSessionFile (full-string load)', async () => {
+    const readSessionLinesSpy = vi.spyOn(fsUtils, 'readSessionLines')
+    const readSessionFileSpy = vi.spyOn(fsUtils, 'readSessionFile')
+    const root = makeFixtureRoot()
+    const filePath = join(root, 'session.jsonl')
+    const now = new Date().toISOString()
+    writeFile(filePath, JSON.stringify({
+      type: 'assistant', timestamp: now,
+      message: { content: [{ type: 'tool_use', name: 'Bash', input: {} }] },
+    }))
+    await scanJsonlFile(filePath, 'p1', undefined)
+    expect(readSessionLinesSpy).toHaveBeenCalledWith(filePath)
+    expect(readSessionFileSpy).not.toHaveBeenCalled()
+    readSessionLinesSpy.mockRestore()
+    readSessionFileSpy.mockRestore()
+  })
+
+  it('processes all entries in a large multi-line file without truncation', async () => {
+    const root = makeFixtureRoot()
+    const filePath = join(root, 'session.jsonl')
+    const now = new Date().toISOString()
+    const ENTRY_COUNT = 500
+    const lines = Array.from({ length: ENTRY_COUNT }, (_, i) =>
+      JSON.stringify({
+        type: 'assistant',
+        timestamp: now,
+        message: { content: [{ type: 'tool_use', name: 'Read', input: { file_path: `/file-${i}.ts` } }] },
+      }),
+    )
+    writeFile(filePath, lines.join('\n'))
+    const result = await scanJsonlFile(filePath, 'p1', undefined)
+    expect(result.calls).toHaveLength(ENTRY_COUNT)
+  })
+
  it('respects date-range filter for assistant entries', async () => {
    const root = makeFixtureRoot()
    const filePath = join(root, 'session.jsonl')