fix: switch scanJsonlFile and parseSessionFile to readSessionLines to prevent OOM

readViaStream (used for files ≥8 MB) reconstructs the full file as a
single string via chunks.join('\n'), giving the same peak allocation as
readFile. Callers then call content.split('\n'), creating a second copy.
With FILE_READ_CONCURRENCY=16 and files up to 128 MB this can exhaust
the V8 heap (~6 GB theoretical peak).

readSessionLines already exists as a proper async generator that yields
one line at a time. Switch both hot-path callers to iterate it directly
so the full file string is never held in memory.

Adds two tests for scanJsonlFile: a spy test confirming readSessionLines
is called (and readSessionFile is not), and a 500-entry correctness test.

Fixes #131
This commit is contained in:
Łukasz Majcher 2026-04-22 10:11:13 +00:00
parent d4e07de18f
commit 5e49f17e64
3 changed files with 43 additions and 10 deletions

View file

@ -4,7 +4,7 @@ import { existsSync, statSync } from 'fs'
import { basename, join } from 'path'
import { homedir } from 'os'
import { readSessionFile, readSessionFileSync } from './fs-utils.js'
import { readSessionLines, readSessionFileSync } from './fs-utils.js'
import { discoverAllSessions } from './providers/index.js'
import type { DateRange, ProjectSummary } from './types.js'
import { formatCost } from './currency.js'
@ -224,9 +224,6 @@ export async function scanJsonlFile(
dateRange: DateRange | undefined,
recentCutoffMs = Date.now() - RECENT_WINDOW_MS,
): Promise<ScanFileResult> {
const content = await readSessionFile(filePath)
if (content === null) return { calls: [], cwds: [], apiCalls: [], userMessages: [] }
const calls: ToolCall[] = []
const cwds: string[] = []
const apiCalls: ApiCallMeta[] = []
@ -234,7 +231,7 @@ export async function scanJsonlFile(
const sessionId = basename(filePath, '.jsonl')
let lastVersion = ''
for (const line of content.split('\n')) {
for await (const line of readSessionLines(filePath)) {
if (!line.trim()) continue
let entry: Record<string, unknown>
try { entry = JSON.parse(line) } catch { continue }

View file

@ -1,6 +1,6 @@
import { readdir, stat } from 'fs/promises'
import { basename, join } from 'path'
import { readSessionFile } from './fs-utils.js'
import { readSessionLines } from './fs-utils.js'
import { calculateCost, getShortModelName } from './models.js'
import { discoverAllSessions, getProvider } from './providers/index.js'
import type { ParsedProviderCall } from './providers/types.js'
@ -275,16 +275,17 @@ async function parseSessionFile(
if (s.mtimeMs < dateRange.start.getTime()) return null
} catch { /* fall through to normal read; missing stat shouldn't break parsing */ }
}
const content = await readSessionFile(filePath)
if (content === null) return null
const lines = content.split('\n').filter(l => l.trim())
const entries: JournalEntry[] = []
let hasLines = false
for (const line of lines) {
for await (const line of readSessionLines(filePath)) {
hasLines = true
const entry = parseJsonlLine(line)
if (entry) entries.push(entry)
}
if (!hasLines) return null
if (entries.length === 0) return null
const sessionId = basename(filePath, '.jsonl')

View file

@ -2,6 +2,7 @@ import { describe, it, expect, afterAll, beforeEach, vi } from 'vitest'
import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'fs'
import { tmpdir } from 'os'
import { join } from 'path'
import * as fsUtils from '../src/fs-utils.js'
vi.mock('os', async () => {
const actual = await vi.importActual<typeof import('os')>('os')
@ -313,6 +314,40 @@ describe('scanJsonlFile', () => {
expect(result.calls).toEqual([])
})
it('uses readSessionLines (streaming) rather than readSessionFile (full-string load)', async () => {
  // Regression guard: scanJsonlFile must read through the line-by-line
  // generator and must not fall back to the whole-file string reader.
  const readSessionLinesSpy = vi.spyOn(fsUtils, 'readSessionLines')
  const readSessionFileSpy = vi.spyOn(fsUtils, 'readSessionFile')
  try {
    const root = makeFixtureRoot()
    const filePath = join(root, 'session.jsonl')
    const now = new Date().toISOString()
    // A single assistant entry containing one tool_use is enough to drive a scan.
    writeFile(filePath, JSON.stringify({
      type: 'assistant', timestamp: now,
      message: { content: [{ type: 'tool_use', name: 'Bash', input: {} }] },
    }))
    await scanJsonlFile(filePath, 'p1', undefined)
    expect(readSessionLinesSpy).toHaveBeenCalledWith(filePath)
    expect(readSessionFileSpy).not.toHaveBeenCalled()
  } finally {
    // Restore in `finally` so a failed assertion or a rejected scan cannot
    // leave the spies installed for the tests that run afterwards.
    readSessionLinesSpy.mockRestore()
    readSessionFileSpy.mockRestore()
  }
})
it('processes all entries in a large multi-line file without truncation', async () => {
  // Write ENTRY_COUNT assistant entries, one JSON document per line, and
  // check that the scan reports one call for each of them.
  const ENTRY_COUNT = 500
  const fixturePath = join(makeFixtureRoot(), 'session.jsonl')
  const timestamp = new Date().toISOString()

  const jsonlRows: string[] = []
  for (let i = 0; i < ENTRY_COUNT; i++) {
    // Each entry carries exactly one tool_use with a distinct file path.
    const toolUse = { type: 'tool_use', name: 'Read', input: { file_path: `/file-${i}.ts` } }
    jsonlRows.push(JSON.stringify({
      type: 'assistant',
      timestamp,
      message: { content: [toolUse] },
    }))
  }
  writeFile(fixturePath, jsonlRows.join('\n'))

  const { calls } = await scanJsonlFile(fixturePath, 'p1', undefined)
  expect(calls).toHaveLength(ENTRY_COUNT)
})
it('respects date-range filter for assistant entries', async () => {
const root = makeFixtureRoot()
const filePath = join(root, 'session.jsonl')