From 5e49f17e64096382b5f02b3be5514289dcec65a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Majcher?= Date: Wed, 22 Apr 2026 10:11:13 +0000 Subject: [PATCH] fix: switch scanJsonlFile and parseSessionFile to readSessionLines to prevent OOM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit readViaStream (used for files ≥8 MB) reconstructs the full file as a single string via chunks.join('\n'), giving the same peak allocation as readFile. Callers then call content.split('\n'), creating a second copy. With FILE_READ_CONCURRENCY=16 and files up to 128 MB this can exhaust the V8 heap (~6 GB theoretical peak). readSessionLines already exists as a proper async generator that yields one line at a time. Switch both hot-path callers to iterate it directly so the full file string is never held in memory. Adds two tests: a spy test confirming readSessionLines is called (not readSessionFile), and a 500-entry correctness test. Fixes #131 --- src/optimize.ts | 7 ++----- src/parser.ts | 11 ++++++----- tests/optimize-fs.test.ts | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/src/optimize.ts b/src/optimize.ts index 2e8913c..7077b29 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -4,7 +4,7 @@ import { existsSync, statSync } from 'fs' import { basename, join } from 'path' import { homedir } from 'os' -import { readSessionFile, readSessionFileSync } from './fs-utils.js' +import { readSessionLines, readSessionFileSync } from './fs-utils.js' import { discoverAllSessions } from './providers/index.js' import type { DateRange, ProjectSummary } from './types.js' import { formatCost } from './currency.js' @@ -224,9 +224,6 @@ export async function scanJsonlFile( dateRange: DateRange | undefined, recentCutoffMs = Date.now() - RECENT_WINDOW_MS, ): Promise { - const content = await readSessionFile(filePath) - if (content === null) return { calls: [], cwds: [], apiCalls: [], userMessages: [] } - const calls: ToolCall[] = [] const cwds: string[] = [] const apiCalls: ApiCallMeta[] = [] @@ -234,7 +231,7 @@ export async function scanJsonlFile( const sessionId = basename(filePath, '.jsonl') let lastVersion = '' - for (const line of content.split('\n')) { + for await (const line of readSessionLines(filePath)) { if (!line.trim()) continue let entry: Record try { entry = JSON.parse(line) } catch { continue } diff --git a/src/parser.ts b/src/parser.ts index 6bc81a7..ab4eacd 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,6 +1,6 @@ import { readdir, stat } from 'fs/promises' import { basename, join } from 'path' -import { readSessionFile } from './fs-utils.js' +import { readSessionLines } from './fs-utils.js' import { calculateCost, getShortModelName } from './models.js' import { discoverAllSessions, getProvider } from './providers/index.js' import type { ParsedProviderCall } from './providers/types.js' @@ -275,16 +275,17 @@ async function parseSessionFile( if (s.mtimeMs < dateRange.start.getTime()) return null } catch { /* fall through to normal read; missing stat shouldn't break parsing */ } } - const content = await readSessionFile(filePath) - if (content === null) return null - const lines = content.split('\n').filter(l => l.trim()) const entries: JournalEntry[] = [] + let hasLines = false - for (const line of lines) { + for await (const line of readSessionLines(filePath)) { + hasLines = true const entry = parseJsonlLine(line) if (entry) entries.push(entry) } + if (!hasLines) return null + if (entries.length === 0) return null const sessionId = basename(filePath, '.jsonl') diff --git a/tests/optimize-fs.test.ts b/tests/optimize-fs.test.ts index e43f66b..4ec41de 100644 --- a/tests/optimize-fs.test.ts +++ b/tests/optimize-fs.test.ts @@ -2,6 +2,7 @@ import { describe, it, expect, afterAll, beforeEach, vi } from 'vitest' import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'fs' import { tmpdir } from 'os' import { join } from 'path' +import * as fsUtils from '../src/fs-utils.js' vi.mock('os', async () => { const actual = await vi.importActual('os') @@ -313,6 +314,40 @@ describe('scanJsonlFile', () => { expect(result.calls).toEqual([]) }) + it('uses readSessionLines (streaming) rather than readSessionFile (full-string load)', async () => { + const readSessionLinesSpy = vi.spyOn(fsUtils, 'readSessionLines') + const readSessionFileSpy = vi.spyOn(fsUtils, 'readSessionFile') + const root = makeFixtureRoot() + const filePath = join(root, 'session.jsonl') + const now = new Date().toISOString() + writeFile(filePath, JSON.stringify({ + type: 'assistant', timestamp: now, + message: { content: [{ type: 'tool_use', name: 'Bash', input: {} }] }, + })) + await scanJsonlFile(filePath, 'p1', undefined) + expect(readSessionLinesSpy).toHaveBeenCalledWith(filePath) + expect(readSessionFileSpy).not.toHaveBeenCalled() + readSessionLinesSpy.mockRestore() + readSessionFileSpy.mockRestore() + }) + + it('processes all entries in a large multi-line file without truncation', async () => { + const root = makeFixtureRoot() + const filePath = join(root, 'session.jsonl') + const now = new Date().toISOString() + const ENTRY_COUNT = 500 + const lines = Array.from({ length: ENTRY_COUNT }, (_, i) => + JSON.stringify({ + type: 'assistant', + timestamp: now, + message: { content: [{ type: 'tool_use', name: 'Read', input: { file_path: `/file-${i}.ts` } }] }, + }), + ) + writeFile(filePath, lines.join('\n')) + const result = await scanJsonlFile(filePath, 'p1', undefined) + expect(result.calls).toHaveLength(ENTRY_COUNT) + }) + it('respects date-range filter for assistant entries', async () => { const root = makeFixtureRoot() const filePath = join(root, 'session.jsonl')