mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-19 07:43:09 +00:00
fix: switch scanJsonlFile and parseSessionFile to readSessionLines to prevent OOM
readViaStream (used for files ≥8 MB) reconstructs the full file as a
single string via chunks.join('\n'), giving the same peak allocation as
readFile. Callers then call content.split('\n'), creating a second copy.
With FILE_READ_CONCURRENCY=16 and files up to 128 MB this can exhaust
the V8 heap (~6 GB theoretical peak).
readSessionLines already exists as a proper async generator that yields
one line at a time. Switch both hot-path callers to iterate it directly
so the full file string is never held in memory.
Adds two scanJsonlFile tests: a spy test confirming it calls readSessionLines
(not readSessionFile), and a 500-entry correctness test.
Fixes #131
This commit is contained in:
parent
d4e07de18f
commit
5e49f17e64
3 changed files with 43 additions and 10 deletions
|
|
@ -4,7 +4,7 @@ import { existsSync, statSync } from 'fs'
|
|||
import { basename, join } from 'path'
|
||||
import { homedir } from 'os'
|
||||
|
||||
import { readSessionFile, readSessionFileSync } from './fs-utils.js'
|
||||
import { readSessionLines, readSessionFileSync } from './fs-utils.js'
|
||||
import { discoverAllSessions } from './providers/index.js'
|
||||
import type { DateRange, ProjectSummary } from './types.js'
|
||||
import { formatCost } from './currency.js'
|
||||
|
|
@ -224,9 +224,6 @@ export async function scanJsonlFile(
|
|||
dateRange: DateRange | undefined,
|
||||
recentCutoffMs = Date.now() - RECENT_WINDOW_MS,
|
||||
): Promise<ScanFileResult> {
|
||||
const content = await readSessionFile(filePath)
|
||||
if (content === null) return { calls: [], cwds: [], apiCalls: [], userMessages: [] }
|
||||
|
||||
const calls: ToolCall[] = []
|
||||
const cwds: string[] = []
|
||||
const apiCalls: ApiCallMeta[] = []
|
||||
|
|
@ -234,7 +231,7 @@ export async function scanJsonlFile(
|
|||
const sessionId = basename(filePath, '.jsonl')
|
||||
let lastVersion = ''
|
||||
|
||||
for (const line of content.split('\n')) {
|
||||
for await (const line of readSessionLines(filePath)) {
|
||||
if (!line.trim()) continue
|
||||
let entry: Record<string, unknown>
|
||||
try { entry = JSON.parse(line) } catch { continue }
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { readdir, stat } from 'fs/promises'
|
||||
import { basename, join } from 'path'
|
||||
import { readSessionFile } from './fs-utils.js'
|
||||
import { readSessionLines } from './fs-utils.js'
|
||||
import { calculateCost, getShortModelName } from './models.js'
|
||||
import { discoverAllSessions, getProvider } from './providers/index.js'
|
||||
import type { ParsedProviderCall } from './providers/types.js'
|
||||
|
|
@ -275,16 +275,17 @@ async function parseSessionFile(
|
|||
if (s.mtimeMs < dateRange.start.getTime()) return null
|
||||
} catch { /* fall through to normal read; missing stat shouldn't break parsing */ }
|
||||
}
|
||||
const content = await readSessionFile(filePath)
|
||||
if (content === null) return null
|
||||
const lines = content.split('\n').filter(l => l.trim())
|
||||
const entries: JournalEntry[] = []
|
||||
let hasLines = false
|
||||
|
||||
for (const line of lines) {
|
||||
for await (const line of readSessionLines(filePath)) {
|
||||
hasLines = true
|
||||
const entry = parseJsonlLine(line)
|
||||
if (entry) entries.push(entry)
|
||||
}
|
||||
|
||||
if (!hasLines) return null
|
||||
|
||||
if (entries.length === 0) return null
|
||||
|
||||
const sessionId = basename(filePath, '.jsonl')
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import { describe, it, expect, afterAll, beforeEach, vi } from 'vitest'
|
|||
import { mkdtempSync, rmSync, mkdirSync, writeFileSync, utimesSync } from 'fs'
|
||||
import { tmpdir } from 'os'
|
||||
import { join } from 'path'
|
||||
import * as fsUtils from '../src/fs-utils.js'
|
||||
|
||||
vi.mock('os', async () => {
|
||||
const actual = await vi.importActual<typeof import('os')>('os')
|
||||
|
|
@ -313,6 +314,40 @@ describe('scanJsonlFile', () => {
|
|||
expect(result.calls).toEqual([])
|
||||
})
|
||||
|
||||
it('uses readSessionLines (streaming) rather than readSessionFile (full-string load)', async () => {
|
||||
const readSessionLinesSpy = vi.spyOn(fsUtils, 'readSessionLines')
|
||||
const readSessionFileSpy = vi.spyOn(fsUtils, 'readSessionFile')
|
||||
const root = makeFixtureRoot()
|
||||
const filePath = join(root, 'session.jsonl')
|
||||
const now = new Date().toISOString()
|
||||
writeFile(filePath, JSON.stringify({
|
||||
type: 'assistant', timestamp: now,
|
||||
message: { content: [{ type: 'tool_use', name: 'Bash', input: {} }] },
|
||||
}))
|
||||
await scanJsonlFile(filePath, 'p1', undefined)
|
||||
expect(readSessionLinesSpy).toHaveBeenCalledWith(filePath)
|
||||
expect(readSessionFileSpy).not.toHaveBeenCalled()
|
||||
readSessionLinesSpy.mockRestore()
|
||||
readSessionFileSpy.mockRestore()
|
||||
})
|
||||
|
||||
it('processes all entries in a large multi-line file without truncation', async () => {
|
||||
const root = makeFixtureRoot()
|
||||
const filePath = join(root, 'session.jsonl')
|
||||
const now = new Date().toISOString()
|
||||
const ENTRY_COUNT = 500
|
||||
const lines = Array.from({ length: ENTRY_COUNT }, (_, i) =>
|
||||
JSON.stringify({
|
||||
type: 'assistant',
|
||||
timestamp: now,
|
||||
message: { content: [{ type: 'tool_use', name: 'Read', input: { file_path: `/file-${i}.ts` } }] },
|
||||
}),
|
||||
)
|
||||
writeFile(filePath, lines.join('\n'))
|
||||
const result = await scanJsonlFile(filePath, 'p1', undefined)
|
||||
expect(result.calls).toHaveLength(ENTRY_COUNT)
|
||||
})
|
||||
|
||||
it('respects date-range filter for assistant entries', async () => {
|
||||
const root = makeFixtureRoot()
|
||||
const filePath = join(root, 'session.jsonl')
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue