codeburn/tests/parser-large-session.test.ts
iamtoruk a7bb780618 Reduce Claude parser OOM risk via entry compaction (0.9.9)
Strip heavy fields from JournalEntry immediately after JSON.parse in the
JSONL hot loop. Keeps only what downstream consumers need: type, timestamp,
sessionId, cwd, compacted user text (2000 char total cap), assistant
model/usage/id, tool_use names with Skill and Bash inputs, and MCP
inventory attachments. Text, thinking, and tool_result blocks are dropped.

Also removes redundant hydrateCache() from status --format json and
terminal status paths, and clears the session cache between period
parses to avoid pinning both today and month result sets.

This is a mitigation, not a full fix. Very large month ranges still
materialize full ProjectSummary.turns arrays. The real fix is the
streaming single-pass parser refactor.
2026-05-15 13:20:50 -07:00

148 lines
5 KiB
TypeScript

import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises'
import { tmpdir } from 'node:os'
import { join } from 'node:path'
import { describe, expect, it, beforeEach, afterEach } from 'vitest'
import { parseAllSessions, clearSessionCache } from '../src/parser.js'
import type { DateRange } from '../src/types.js'
let home: string
beforeEach(async () => {
home = await mkdtemp(join(tmpdir(), 'codeburn-large-'))
process.env['CLAUDE_CONFIG_DIR'] = join(home, '.claude')
})
afterEach(async () => {
clearSessionCache()
delete process.env['CLAUDE_CONFIG_DIR']
await rm(home, { recursive: true, force: true })
})
function userLine(sessionId: string, timestamp: string, textSize = 100): string {
return JSON.stringify({
type: 'user',
sessionId,
timestamp,
cwd: '/projects/app',
message: { role: 'user', content: 'x'.repeat(textSize) },
})
}
function assistantLine(sessionId: string, timestamp: string, messageId: string, opts?: {
contentSize?: number
toolCount?: number
}): string {
const contentSize = opts?.contentSize ?? 0
const toolCount = opts?.toolCount ?? 1
const content: unknown[] = []
if (contentSize > 0) {
content.push({ type: 'text', text: 'y'.repeat(contentSize) })
content.push({ type: 'thinking', thinking: 'z'.repeat(contentSize) })
}
for (let i = 0; i < toolCount; i++) {
content.push({
type: 'tool_use',
id: `tu-${i}`,
name: i === 0 ? 'Edit' : 'Read',
input: { file_path: '/tmp/x', big: 'w'.repeat(contentSize) },
})
}
return JSON.stringify({
type: 'assistant',
sessionId,
timestamp,
message: {
id: messageId,
type: 'message',
role: 'assistant',
model: 'claude-sonnet-4-5',
content,
usage: { input_tokens: 1000, output_tokens: 100 },
},
})
}
function attachmentLine(sessionId: string, timestamp: string): string {
return JSON.stringify({
type: 'attachment',
sessionId,
timestamp,
attachment: {
type: 'deferred_tools_delta',
addedNames: ['Bash', 'Edit', 'Read', 'mcp__hf__hub_search'],
},
})
}
describe('parseAllSessions with large Claude fixture', () => {
it('correctly parses sessions with bulky text/thinking/tool_result blocks', async () => {
const projectDir = join(home, '.claude', 'projects', 'bigapp')
await mkdir(projectDir, { recursive: true })
const lines: string[] = []
lines.push(attachmentLine('s1', '2026-04-10T09:00:00Z'))
for (let i = 0; i < 50; i++) {
const ts = `2026-04-10T${String(9 + Math.floor(i / 10)).padStart(2, '0')}:${String((i % 10) * 5).padStart(2, '0')}:00Z`
lines.push(userLine('s1', ts, 5000))
lines.push(assistantLine('s1', ts.replace(':00Z', ':30Z'), `msg-${i}`, {
contentSize: 50_000,
toolCount: 3,
}))
}
await writeFile(join(projectDir, 'session.jsonl'), lines.join('\n'))
const range: DateRange = {
start: new Date('2026-04-10T00:00:00Z'),
end: new Date('2026-04-10T23:59:59Z'),
}
const projects = await parseAllSessions(range, 'claude')
expect(projects.length).toBeGreaterThan(0)
const proj = projects[0]!
expect(proj.totalApiCalls).toBe(50)
expect(proj.totalCostUSD).toBeGreaterThan(0)
const sess = proj.sessions[0]!
expect(sess.turns.length).toBe(50)
for (const turn of sess.turns) {
expect(turn.userMessage.length).toBeLessThanOrEqual(2000)
expect(turn.assistantCalls.length).toBe(1)
const call = turn.assistantCalls[0]!
expect(call.tools).toContain('Edit')
expect(call.tools).toContain('Read')
expect(call.model).toBe('claude-sonnet-4-5')
}
expect(sess.mcpInventory).toContain('mcp__hf__hub_search')
})
it('handles malformed JSONL lines without crashing', async () => {
const projectDir = join(home, '.claude', 'projects', 'baddata')
await mkdir(projectDir, { recursive: true })
const lines = [
'not json at all',
'{"type": "user", "sessionId": "s1", "timestamp": "2026-04-10T10:00:00Z", "message": {"role": "user", "content": [null, {"type": "text", "text": "hello"}, 42]}}',
'{"type": "assistant", "sessionId": "s1", "timestamp": "2026-04-10T10:01:00Z", "message": {"id": "m1", "type": "message", "role": "assistant", "model": "claude-sonnet-4-5", "content": "not-an-array", "usage": {"input_tokens": 100, "output_tokens": 50}}}',
'{"type": "assistant", "sessionId": "s1", "timestamp": "2026-04-10T10:02:00Z", "message": {"id": "m2", "type": "message", "role": "assistant", "model": "claude-sonnet-4-5", "content": [null, {"type": "tool_use", "id": "t1", "name": "Read", "input": {}}], "usage": {"input_tokens": 100, "output_tokens": 50}}}',
]
await writeFile(join(projectDir, 'session.jsonl'), lines.join('\n'))
const range: DateRange = {
start: new Date('2026-04-10T00:00:00Z'),
end: new Date('2026-04-10T23:59:59Z'),
}
const projects = await parseAllSessions(range, 'claude')
expect(projects.length).toBeGreaterThan(0)
const sess = projects[0]!.sessions[0]!
expect(sess.apiCalls).toBeGreaterThanOrEqual(1)
})
})