From a7bb7806186a3ceccdffc88ddd610525c289bbe8 Mon Sep 17 00:00:00 2001 From: iamtoruk Date: Fri, 15 May 2026 13:20:50 -0700 Subject: [PATCH] Reduce Claude parser OOM risk via entry compaction (0.9.9) Strip heavy fields from JournalEntry immediately after JSON.parse in the JSONL hot loop. Keeps only what downstream consumers need: type, timestamp, sessionId, cwd, compacted user text (2000 char total cap), assistant model/usage/id, tool_use names with Skill and Bash inputs, and MCP inventory attachments. Text, thinking, and tool_result blocks are dropped. Also removes redundant hydrateCache() from status --format json and terminal status paths, and clears the session cache between period parses to avoid pinning both today and month result sets. This is a mitigation, not a full fix. Very large month ranges still materialize full ProjectSummary.turns arrays. The real fix is the streaming single-pass parser refactor. --- CHANGELOG.md | 18 ++ package.json | 2 +- src/main.ts | 17 +- src/parser.ts | 104 +++++++- tests/cli-status-menubar.test.ts | 103 ++++++++ tests/parser-compact-entry.test.ts | 396 +++++++++++++++++++++++++++++ tests/parser-large-session.test.ts | 148 +++++++++++ 7 files changed, 779 insertions(+), 9 deletions(-) create mode 100644 tests/cli-status-menubar.test.ts create mode 100644 tests/parser-compact-entry.test.ts create mode 100644 tests/parser-large-session.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bf6977..a1d328c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,24 @@ a `createdAt` timestamp were defaulting to the current date, inflating Today's spend. Now skipped at both the SQL and application level. +## 0.9.9 - 2026-05-15 + +### Fixed (CLI) +- **Reduced Claude parser OOM risk.** Large Claude JSONL sessions retained + full entry objects (text, thinking blocks, tool results) in memory during + parsing, causing V8 heap exhaustion on heavy usage months. 
Entries are now + compacted immediately after JSON.parse, keeping only the fields needed for + cost/token aggregation. This is a mitigation - very heavy users may still + need the streaming parser refactor planned next. +- **Redundant `hydrateCache()` in status commands.** The `status --format json` + and terminal `status` paths hydrated the daily cache before calling + `parseAllSessions` directly, doubling memory pressure for no benefit. + Removed. The menubar-json path still hydrates as needed. +- **Session cache retained between status parses.** The `status --format json` + path parsed today and month ranges without clearing the in-process session + cache between them, keeping both result sets pinned. Cache is now cleared + after each period is consumed. + ## 0.9.8 - 2026-05-10 ### Added (CLI) diff --git a/package.json b/package.json index c24699d..10e9f0c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "codeburn", - "version": "0.9.8", + "version": "0.9.9", "description": "See where your AI coding tokens go - by task, tool, model, and project", "type": "module", "main": "./dist/cli.js", diff --git a/src/main.ts b/src/main.ts index eaa4d0e..9d82a18 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2,7 +2,7 @@ import { Command } from 'commander' import { installMenubarApp } from './menubar-installer.js' import { exportCsv, exportJson, type PeriodExport } from './export.js' import { loadPricing, setModelAliases } from './models.js' -import { parseAllSessions, filterProjectsByName } from './parser.js' +import { parseAllSessions, filterProjectsByName, clearSessionCache } from './parser.js' import { convertCost } from './currency.js' import { renderStatusBar } from './format.js' import { type PeriodData, type ProviderCost } from './menubar-json.js' @@ -529,9 +529,12 @@ program } if (opts.format === 'json') { - await hydrateCache() - const todayData = buildPeriodData('today', fp(await parseAllSessions(getDateRange('today').range, pf))) - const 
monthData = buildPeriodData('month', fp(await parseAllSessions(getDateRange('month').range, pf)))
+      const todayProjects = fp(await parseAllSessions(getDateRange('today').range, pf))
+      const todayData = buildPeriodData('today', todayProjects)
+      clearSessionCache()
+      const monthProjects = fp(await parseAllSessions(getDateRange('month').range, pf))
+      const monthData = buildPeriodData('month', monthProjects)
+      clearSessionCache()
       const { code, rate } = getCurrency()
       const payload: {
         currency: string
@@ -551,9 +554,9 @@ program
       return
     }
 
-    await hydrateCache()
-    const monthProjects = fp(await parseAllSessions(getDateRange('month').range, pf))
-    console.log(renderStatusBar(monthProjects))
+    const monthProjects2 = fp(await parseAllSessions(getDateRange('month').range, pf))
+    clearSessionCache()
+    console.log(renderStatusBar(monthProjects2))
   })
 
 program
diff --git a/src/parser.ts b/src/parser.ts
index 9ab75ce..66ddbeb 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -40,6 +40,108 @@ function parseJsonlLine(line: string): JournalEntry | null {
   }
 }
 
+const USER_TEXT_CAP = 2000
+const BASH_COMMAND_CAP = 2000
+const MAX_TOOL_BLOCKS = 500
+const MAX_ADDED_NAMES = 1000
+
+export function compactEntry(raw: JournalEntry): JournalEntry {
+  const entry: JournalEntry = { type: raw.type }
+
+  if (raw.timestamp !== undefined) entry.timestamp = raw.timestamp
+  if (raw.sessionId !== undefined) entry.sessionId = raw.sessionId
+  if (raw.cwd !== undefined) entry.cwd = raw.cwd
+
+  const att = (raw as Record<string, unknown>)['attachment']
+  if (att && typeof att === 'object') {
+    const a = att as Record<string, unknown>
+    if (a['type'] === 'deferred_tools_delta' && Array.isArray(a['addedNames'])) {
+      const names: string[] = []
+      for (let i = 0; i < Math.min(a['addedNames'].length, MAX_ADDED_NAMES); i++) {
+        const n = a['addedNames'][i]
+        if (typeof n === 'string') names.push(n)
+      }
+      ;(entry as Record<string, unknown>)['attachment'] = { type: 'deferred_tools_delta', addedNames: names }
+    }
+  }
+
+  if (!raw.message) return entry
+
+  if (raw.message.role === 'user') {
+    const content = raw.message.content
+    if (typeof content === 'string') {
+      entry.message = { role: 'user', content: content.slice(0, USER_TEXT_CAP) }
+    } else if (Array.isArray(content)) {
+      let remaining = USER_TEXT_CAP
+      const blocks: { type: 'text'; text: string }[] = []
+      for (const b of content) {
+        if (remaining <= 0) break
+        if (!b || typeof b !== 'object' || b.type !== 'text') continue
+        const text = (b as { text?: unknown }).text
+        if (typeof text !== 'string') continue
+        const sliced = text.slice(0, remaining)
+        blocks.push({ type: 'text', text: sliced })
+        remaining -= sliced.length
+      }
+      entry.message = { role: 'user', content: blocks }
+    }
+    return entry
+  }
+
+  const msg = raw.message as AssistantMessageContent
+  if (!msg.usage || !msg.model) return entry
+
+  const rawContent = msg.content
+  const contentArr = Array.isArray(rawContent) ? rawContent : []
+  const toolBlocks = contentArr.filter((b): b is ToolUseBlock => b != null && typeof b === 'object' && b.type === 'tool_use')
+  const compactContent: ContentBlock[] = toolBlocks.slice(0, MAX_TOOL_BLOCKS).map(tb => {
+    let input: Record<string, unknown> = {}
+    if (tb.name === 'Skill') {
+      const ri = (tb.input ?? {}) as Record<string, unknown>
+      if (typeof ri['skill'] === 'string') input['skill'] = (ri['skill'] as string).slice(0, 200)
+      if (typeof ri['name'] === 'string') input['name'] = (ri['name'] as string).slice(0, 200)
+    } else if (BASH_TOOLS.has(tb.name)) {
+      const ri = (tb.input ?? {}) as Record<string, unknown>
+      if (typeof ri['command'] === 'string') {
+        input['command'] = (ri['command'] as string).slice(0, BASH_COMMAND_CAP)
+      }
+    }
+    return { type: 'tool_use' as const, id: tb.id ?? '', name: tb.name, input }
+  })
+
+  const u = msg.usage
+  const compactUsage: AssistantMessageContent['usage'] = {
+    input_tokens: u.input_tokens,
+    output_tokens: u.output_tokens,
+  }
+  if (u.cache_creation_input_tokens) compactUsage.cache_creation_input_tokens = u.cache_creation_input_tokens
+  if (u.cache_creation) {
+    compactUsage.cache_creation = {
+      ...(u.cache_creation.ephemeral_5m_input_tokens ? { ephemeral_5m_input_tokens: u.cache_creation.ephemeral_5m_input_tokens } : {}),
+      ...(u.cache_creation.ephemeral_1h_input_tokens ? { ephemeral_1h_input_tokens: u.cache_creation.ephemeral_1h_input_tokens } : {}),
+    }
+  }
+  if (u.cache_read_input_tokens) compactUsage.cache_read_input_tokens = u.cache_read_input_tokens
+  if (u.server_tool_use) {
+    compactUsage.server_tool_use = {
+      ...(u.server_tool_use.web_search_requests ? { web_search_requests: u.server_tool_use.web_search_requests } : {}),
+      ...(u.server_tool_use.web_fetch_requests ? { web_fetch_requests: u.server_tool_use.web_fetch_requests } : {}),
+    }
+  }
+  if (u.speed) compactUsage.speed = u.speed
+
+  entry.message = {
+    type: 'message',
+    role: 'assistant',
+    model: msg.model,
+    usage: compactUsage,
+    content: compactContent,
+    ...(msg.id ? 
{ id: msg.id } : {}), + } + + return entry +} + function extractToolNames(content: ContentBlock[]): string[] { return content .filter((b): b is ToolUseBlock => b.type === 'tool_use') @@ -419,7 +521,7 @@ async function parseSessionFile( for await (const line of readSessionLines(filePath)) { hasLines = true const entry = parseJsonlLine(line) - if (entry) entries.push(entry) + if (entry) entries.push(compactEntry(entry)) } if (!hasLines) return null diff --git a/tests/cli-status-menubar.test.ts b/tests/cli-status-menubar.test.ts new file mode 100644 index 0000000..22a6ae1 --- /dev/null +++ b/tests/cli-status-menubar.test.ts @@ -0,0 +1,103 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { spawnSync } from 'node:child_process' + +import { describe, expect, it } from 'vitest' + +function runCli(args: string[], home: string) { + return spawnSync(process.execPath, ['--import', 'tsx', 'src/cli.ts', ...args], { + cwd: process.cwd(), + env: { + ...process.env, + CLAUDE_CONFIG_DIR: join(home, '.claude'), + HOME: home, + TZ: 'UTC', + }, + encoding: 'utf-8', + timeout: 30_000, + }) +} + +function userLine(sessionId: string, timestamp: string): string { + return JSON.stringify({ + type: 'user', + sessionId, + timestamp, + message: { role: 'user', content: 'do the thing' }, + }) +} + +function assistantLine(sessionId: string, timestamp: string, messageId: string): string { + return JSON.stringify({ + type: 'assistant', + sessionId, + timestamp, + message: { + id: messageId, + type: 'message', + role: 'assistant', + model: 'claude-sonnet-4-5', + content: [ + { type: 'text', text: 'done' }, + { type: 'tool_use', id: 'tu-1', name: 'Edit', input: { file_path: '/tmp/x', old_string: 'a', new_string: 'b' } }, + ], + usage: { input_tokens: 500, output_tokens: 50 }, + }, + }) +} + +describe('codeburn status --format menubar-json', () => { + it('returns valid MenubarPayload with expected 
top-level fields', async () => {
+    const home = await mkdtemp(join(tmpdir(), 'codeburn-menubar-'))
+
+    try {
+      const projectDir = join(home, '.claude', 'projects', 'myapp')
+      await mkdir(projectDir, { recursive: true })
+
+      const today = new Date()
+      const ymd = `${today.getUTCFullYear()}-${String(today.getUTCMonth() + 1).padStart(2, '0')}-${String(today.getUTCDate()).padStart(2, '0')}`
+
+      await writeFile(
+        join(projectDir, 'session.jsonl'),
+        [
+          userLine('s1', `${ymd}T10:00:00Z`),
+          assistantLine('s1', `${ymd}T10:01:00Z`, 'msg-1'),
+          userLine('s1', `${ymd}T11:00:00Z`),
+          assistantLine('s1', `${ymd}T11:01:00Z`, 'msg-2'),
+        ].join('\n'),
+      )
+
+      const result = runCli([
+        'status',
+        '--format', 'menubar-json',
+        '--period', 'today',
+        '--provider', 'all',
+        '--no-optimize',
+      ], home)
+
+      expect(result.status, `stderr: ${result.stderr}`).toBe(0)
+
+      const payload = JSON.parse(result.stdout) as Record<string, unknown>
+
+      expect(payload).toHaveProperty('generated')
+      expect(payload).toHaveProperty('current')
+      expect(payload).toHaveProperty('optimize')
+      expect(payload).toHaveProperty('history')
+
+      const current = payload['current'] as Record<string, unknown>
+      expect(current['cost']).toBeGreaterThan(0)
+      expect(current['calls']).toBe(2)
+      expect(current['sessions']).toBe(1)
+      expect(current).toHaveProperty('oneShotRate')
+      expect(current).toHaveProperty('topActivities')
+      expect(current).toHaveProperty('topModels')
+      expect(current).toHaveProperty('providers')
+
+      const history = payload['history'] as { daily: unknown[] }
+      expect(Array.isArray(history.daily)).toBe(true)
+    } finally {
+      await rm(home, { recursive: true, force: true })
+    }
+  })
+})
diff --git a/tests/parser-compact-entry.test.ts b/tests/parser-compact-entry.test.ts
new file mode 100644
index 0000000..7c973c4
--- /dev/null
+++ b/tests/parser-compact-entry.test.ts
@@ -0,0 +1,396 @@
+import { describe, it, expect } from 'vitest'
+
+import { compactEntry } from '../src/parser.js'
+import type { JournalEntry } from '../src/types.js'
+
+function entry(overrides: Partial<JournalEntry> & Record<string, unknown>): JournalEntry {
+  return { type: 'user', ...overrides } as JournalEntry
+}
+
+describe('compactEntry', () => {
+  it('preserves type, timestamp, sessionId, cwd', () => {
+    const raw = entry({ type: 'user', timestamp: 't1', sessionId: 's1', cwd: '/foo' })
+    const c = compactEntry(raw)
+    expect(c.type).toBe('user')
+    expect(c.timestamp).toBe('t1')
+    expect(c.sessionId).toBe('s1')
+    expect(c.cwd).toBe('/foo')
+  })
+
+  it('strips unknown catch-all fields', () => {
+    const raw = entry({
+      type: 'assistant',
+      toolResult: { type: 'tool_result', content: 'x'.repeat(10_000) },
+      someHugeField: 'y'.repeat(10_000),
+    })
+    const c = compactEntry(raw)
+    expect((c as Record<string, unknown>)['toolResult']).toBeUndefined()
+    expect((c as Record<string, unknown>)['someHugeField']).toBeUndefined()
+  })
+
+  it('preserves deferred_tools_delta attachment with copied names', () => {
+    const raw = entry({
+      type: 'attachment',
+      attachment: {
+        type: 'deferred_tools_delta',
+        addedNames: ['mcp__svc__t1', 'Bash'],
+        extraData: 'should be dropped',
+      },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as Record<string, unknown>
+    expect(att['type']).toBe('deferred_tools_delta')
+    expect(att['addedNames']).toEqual(['mcp__svc__t1', 'Bash'])
+    expect(att['extraData']).toBeUndefined()
+  })
+
+  it('copies addedNames into a new array (not by reference)', () => {
+    const originalNames = ['mcp__a__b', 'Bash']
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'deferred_tools_delta', addedNames: originalNames },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as { addedNames: string[] }
+    expect(att.addedNames).not.toBe(originalNames)
+    expect(att.addedNames).toEqual(originalNames)
+  })
+
+  it('caps addedNames at 1000 entries', () => {
+    const names = Array.from({ length: 2000 }, (_, i) => `mcp__svc__t${i}`)
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'deferred_tools_delta', addedNames: names },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as { addedNames: string[] }
+    expect(att.addedNames).toHaveLength(1000)
+  })
+
+  it('filters non-string entries from addedNames', () => {
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'deferred_tools_delta', addedNames: [42, null, 'mcp__a__b', undefined] },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as { addedNames: string[] }
+    expect(att.addedNames).toEqual(['mcp__a__b'])
+  })
+
+  it('drops non-deferred_tools_delta attachments', () => {
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'other', data: 'x'.repeat(10_000) },
+    })
+    const c = compactEntry(raw)
+    expect((c as Record<string, unknown>)['attachment']).toBeUndefined()
+  })
+
+  it('caps user message string content at 2000', () => {
+    const longText = 'a'.repeat(5000)
+    const raw = entry({
+      type: 'user',
+      message: { role: 'user' as const, content: longText },
+    })
+    const c = compactEntry(raw)
+    expect(c.message!.role).toBe('user')
+    const content = (c.message as { content: string }).content
+    expect(content.length).toBe(2000)
+  })
+
+  it('caps total user text across all blocks at 2000', () => {
+    const raw = entry({
+      type: 'user',
+      message: {
+        role: 'user' as const,
+        content: [
+          { type: 'text' as const, text: 'a'.repeat(1500) },
+          { type: 'text' as const, text: 'b'.repeat(1500) },
+          { type: 'text' as const, text: 'c'.repeat(1500) },
+          { type: 'image' as const, source: 'big data' },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const content = (c.message as { content: Array<{ type: string; text: string }> }).content
+    expect(content).toHaveLength(2)
+    expect(content[0]!.text.length).toBe(1500)
+    expect(content[1]!.text.length).toBe(500)
+  })
+
+  it('compacts assistant tool_use blocks, dropping text and thinking, preserving id', () => {
+    const raw = entry({
+      type: 'assistant',
+      timestamp: 't1',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 
'claude-opus-4-6',
+        id: 'msg_123',
+        usage: { input_tokens: 100, output_tokens: 200 },
+        content: [
+          { type: 'text', text: 'x'.repeat(50_000) },
+          { type: 'thinking', thinking: 'y'.repeat(50_000) },
+          { type: 'tool_use', id: 'tu1', name: 'Read', input: { file_path: '/foo', huge: 'z'.repeat(10_000) } },
+          { type: 'tool_use', id: 'tu2', name: 'Edit', input: { old_string: 'a'.repeat(5000), new_string: 'b'.repeat(5000) } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ type: string; id?: string; name?: string; input?: Record<string, unknown> }> }
+    expect(msg.content).toHaveLength(2)
+    expect(msg.content[0]!.name).toBe('Read')
+    expect(msg.content[0]!.id).toBe('tu1')
+    expect(msg.content[0]!.input).toEqual({})
+    expect(msg.content[1]!.name).toBe('Edit')
+    expect(msg.content[1]!.id).toBe('tu2')
+    expect(msg.content[1]!.input).toEqual({})
+  })
+
+  it('caps tool_use blocks at 500 per message', () => {
+    const blocks = Array.from({ length: 600 }, (_, i) => ({
+      type: 'tool_use' as const,
+      id: `tu${i}`,
+      name: `Tool${i}`,
+      input: {},
+    }))
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: blocks,
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: unknown[] }
+    expect(msg.content).toHaveLength(500)
+  })
+
+  it('preserves model, usage (destructured), and id on assistant messages', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        id: 'msg_abc',
+        usage: {
+          input_tokens: 50,
+          output_tokens: 100,
+          cache_read_input_tokens: 25,
+          extraGarbage: 'should not survive',
+        },
+        content: [],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { model: string; id: string; usage: Record<string, unknown> }
+    expect(msg.model).toBe('claude-opus-4-6')
+    expect(msg.id).toBe('msg_abc')
+    expect(msg.usage['input_tokens']).toBe(50)
+    expect(msg.usage['output_tokens']).toBe(100)
+    expect(msg.usage['cache_read_input_tokens']).toBe(25)
+    expect(msg.usage['extraGarbage']).toBeUndefined()
+  })
+
+  it('deep-copies usage nested objects, stripping extra keys', () => {
+    const cacheCreation = { ephemeral_5m_input_tokens: 100, ephemeral_1h_input_tokens: 200, extraJunk: 'big' }
+    const serverToolUse = { web_search_requests: 3, web_fetch_requests: 1, extraJunk: 'big' }
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: {
+          input_tokens: 10,
+          output_tokens: 10,
+          speed: 'fast',
+          cache_creation: cacheCreation,
+          server_tool_use: serverToolUse,
+        },
+        content: [],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { usage: Record<string, unknown> }
+    expect(msg.usage['speed']).toBe('fast')
+    const cc = msg.usage['cache_creation'] as Record<string, unknown>
+    expect(cc['ephemeral_5m_input_tokens']).toBe(100)
+    expect(cc['ephemeral_1h_input_tokens']).toBe(200)
+    expect(cc['extraJunk']).toBeUndefined()
+    expect(cc).not.toBe(cacheCreation)
+    const stu = msg.usage['server_tool_use'] as Record<string, unknown>
+    expect(stu['web_search_requests']).toBe(3)
+    expect(stu['web_fetch_requests']).toBe(1)
+    expect(stu['extraJunk']).toBeUndefined()
+    expect(stu).not.toBe(serverToolUse)
+  })
+
+  it('keeps Skill input.skill and input.name, type-checked and capped', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: [
+          { type: 'tool_use', id: 'tu', name: 'Skill', input: { skill: 'graphify', args: 'huge arg data' } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ input: Record<string, unknown> }> }
+    expect(msg.content[0]!.input['skill']).toBe('graphify')
+    expect(msg.content[0]!.input['args']).toBeUndefined()
+  })
+
+  it('rejects non-string Skill input.skill and caps long names', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: [
+          { type: 'tool_use', id: 'tu1', name: 'Skill', input: { skill: { malicious: 'x'.repeat(10_000) } } },
+          { type: 'tool_use', id: 'tu2', name: 'Skill', input: { skill: 'a'.repeat(500) } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ input: Record<string, unknown> }> }
+    expect(msg.content[0]!.input['skill']).toBeUndefined()
+    expect((msg.content[1]!.input['skill'] as string).length).toBe(200)
+  })
+
+  it('keeps Bash input.command capped at 2000 for bash command extraction', () => {
+    const longCmd = 'npm run build && '.repeat(200)
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: [
+          { type: 'tool_use', id: 'tu', name: 'Bash', input: { command: longCmd, description: 'big desc' } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ input: Record<string, unknown> }> }
+    const cmd = msg.content[0]!.input['command'] as string
+    expect(cmd.length).toBe(2000)
+    expect(msg.content[0]!.input['description']).toBeUndefined()
+  })
+
+  it('handles entry with no message field', () => {
+    const raw = entry({ type: 'system', timestamp: 't1', cwd: '/x' })
+    const c = compactEntry(raw)
+    expect(c.type).toBe('system')
+    expect(c.timestamp).toBe('t1')
+    expect(c.message).toBeUndefined()
+  })
+
+  it('handles assistant message with no usage (non-standard)', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        content: [{ type: 'text', text: 'response' 
}], + }, + }) + const c = compactEntry(raw) + expect(c.message).toBeUndefined() + }) + + it('handles unexpected message role (neither user nor assistant)', () => { + const raw = entry({ + type: 'system', + message: { role: 'system' as never, content: 'sys prompt' }, + }) + const c = compactEntry(raw) + expect(c.message).toBeUndefined() + }) + + it('tolerates null elements in user content array', () => { + const raw = entry({ + type: 'user', + message: { + role: 'user' as const, + content: [null, undefined, { type: 'text', text: 'ok' }, 42, { type: 'text' }] as never, + }, + }) + const c = compactEntry(raw) + const content = (c.message as { content: Array<{ text: string }> }).content + expect(content).toHaveLength(1) + expect(content[0]!.text).toBe('ok') + }) + + it('tolerates assistant content that is not an array', () => { + const raw = entry({ + type: 'assistant', + message: { + type: 'message' as const, + role: 'assistant' as const, + model: 'claude-opus-4-6', + usage: { input_tokens: 10, output_tokens: 10 }, + content: 'not an array' as never, + }, + }) + const c = compactEntry(raw) + const msg = c.message as { content: unknown[] } + expect(msg.content).toEqual([]) + }) + + it('tolerates null elements in assistant content array', () => { + const raw = entry({ + type: 'assistant', + message: { + type: 'message' as const, + role: 'assistant' as const, + model: 'claude-opus-4-6', + usage: { input_tokens: 10, output_tokens: 10 }, + content: [null, { type: 'tool_use', id: 'tu1', name: 'Read', input: {} }, undefined] as never, + }, + }) + const c = compactEntry(raw) + const msg = c.message as { content: Array<{ name: string }> } + expect(msg.content).toHaveLength(1) + expect(msg.content[0]!.name).toBe('Read') + }) + + it('memory reduction: compacted entry is much smaller than raw', () => { + const hugeContent = Array.from({ length: 20 }, (_, i) => ({ + type: i % 2 === 0 ? 
'text' : 'tool_result', + text: 'x'.repeat(100_000), + content: 'y'.repeat(100_000), + })) + const raw = entry({ + type: 'assistant', + timestamp: '2026-01-01T00:00:00', + message: { + type: 'message' as const, + role: 'assistant' as const, + model: 'claude-opus-4-6', + id: 'msg_1', + usage: { input_tokens: 1000, output_tokens: 500 }, + content: hugeContent as never, + }, + toolResult: { content: 'z'.repeat(500_000) }, + }) + const rawSize = JSON.stringify(raw).length + const compacted = compactEntry(raw) + const compactedSize = JSON.stringify(compacted).length + expect(rawSize).toBeGreaterThan(2_000_000) + expect(compactedSize).toBeLessThan(500) + }) +}) diff --git a/tests/parser-large-session.test.ts b/tests/parser-large-session.test.ts new file mode 100644 index 0000000..190ef86 --- /dev/null +++ b/tests/parser-large-session.test.ts @@ -0,0 +1,148 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +import { describe, expect, it, beforeEach, afterEach } from 'vitest' + +import { parseAllSessions, clearSessionCache } from '../src/parser.js' +import type { DateRange } from '../src/types.js' + +let home: string + +beforeEach(async () => { + home = await mkdtemp(join(tmpdir(), 'codeburn-large-')) + process.env['CLAUDE_CONFIG_DIR'] = join(home, '.claude') +}) + +afterEach(async () => { + clearSessionCache() + delete process.env['CLAUDE_CONFIG_DIR'] + await rm(home, { recursive: true, force: true }) +}) + +function userLine(sessionId: string, timestamp: string, textSize = 100): string { + return JSON.stringify({ + type: 'user', + sessionId, + timestamp, + cwd: '/projects/app', + message: { role: 'user', content: 'x'.repeat(textSize) }, + }) +} + +function assistantLine(sessionId: string, timestamp: string, messageId: string, opts?: { + contentSize?: number + toolCount?: number +}): string { + const contentSize = opts?.contentSize ?? 0 + const toolCount = opts?.toolCount ?? 
1 + const content: unknown[] = [] + if (contentSize > 0) { + content.push({ type: 'text', text: 'y'.repeat(contentSize) }) + content.push({ type: 'thinking', thinking: 'z'.repeat(contentSize) }) + } + for (let i = 0; i < toolCount; i++) { + content.push({ + type: 'tool_use', + id: `tu-${i}`, + name: i === 0 ? 'Edit' : 'Read', + input: { file_path: '/tmp/x', big: 'w'.repeat(contentSize) }, + }) + } + return JSON.stringify({ + type: 'assistant', + sessionId, + timestamp, + message: { + id: messageId, + type: 'message', + role: 'assistant', + model: 'claude-sonnet-4-5', + content, + usage: { input_tokens: 1000, output_tokens: 100 }, + }, + }) +} + +function attachmentLine(sessionId: string, timestamp: string): string { + return JSON.stringify({ + type: 'attachment', + sessionId, + timestamp, + attachment: { + type: 'deferred_tools_delta', + addedNames: ['Bash', 'Edit', 'Read', 'mcp__hf__hub_search'], + }, + }) +} + +describe('parseAllSessions with large Claude fixture', () => { + it('correctly parses sessions with bulky text/thinking/tool_result blocks', async () => { + const projectDir = join(home, '.claude', 'projects', 'bigapp') + await mkdir(projectDir, { recursive: true }) + + const lines: string[] = [] + lines.push(attachmentLine('s1', '2026-04-10T09:00:00Z')) + for (let i = 0; i < 50; i++) { + const ts = `2026-04-10T${String(9 + Math.floor(i / 10)).padStart(2, '0')}:${String((i % 10) * 5).padStart(2, '0')}:00Z` + lines.push(userLine('s1', ts, 5000)) + lines.push(assistantLine('s1', ts.replace(':00Z', ':30Z'), `msg-${i}`, { + contentSize: 50_000, + toolCount: 3, + })) + } + + await writeFile(join(projectDir, 'session.jsonl'), lines.join('\n')) + + const range: DateRange = { + start: new Date('2026-04-10T00:00:00Z'), + end: new Date('2026-04-10T23:59:59Z'), + } + + const projects = await parseAllSessions(range, 'claude') + + expect(projects.length).toBeGreaterThan(0) + const proj = projects[0]! 
+ expect(proj.totalApiCalls).toBe(50) + expect(proj.totalCostUSD).toBeGreaterThan(0) + + const sess = proj.sessions[0]! + expect(sess.turns.length).toBe(50) + + for (const turn of sess.turns) { + expect(turn.userMessage.length).toBeLessThanOrEqual(2000) + expect(turn.assistantCalls.length).toBe(1) + const call = turn.assistantCalls[0]! + expect(call.tools).toContain('Edit') + expect(call.tools).toContain('Read') + expect(call.model).toBe('claude-sonnet-4-5') + } + + expect(sess.mcpInventory).toContain('mcp__hf__hub_search') + }) + + it('handles malformed JSONL lines without crashing', async () => { + const projectDir = join(home, '.claude', 'projects', 'baddata') + await mkdir(projectDir, { recursive: true }) + + const lines = [ + 'not json at all', + '{"type": "user", "sessionId": "s1", "timestamp": "2026-04-10T10:00:00Z", "message": {"role": "user", "content": [null, {"type": "text", "text": "hello"}, 42]}}', + '{"type": "assistant", "sessionId": "s1", "timestamp": "2026-04-10T10:01:00Z", "message": {"id": "m1", "type": "message", "role": "assistant", "model": "claude-sonnet-4-5", "content": "not-an-array", "usage": {"input_tokens": 100, "output_tokens": 50}}}', + '{"type": "assistant", "sessionId": "s1", "timestamp": "2026-04-10T10:02:00Z", "message": {"id": "m2", "type": "message", "role": "assistant", "model": "claude-sonnet-4-5", "content": [null, {"type": "tool_use", "id": "t1", "name": "Read", "input": {}}], "usage": {"input_tokens": 100, "output_tokens": 50}}}', + ] + + await writeFile(join(projectDir, 'session.jsonl'), lines.join('\n')) + + const range: DateRange = { + start: new Date('2026-04-10T00:00:00Z'), + end: new Date('2026-04-10T23:59:59Z'), + } + + const projects = await parseAllSessions(range, 'claude') + expect(projects.length).toBeGreaterThan(0) + + const sess = projects[0]!.sessions[0]! + expect(sess.apiCalls).toBeGreaterThanOrEqual(1) + }) +})