From a7bb7806186a3ceccdffc88ddd610525c289bbe8 Mon Sep 17 00:00:00 2001 From: iamtoruk Date: Fri, 15 May 2026 13:20:50 -0700 Subject: [PATCH] Reduce Claude parser OOM risk via entry compaction (0.9.9) Strip heavy fields from JournalEntry immediately after JSON.parse in the JSONL hot loop. Keeps only what downstream consumers need: type, timestamp, sessionId, cwd, compacted user text (2000 char total cap), assistant model/usage/id, tool_use names with Skill and Bash inputs, and MCP inventory attachments. Text, thinking, and tool_result blocks are dropped. Also removes redundant hydrateCache() from status --format json and terminal status paths, and clears the session cache between period parses to avoid pinning both today and month result sets. This is a mitigation, not a full fix. Very large month ranges still materialize full ProjectSummary.turns arrays. The real fix is the streaming single-pass parser refactor. --- CHANGELOG.md | 18 ++ package.json | 2 +- src/main.ts | 17 +- src/parser.ts | 104 +++++++- tests/cli-status-menubar.test.ts | 103 ++++++++ tests/parser-compact-entry.test.ts | 396 +++++++++++++++++++++++++++++ tests/parser-large-session.test.ts | 148 +++++++++++ 7 files changed, 779 insertions(+), 9 deletions(-) create mode 100644 tests/cli-status-menubar.test.ts create mode 100644 tests/parser-compact-entry.test.ts create mode 100644 tests/parser-large-session.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bf6977..a1d328c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,24 @@ a `createdAt` timestamp were defaulting to the current date, inflating Today's spend. Now skipped at both the SQL and application level. +## 0.9.9 - 2026-05-15 + +### Fixed (CLI) +- **Reduced Claude parser OOM risk.** Large Claude JSONL sessions retained + full entry objects (text, thinking blocks, tool results) in memory during + parsing, causing V8 heap exhaustion on heavy usage months. 
Entries are now + compacted immediately after JSON.parse, keeping only the fields needed for + cost/token aggregation. This is a mitigation - very heavy users may still + need the streaming parser refactor planned next. +- **Redundant `hydrateCache()` in status commands.** The `status --format json` + and terminal `status` paths hydrated the daily cache before calling + `parseAllSessions` directly, doubling memory pressure for no benefit. + Removed. The menubar-json path still hydrates as needed. +- **Session cache retained between status parses.** The `status --format json` + path parsed today and month ranges without clearing the in-process session + cache between them, keeping both result sets pinned. Cache is now cleared + after each period is consumed. + ## 0.9.8 - 2026-05-10 ### Added (CLI) diff --git a/package.json b/package.json index c24699d..10e9f0c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "codeburn", - "version": "0.9.8", + "version": "0.9.9", "description": "See where your AI coding tokens go - by task, tool, model, and project", "type": "module", "main": "./dist/cli.js", diff --git a/src/main.ts b/src/main.ts index eaa4d0e..9d82a18 100644 --- a/src/main.ts +++ b/src/main.ts @@ -2,7 +2,7 @@ import { Command } from 'commander' import { installMenubarApp } from './menubar-installer.js' import { exportCsv, exportJson, type PeriodExport } from './export.js' import { loadPricing, setModelAliases } from './models.js' -import { parseAllSessions, filterProjectsByName } from './parser.js' +import { parseAllSessions, filterProjectsByName, clearSessionCache } from './parser.js' import { convertCost } from './currency.js' import { renderStatusBar } from './format.js' import { type PeriodData, type ProviderCost } from './menubar-json.js' @@ -529,9 +529,12 @@ program } if (opts.format === 'json') { - await hydrateCache() - const todayData = buildPeriodData('today', fp(await parseAllSessions(getDateRange('today').range, pf))) - const 
monthData = buildPeriodData('month', fp(await parseAllSessions(getDateRange('month').range, pf)))
+      const todayProjects = fp(await parseAllSessions(getDateRange('today').range, pf))
+      const todayData = buildPeriodData('today', todayProjects)
+      clearSessionCache()
+      const monthProjects = fp(await parseAllSessions(getDateRange('month').range, pf))
+      const monthData = buildPeriodData('month', monthProjects)
+      clearSessionCache()
       const { code, rate } = getCurrency()
       const payload: {
         currency: string
@@ -551,9 +554,9 @@ program
       return
     }
 
-    await hydrateCache()
-    const monthProjects = fp(await parseAllSessions(getDateRange('month').range, pf))
-    console.log(renderStatusBar(monthProjects))
+    const monthProjects2 = fp(await parseAllSessions(getDateRange('month').range, pf))
+    clearSessionCache()
+    console.log(renderStatusBar(monthProjects2))
   })
 
 program
diff --git a/src/parser.ts b/src/parser.ts
index 9ab75ce..66ddbeb 100644
--- a/src/parser.ts
+++ b/src/parser.ts
@@ -40,6 +40,108 @@ function parseJsonlLine(line: string): JournalEntry | null {
   }
 }
 
+const USER_TEXT_CAP = 2000
+const BASH_COMMAND_CAP = 2000
+const MAX_TOOL_BLOCKS = 500
+const MAX_ADDED_NAMES = 1000
+
+export function compactEntry(raw: JournalEntry): JournalEntry {
+  const entry: JournalEntry = { type: raw.type }
+
+  if (raw.timestamp !== undefined) entry.timestamp = raw.timestamp
+  if (raw.sessionId !== undefined) entry.sessionId = raw.sessionId
+  if (raw.cwd !== undefined) entry.cwd = raw.cwd
+
+  const att = (raw as Record<string, unknown>)['attachment']
+  if (att && typeof att === 'object') {
+    const a = att as Record<string, unknown>
+    if (a['type'] === 'deferred_tools_delta' && Array.isArray(a['addedNames'])) {
+      const names: string[] = []
+      for (let i = 0; i < Math.min(a['addedNames'].length, MAX_ADDED_NAMES); i++) {
+        const n = a['addedNames'][i]
+        if (typeof n === 'string') names.push(n)
+      }
+      ;(entry as Record<string, unknown>)['attachment'] = { type: 'deferred_tools_delta', addedNames: names }
+    }
+  }
+
+  if (!raw.message) return entry
+
+  if (raw.message.role === 'user') {
+    const content = raw.message.content
+    if (typeof content === 'string') {
+      entry.message = { role: 'user', content: content.slice(0, USER_TEXT_CAP) }
+    } else if (Array.isArray(content)) {
+      let remaining = USER_TEXT_CAP
+      const blocks: { type: 'text'; text: string }[] = []
+      for (const b of content) {
+        if (remaining <= 0) break
+        if (!b || typeof b !== 'object' || b.type !== 'text') continue
+        const text = (b as { text?: unknown }).text
+        if (typeof text !== 'string') continue
+        const sliced = text.slice(0, remaining)
+        blocks.push({ type: 'text', text: sliced })
+        remaining -= sliced.length
+      }
+      entry.message = { role: 'user', content: blocks }
+    }
+    return entry
+  }
+
+  const msg = raw.message as AssistantMessageContent
+  if (!msg.usage || !msg.model) return entry
+
+  const rawContent = msg.content
+  const contentArr = Array.isArray(rawContent) ? rawContent : []
+  const toolBlocks = contentArr.filter((b): b is ToolUseBlock => b != null && typeof b === 'object' && b.type === 'tool_use')
+  const compactContent: ContentBlock[] = toolBlocks.slice(0, MAX_TOOL_BLOCKS).map(tb => {
+    let input: Record<string, unknown> = {}
+    if (tb.name === 'Skill') {
+      const ri = (tb.input ?? {}) as Record<string, unknown>
+      if (typeof ri['skill'] === 'string') input['skill'] = (ri['skill'] as string).slice(0, 200)
+      if (typeof ri['name'] === 'string') input['name'] = (ri['name'] as string).slice(0, 200)
+    } else if (BASH_TOOLS.has(tb.name)) {
+      const ri = (tb.input ?? {}) as Record<string, unknown>
+      if (typeof ri['command'] === 'string') {
+        input['command'] = (ri['command'] as string).slice(0, BASH_COMMAND_CAP)
+      }
+    }
+    return { type: 'tool_use' as const, id: tb.id ?? '', name: tb.name, input }
+  })
+
+  const u = msg.usage
+  const compactUsage: AssistantMessageContent['usage'] = {
+    input_tokens: u.input_tokens,
+    output_tokens: u.output_tokens,
+  }
+  if (u.cache_creation_input_tokens) compactUsage.cache_creation_input_tokens = u.cache_creation_input_tokens
+  if (u.cache_creation) {
+    compactUsage.cache_creation = {
+      ...(u.cache_creation.ephemeral_5m_input_tokens ? { ephemeral_5m_input_tokens: u.cache_creation.ephemeral_5m_input_tokens } : {}),
+      ...(u.cache_creation.ephemeral_1h_input_tokens ? { ephemeral_1h_input_tokens: u.cache_creation.ephemeral_1h_input_tokens } : {}),
+    }
+  }
+  if (u.cache_read_input_tokens) compactUsage.cache_read_input_tokens = u.cache_read_input_tokens
+  if (u.server_tool_use) {
+    compactUsage.server_tool_use = {
+      ...(u.server_tool_use.web_search_requests ? { web_search_requests: u.server_tool_use.web_search_requests } : {}),
+      ...(u.server_tool_use.web_fetch_requests ? { web_fetch_requests: u.server_tool_use.web_fetch_requests } : {}),
+    }
+  }
+  if (u.speed) compactUsage.speed = u.speed
+
+  entry.message = {
+    type: 'message',
+    role: 'assistant',
+    model: msg.model,
+    usage: compactUsage,
+    content: compactContent,
+    ...(msg.id ? 
{ id: msg.id } : {}), + } + + return entry +} + function extractToolNames(content: ContentBlock[]): string[] { return content .filter((b): b is ToolUseBlock => b.type === 'tool_use') @@ -419,7 +521,7 @@ async function parseSessionFile( for await (const line of readSessionLines(filePath)) { hasLines = true const entry = parseJsonlLine(line) - if (entry) entries.push(entry) + if (entry) entries.push(compactEntry(entry)) } if (!hasLines) return null diff --git a/tests/cli-status-menubar.test.ts b/tests/cli-status-menubar.test.ts new file mode 100644 index 0000000..22a6ae1 --- /dev/null +++ b/tests/cli-status-menubar.test.ts @@ -0,0 +1,103 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { spawnSync } from 'node:child_process' + +import { describe, expect, it } from 'vitest' + +function runCli(args: string[], home: string) { + return spawnSync(process.execPath, ['--import', 'tsx', 'src/cli.ts', ...args], { + cwd: process.cwd(), + env: { + ...process.env, + CLAUDE_CONFIG_DIR: join(home, '.claude'), + HOME: home, + TZ: 'UTC', + }, + encoding: 'utf-8', + timeout: 30_000, + }) +} + +function userLine(sessionId: string, timestamp: string): string { + return JSON.stringify({ + type: 'user', + sessionId, + timestamp, + message: { role: 'user', content: 'do the thing' }, + }) +} + +function assistantLine(sessionId: string, timestamp: string, messageId: string): string { + return JSON.stringify({ + type: 'assistant', + sessionId, + timestamp, + message: { + id: messageId, + type: 'message', + role: 'assistant', + model: 'claude-sonnet-4-5', + content: [ + { type: 'text', text: 'done' }, + { type: 'tool_use', id: 'tu-1', name: 'Edit', input: { file_path: '/tmp/x', old_string: 'a', new_string: 'b' } }, + ], + usage: { input_tokens: 500, output_tokens: 50 }, + }, + }) +} + +describe('codeburn status --format menubar-json', () => { + it('returns valid MenubarPayload with expected 
top-level fields', async () => {
+    const home = await mkdtemp(join(tmpdir(), 'codeburn-menubar-'))
+
+    try {
+      const projectDir = join(home, '.claude', 'projects', 'myapp')
+      await mkdir(projectDir, { recursive: true })
+
+      const today = new Date()
+      const ymd = `${today.getUTCFullYear()}-${String(today.getUTCMonth() + 1).padStart(2, '0')}-${String(today.getUTCDate()).padStart(2, '0')}`
+
+      await writeFile(
+        join(projectDir, 'session.jsonl'),
+        [
+          userLine('s1', `${ymd}T10:00:00Z`),
+          assistantLine('s1', `${ymd}T10:01:00Z`, 'msg-1'),
+          userLine('s1', `${ymd}T11:00:00Z`),
+          assistantLine('s1', `${ymd}T11:01:00Z`, 'msg-2'),
+        ].join('\n'),
+      )
+
+      const result = runCli([
+        'status',
+        '--format', 'menubar-json',
+        '--period', 'today',
+        '--provider', 'all',
+        '--no-optimize',
+      ], home)
+
+      expect(result.status, `stderr: ${result.stderr}`).toBe(0)
+
+      const payload = JSON.parse(result.stdout) as Record<string, unknown>
+
+      expect(payload).toHaveProperty('generated')
+      expect(payload).toHaveProperty('current')
+      expect(payload).toHaveProperty('optimize')
+      expect(payload).toHaveProperty('history')
+
+      const current = payload['current'] as Record<string, unknown>
+      expect(current['cost']).toBeGreaterThan(0)
+      expect(current['calls']).toBe(2)
+      expect(current['sessions']).toBe(1)
+      expect(current).toHaveProperty('oneShotRate')
+      expect(current).toHaveProperty('topActivities')
+      expect(current).toHaveProperty('topModels')
+      expect(current).toHaveProperty('providers')
+
+      const history = payload['history'] as { daily: unknown[] }
+      expect(Array.isArray(history.daily)).toBe(true)
+    } finally {
+      await rm(home, { recursive: true, force: true })
+    }
+  })
+})
diff --git a/tests/parser-compact-entry.test.ts b/tests/parser-compact-entry.test.ts
new file mode 100644
index 0000000..7c973c4
--- /dev/null
+++ b/tests/parser-compact-entry.test.ts
@@ -0,0 +1,396 @@
+import { describe, it, expect } from 'vitest'
+
+import { compactEntry } from '../src/parser.js'
+import type { JournalEntry } from '../src/types.js'
+
+function entry(overrides: Partial<JournalEntry> & Record<string, unknown>): JournalEntry {
+  return { type: 'user', ...overrides } as JournalEntry
+}
+
+describe('compactEntry', () => {
+  it('preserves type, timestamp, sessionId, cwd', () => {
+    const raw = entry({ type: 'user', timestamp: 't1', sessionId: 's1', cwd: '/foo' })
+    const c = compactEntry(raw)
+    expect(c.type).toBe('user')
+    expect(c.timestamp).toBe('t1')
+    expect(c.sessionId).toBe('s1')
+    expect(c.cwd).toBe('/foo')
+  })
+
+  it('strips unknown catch-all fields', () => {
+    const raw = entry({
+      type: 'assistant',
+      toolResult: { type: 'tool_result', content: 'x'.repeat(10_000) },
+      someHugeField: 'y'.repeat(10_000),
+    })
+    const c = compactEntry(raw)
+    expect((c as Record<string, unknown>)['toolResult']).toBeUndefined()
+    expect((c as Record<string, unknown>)['someHugeField']).toBeUndefined()
+  })
+
+  it('preserves deferred_tools_delta attachment with copied names', () => {
+    const raw = entry({
+      type: 'attachment',
+      attachment: {
+        type: 'deferred_tools_delta',
+        addedNames: ['mcp__svc__t1', 'Bash'],
+        extraData: 'should be dropped',
+      },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as Record<string, unknown>
+    expect(att['type']).toBe('deferred_tools_delta')
+    expect(att['addedNames']).toEqual(['mcp__svc__t1', 'Bash'])
+    expect(att['extraData']).toBeUndefined()
+  })
+
+  it('copies addedNames into a new array (not by reference)', () => {
+    const originalNames = ['mcp__a__b', 'Bash']
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'deferred_tools_delta', addedNames: originalNames },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as { addedNames: string[] }
+    expect(att.addedNames).not.toBe(originalNames)
+    expect(att.addedNames).toEqual(originalNames)
+  })
+
+  it('caps addedNames at 1000 entries', () => {
+    const names = Array.from({ length: 2000 }, (_, i) => `mcp__svc__t${i}`)
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'deferred_tools_delta', addedNames: names },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as { addedNames: string[] }
+    expect(att.addedNames).toHaveLength(1000)
+  })
+
+  it('filters non-string entries from addedNames', () => {
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'deferred_tools_delta', addedNames: [42, null, 'mcp__a__b', undefined] },
+    })
+    const c = compactEntry(raw)
+    const att = (c as Record<string, unknown>)['attachment'] as { addedNames: string[] }
+    expect(att.addedNames).toEqual(['mcp__a__b'])
+  })
+
+  it('drops non-deferred_tools_delta attachments', () => {
+    const raw = entry({
+      type: 'attachment',
+      attachment: { type: 'other', data: 'x'.repeat(10_000) },
+    })
+    const c = compactEntry(raw)
+    expect((c as Record<string, unknown>)['attachment']).toBeUndefined()
+  })
+
+  it('caps user message string content at 2000', () => {
+    const longText = 'a'.repeat(5000)
+    const raw = entry({
+      type: 'user',
+      message: { role: 'user' as const, content: longText },
+    })
+    const c = compactEntry(raw)
+    expect(c.message!.role).toBe('user')
+    const content = (c.message as { content: string }).content
+    expect(content.length).toBe(2000)
+  })
+
+  it('caps total user text across all blocks at 2000', () => {
+    const raw = entry({
+      type: 'user',
+      message: {
+        role: 'user' as const,
+        content: [
+          { type: 'text' as const, text: 'a'.repeat(1500) },
+          { type: 'text' as const, text: 'b'.repeat(1500) },
+          { type: 'text' as const, text: 'c'.repeat(1500) },
+          { type: 'image' as const, source: 'big data' },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const content = (c.message as { content: Array<{ type: string; text: string }> }).content
+    expect(content).toHaveLength(2)
+    expect(content[0]!.text.length).toBe(1500)
+    expect(content[1]!.text.length).toBe(500)
+  })
+
+  it('compacts assistant tool_use blocks, dropping text and thinking, preserving id', () => {
+    const raw = entry({
+      type: 'assistant',
+      timestamp: 't1',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 
'claude-opus-4-6',
+        id: 'msg_123',
+        usage: { input_tokens: 100, output_tokens: 200 },
+        content: [
+          { type: 'text', text: 'x'.repeat(50_000) },
+          { type: 'thinking', thinking: 'y'.repeat(50_000) },
+          { type: 'tool_use', id: 'tu1', name: 'Read', input: { file_path: '/foo', huge: 'z'.repeat(10_000) } },
+          { type: 'tool_use', id: 'tu2', name: 'Edit', input: { old_string: 'a'.repeat(5000), new_string: 'b'.repeat(5000) } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ type: string; id?: string; name?: string; input?: Record<string, unknown> }> }
+    expect(msg.content).toHaveLength(2)
+    expect(msg.content[0]!.name).toBe('Read')
+    expect(msg.content[0]!.id).toBe('tu1')
+    expect(msg.content[0]!.input).toEqual({})
+    expect(msg.content[1]!.name).toBe('Edit')
+    expect(msg.content[1]!.id).toBe('tu2')
+    expect(msg.content[1]!.input).toEqual({})
+  })
+
+  it('caps tool_use blocks at 500 per message', () => {
+    const blocks = Array.from({ length: 600 }, (_, i) => ({
+      type: 'tool_use' as const,
+      id: `tu${i}`,
+      name: `Tool${i}`,
+      input: {},
+    }))
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: blocks,
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: unknown[] }
+    expect(msg.content).toHaveLength(500)
+  })
+
+  it('preserves model, usage (destructured), and id on assistant messages', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        id: 'msg_abc',
+        usage: {
+          input_tokens: 50,
+          output_tokens: 100,
+          cache_read_input_tokens: 25,
+          extraGarbage: 'should not survive',
+        },
+        content: [],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { model: string; id: string; usage: Record<string, unknown> }
+    expect(msg.model).toBe('claude-opus-4-6')
+    expect(msg.id).toBe('msg_abc')
+    expect(msg.usage['input_tokens']).toBe(50)
+    expect(msg.usage['output_tokens']).toBe(100)
+    expect(msg.usage['cache_read_input_tokens']).toBe(25)
+    expect(msg.usage['extraGarbage']).toBeUndefined()
+  })
+
+  it('deep-copies usage nested objects, stripping extra keys', () => {
+    const cacheCreation = { ephemeral_5m_input_tokens: 100, ephemeral_1h_input_tokens: 200, extraJunk: 'big' }
+    const serverToolUse = { web_search_requests: 3, web_fetch_requests: 1, extraJunk: 'big' }
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: {
+          input_tokens: 10,
+          output_tokens: 10,
+          speed: 'fast',
+          cache_creation: cacheCreation,
+          server_tool_use: serverToolUse,
+        },
+        content: [],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { usage: Record<string, unknown> }
+    expect(msg.usage['speed']).toBe('fast')
+    const cc = msg.usage['cache_creation'] as Record<string, unknown>
+    expect(cc['ephemeral_5m_input_tokens']).toBe(100)
+    expect(cc['ephemeral_1h_input_tokens']).toBe(200)
+    expect(cc['extraJunk']).toBeUndefined()
+    expect(cc).not.toBe(cacheCreation)
+    const stu = msg.usage['server_tool_use'] as Record<string, unknown>
+    expect(stu['web_search_requests']).toBe(3)
+    expect(stu['web_fetch_requests']).toBe(1)
+    expect(stu['extraJunk']).toBeUndefined()
+    expect(stu).not.toBe(serverToolUse)
+  })
+
+  it('keeps Skill input.skill and input.name, type-checked and capped', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: [
+          { type: 'tool_use', id: 'tu', name: 'Skill', input: { skill: 'graphify', args: 'huge arg data' } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ input: Record<string, unknown> }> }
+    expect(msg.content[0]!.input['skill']).toBe('graphify')
+    expect(msg.content[0]!.input['args']).toBeUndefined()
+  })
+
+  it('rejects non-string Skill input.skill and caps long names', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: [
+          { type: 'tool_use', id: 'tu1', name: 'Skill', input: { skill: { malicious: 'x'.repeat(10_000) } } },
+          { type: 'tool_use', id: 'tu2', name: 'Skill', input: { skill: 'a'.repeat(500) } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ input: Record<string, unknown> }> }
+    expect(msg.content[0]!.input['skill']).toBeUndefined()
+    expect((msg.content[1]!.input['skill'] as string).length).toBe(200)
+  })
+
+  it('keeps Bash input.command capped at 2000 for bash command extraction', () => {
+    const longCmd = 'npm run build && '.repeat(200)
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        usage: { input_tokens: 10, output_tokens: 10 },
+        content: [
+          { type: 'tool_use', id: 'tu', name: 'Bash', input: { command: longCmd, description: 'big desc' } },
+        ],
+      },
+    })
+    const c = compactEntry(raw)
+    const msg = c.message as { content: Array<{ input: Record<string, unknown> }> }
+    const cmd = msg.content[0]!.input['command'] as string
+    expect(cmd.length).toBe(2000)
+    expect(msg.content[0]!.input['description']).toBeUndefined()
+  })
+
+  it('handles entry with no message field', () => {
+    const raw = entry({ type: 'system', timestamp: 't1', cwd: '/x' })
+    const c = compactEntry(raw)
+    expect(c.type).toBe('system')
+    expect(c.timestamp).toBe('t1')
+    expect(c.message).toBeUndefined()
+  })
+
+  it('handles assistant message with no usage (non-standard)', () => {
+    const raw = entry({
+      type: 'assistant',
+      message: {
+        type: 'message' as const,
+        role: 'assistant' as const,
+        model: 'claude-opus-4-6',
+        content: [{ type: 'text', text: 'response' 
}], + }, + }) + const c = compactEntry(raw) + expect(c.message).toBeUndefined() + }) + + it('handles unexpected message role (neither user nor assistant)', () => { + const raw = entry({ + type: 'system', + message: { role: 'system' as never, content: 'sys prompt' }, + }) + const c = compactEntry(raw) + expect(c.message).toBeUndefined() + }) + + it('tolerates null elements in user content array', () => { + const raw = entry({ + type: 'user', + message: { + role: 'user' as const, + content: [null, undefined, { type: 'text', text: 'ok' }, 42, { type: 'text' }] as never, + }, + }) + const c = compactEntry(raw) + const content = (c.message as { content: Array<{ text: string }> }).content + expect(content).toHaveLength(1) + expect(content[0]!.text).toBe('ok') + }) + + it('tolerates assistant content that is not an array', () => { + const raw = entry({ + type: 'assistant', + message: { + type: 'message' as const, + role: 'assistant' as const, + model: 'claude-opus-4-6', + usage: { input_tokens: 10, output_tokens: 10 }, + content: 'not an array' as never, + }, + }) + const c = compactEntry(raw) + const msg = c.message as { content: unknown[] } + expect(msg.content).toEqual([]) + }) + + it('tolerates null elements in assistant content array', () => { + const raw = entry({ + type: 'assistant', + message: { + type: 'message' as const, + role: 'assistant' as const, + model: 'claude-opus-4-6', + usage: { input_tokens: 10, output_tokens: 10 }, + content: [null, { type: 'tool_use', id: 'tu1', name: 'Read', input: {} }, undefined] as never, + }, + }) + const c = compactEntry(raw) + const msg = c.message as { content: Array<{ name: string }> } + expect(msg.content).toHaveLength(1) + expect(msg.content[0]!.name).toBe('Read') + }) + + it('memory reduction: compacted entry is much smaller than raw', () => { + const hugeContent = Array.from({ length: 20 }, (_, i) => ({ + type: i % 2 === 0 ? 
'text' : 'tool_result', + text: 'x'.repeat(100_000), + content: 'y'.repeat(100_000), + })) + const raw = entry({ + type: 'assistant', + timestamp: '2026-01-01T00:00:00', + message: { + type: 'message' as const, + role: 'assistant' as const, + model: 'claude-opus-4-6', + id: 'msg_1', + usage: { input_tokens: 1000, output_tokens: 500 }, + content: hugeContent as never, + }, + toolResult: { content: 'z'.repeat(500_000) }, + }) + const rawSize = JSON.stringify(raw).length + const compacted = compactEntry(raw) + const compactedSize = JSON.stringify(compacted).length + expect(rawSize).toBeGreaterThan(2_000_000) + expect(compactedSize).toBeLessThan(500) + }) +}) diff --git a/tests/parser-large-session.test.ts b/tests/parser-large-session.test.ts new file mode 100644 index 0000000..190ef86 --- /dev/null +++ b/tests/parser-large-session.test.ts @@ -0,0 +1,148 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' + +import { describe, expect, it, beforeEach, afterEach } from 'vitest' + +import { parseAllSessions, clearSessionCache } from '../src/parser.js' +import type { DateRange } from '../src/types.js' + +let home: string + +beforeEach(async () => { + home = await mkdtemp(join(tmpdir(), 'codeburn-large-')) + process.env['CLAUDE_CONFIG_DIR'] = join(home, '.claude') +}) + +afterEach(async () => { + clearSessionCache() + delete process.env['CLAUDE_CONFIG_DIR'] + await rm(home, { recursive: true, force: true }) +}) + +function userLine(sessionId: string, timestamp: string, textSize = 100): string { + return JSON.stringify({ + type: 'user', + sessionId, + timestamp, + cwd: '/projects/app', + message: { role: 'user', content: 'x'.repeat(textSize) }, + }) +} + +function assistantLine(sessionId: string, timestamp: string, messageId: string, opts?: { + contentSize?: number + toolCount?: number +}): string { + const contentSize = opts?.contentSize ?? 0 + const toolCount = opts?.toolCount ?? 
1 + const content: unknown[] = [] + if (contentSize > 0) { + content.push({ type: 'text', text: 'y'.repeat(contentSize) }) + content.push({ type: 'thinking', thinking: 'z'.repeat(contentSize) }) + } + for (let i = 0; i < toolCount; i++) { + content.push({ + type: 'tool_use', + id: `tu-${i}`, + name: i === 0 ? 'Edit' : 'Read', + input: { file_path: '/tmp/x', big: 'w'.repeat(contentSize) }, + }) + } + return JSON.stringify({ + type: 'assistant', + sessionId, + timestamp, + message: { + id: messageId, + type: 'message', + role: 'assistant', + model: 'claude-sonnet-4-5', + content, + usage: { input_tokens: 1000, output_tokens: 100 }, + }, + }) +} + +function attachmentLine(sessionId: string, timestamp: string): string { + return JSON.stringify({ + type: 'attachment', + sessionId, + timestamp, + attachment: { + type: 'deferred_tools_delta', + addedNames: ['Bash', 'Edit', 'Read', 'mcp__hf__hub_search'], + }, + }) +} + +describe('parseAllSessions with large Claude fixture', () => { + it('correctly parses sessions with bulky text/thinking/tool_result blocks', async () => { + const projectDir = join(home, '.claude', 'projects', 'bigapp') + await mkdir(projectDir, { recursive: true }) + + const lines: string[] = [] + lines.push(attachmentLine('s1', '2026-04-10T09:00:00Z')) + for (let i = 0; i < 50; i++) { + const ts = `2026-04-10T${String(9 + Math.floor(i / 10)).padStart(2, '0')}:${String((i % 10) * 5).padStart(2, '0')}:00Z` + lines.push(userLine('s1', ts, 5000)) + lines.push(assistantLine('s1', ts.replace(':00Z', ':30Z'), `msg-${i}`, { + contentSize: 50_000, + toolCount: 3, + })) + } + + await writeFile(join(projectDir, 'session.jsonl'), lines.join('\n')) + + const range: DateRange = { + start: new Date('2026-04-10T00:00:00Z'), + end: new Date('2026-04-10T23:59:59Z'), + } + + const projects = await parseAllSessions(range, 'claude') + + expect(projects.length).toBeGreaterThan(0) + const proj = projects[0]! 
+ expect(proj.totalApiCalls).toBe(50) + expect(proj.totalCostUSD).toBeGreaterThan(0) + + const sess = proj.sessions[0]! + expect(sess.turns.length).toBe(50) + + for (const turn of sess.turns) { + expect(turn.userMessage.length).toBeLessThanOrEqual(2000) + expect(turn.assistantCalls.length).toBe(1) + const call = turn.assistantCalls[0]! + expect(call.tools).toContain('Edit') + expect(call.tools).toContain('Read') + expect(call.model).toBe('claude-sonnet-4-5') + } + + expect(sess.mcpInventory).toContain('mcp__hf__hub_search') + }) + + it('handles malformed JSONL lines without crashing', async () => { + const projectDir = join(home, '.claude', 'projects', 'baddata') + await mkdir(projectDir, { recursive: true }) + + const lines = [ + 'not json at all', + '{"type": "user", "sessionId": "s1", "timestamp": "2026-04-10T10:00:00Z", "message": {"role": "user", "content": [null, {"type": "text", "text": "hello"}, 42]}}', + '{"type": "assistant", "sessionId": "s1", "timestamp": "2026-04-10T10:01:00Z", "message": {"id": "m1", "type": "message", "role": "assistant", "model": "claude-sonnet-4-5", "content": "not-an-array", "usage": {"input_tokens": 100, "output_tokens": 50}}}', + '{"type": "assistant", "sessionId": "s1", "timestamp": "2026-04-10T10:02:00Z", "message": {"id": "m2", "type": "message", "role": "assistant", "model": "claude-sonnet-4-5", "content": [null, {"type": "tool_use", "id": "t1", "name": "Read", "input": {}}], "usage": {"input_tokens": 100, "output_tokens": 50}}}', + ] + + await writeFile(join(projectDir, 'session.jsonl'), lines.join('\n')) + + const range: DateRange = { + start: new Date('2026-04-10T00:00:00Z'), + end: new Date('2026-04-10T23:59:59Z'), + } + + const projects = await parseAllSessions(range, 'claude') + expect(projects.length).toBeGreaterThan(0) + + const sess = projects[0]!.sessions[0]! + expect(sess.apiCalls).toBeGreaterThanOrEqual(1) + }) +})