codeburn/tests/parser-large-json-scanner.test.ts
iamtoruk 2fb078bdfb Fix V8 OOM crash on 30-day period with Buffer-based line reader and large-line parser
Three-layer fix for V8 heap exhaustion when parsing heavy session data:

1. Buffer-based readSessionLines (fs-utils.ts): Replace readline with raw
   Buffer streaming using Buffer.indexOf(0x0a). Eliminates ConsString trees
   that caused OOM when regex-flattening 100MB+ lines. Two-state machine
   (ACCUMULATING/SCANNING) skips old lines at ~2KB cost instead of 200MB.

2. Large-line streaming parser (parser.ts): Hand-written JSON scanner for
   lines >32KB extracts only cost/token/tool fields without JSON.parse,
   avoiding full object graph allocation. Dual string/Buffer paths.

3. Dashboard memory management (dashboard.tsx): Disable auto-refresh for
   heavy periods (30d/month/all), clear old dataset before reload via
   nextTick to allow GC, prevent overlapping reloads with mutex, lazy
   optimize scanning on keypress instead of useEffect.

Also fixes three race conditions in dashboard reload deduplication:
- Early return after nextTick bypassing finally block (permanent mutex lock)
- A->B->A period switching dropping final reload (stale pending)
- Stale pendingReloadRef not cleared when in-flight matches request
2026-05-15 23:15:26 -07:00

87 lines
2.8 KiB
TypeScript

import { describe, expect, it } from 'vitest'
import { parseJsonlLine } from '../src/parser.js'
/**
 * Builds a user-turn JSONL line whose serialized size exceeds the parser's
 * 32KB large-line threshold (a 40,000-char fake image payload pads it out),
 * so `parseJsonlLine` takes the hand-written scanner path rather than
 * `JSON.parse`. The text part deliberately exceeds 2000 chars to exercise
 * the scanner's text-capping behavior.
 */
function largeUserLine(): string {
  const record = {
    type: 'user',
    sessionId: 's1',
    timestamp: '2026-05-01T00:00:00Z',
    cwd: '/repo',
    message: {
      role: 'user',
      content: [
        // Oversized binary-ish payload: forces the line past 32KB.
        { type: 'image', source: { data: 'x'.repeat(40_000) } },
        // Recognizable prefix plus filler to test text extraction + capping.
        { type: 'text', text: `hello ${'a'.repeat(3000)}` },
      ],
    },
  }
  return JSON.stringify(record)
}
/**
 * Builds an assistant-turn JSONL line past the 32KB large-line threshold.
 * Contains a 40,000-char text block plus two tool_use entries whose inputs
 * mix fields the optimize view needs (file_path, subagent_type) with bulky
 * fields the scanner is expected to drop (content, prompt), and a usage
 * object for token accounting.
 *
 * NOTE: key insertion order is kept identical to the original fixture so
 * the serialized JSON string is byte-for-byte the same.
 */
function largeAssistantLine(): string {
  // Padding text that pushes the serialized line over 32KB.
  const bigText = { type: 'text', text: 'x'.repeat(40_000) }
  // Tool inputs carry one field optimize needs and one it should discard.
  const readTool = {
    type: 'tool_use',
    id: 'read1',
    name: 'Read',
    input: { file_path: '/tmp/file.ts', content: 'drop me' },
  }
  const agentTool = {
    type: 'tool_use',
    id: 'agent1',
    name: 'Agent',
    input: { subagent_type: 'reviewer', prompt: 'drop me' },
  }
  const usage = {
    input_tokens: 100,
    output_tokens: 20,
    cache_read_input_tokens: 300,
  }
  return JSON.stringify({
    type: 'assistant',
    sessionId: 's1',
    timestamp: '2026-05-01T00:00:01Z',
    cwd: '/repo',
    message: {
      id: 'm1',
      type: 'message',
      role: 'assistant',
      model: 'claude-sonnet-4-5',
      content: [bigText, readTool, agentTool],
      usage,
    },
  })
}
describe('large JSONL compact scanner', () => {
  it('extracts user text from array content without full JSON.parse', () => {
    const result = parseJsonlLine(largeUserLine())
    expect(result?.type).toBe('user')
    // On the large-line path the scanner flattens array content down to the
    // text parts; only user-role messages carry that flattened string.
    const text = result?.message?.role === 'user' ? result.message.content : ''
    expect(text).toBeTypeOf('string')
    expect((text as string).startsWith('hello ')).toBe(true)
    // Scanner caps extracted text at 2000 chars (fixture text is 3006).
    expect((text as string).length).toBe(2000)
  })
  it('extracts capped tool inputs needed by optimize', () => {
    // Buffer input exercises the scanner's raw-Buffer path.
    const result = parseJsonlLine(Buffer.from(largeAssistantLine()))
    const message = result?.message
    expect(message?.role).toBe('assistant')
    if (message?.role !== 'assistant') return
    // Token usage fields survive intact.
    expect(message.usage.input_tokens).toBe(100)
    expect(message.usage.output_tokens).toBe(20)
    expect(message.usage.cache_read_input_tokens).toBe(300)
    // The oversized text block is dropped and tool inputs are trimmed to
    // just the fields optimize consumes (content/prompt are discarded).
    const expectedTools = [
      { type: 'tool_use', id: 'read1', name: 'Read', input: { file_path: '/tmp/file.ts' } },
      { type: 'tool_use', id: 'agent1', name: 'Agent', input: { subagent_type: 'reviewer' } },
    ]
    expect(message.content).toEqual(expectedTools)
  })
  it('extracts deferred MCP inventory from large attachment lines', () => {
    const expectedAttachment = {
      type: 'deferred_tools_delta',
      addedNames: ['Bash', 'mcp__svc__tool'],
    }
    // Padding pushes the line over the 32KB threshold so the scanner path
    // (not JSON.parse) must recover the attachment payload.
    const rawLine = JSON.stringify({
      type: 'attachment',
      sessionId: 's1',
      timestamp: '2026-05-01T00:00:02Z',
      padding: 'x'.repeat(40_000),
      attachment: expectedAttachment,
    })
    const result = parseJsonlLine(Buffer.from(rawLine)) as Record<string, unknown>
    expect(result['attachment']).toEqual(expectedAttachment)
  })
})