mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-17 03:56:45 +00:00
Three-layer fix for V8 heap exhaustion when parsing heavy session data: 1. Buffer-based readSessionLines (fs-utils.ts): Replace readline with raw Buffer streaming using Buffer.indexOf(0x0a). Eliminates ConsString trees that caused OOM when regex-flattening 100MB+ lines. Two-state machine (ACCUMULATING/SCANNING) skips old lines at ~2KB cost instead of 200MB. 2. Large-line streaming parser (parser.ts): Hand-written JSON scanner for lines >32KB extracts only cost/token/tool fields without JSON.parse, avoiding full object graph allocation. Dual string/Buffer paths. 3. Dashboard memory management (dashboard.tsx): Disable auto-refresh for heavy periods (30d/month/all), clear old dataset before reload via nextTick to allow GC, prevent overlapping reloads with mutex, lazy optimize scanning on keypress instead of useEffect. Also fixes three race conditions in dashboard reload deduplication: - Early return after nextTick bypassing finally block (permanent mutex lock) - A->B->A period switching dropping final reload (stale pending) - Stale pendingReloadRef not cleared when in-flight matches request
87 lines
2.8 KiB
TypeScript
import { describe, expect, it } from 'vitest'
|
|
|
|
import { parseJsonlLine } from '../src/parser.js'
|
|
|
|
function largeUserLine(): string {
|
|
return JSON.stringify({
|
|
type: 'user',
|
|
sessionId: 's1',
|
|
timestamp: '2026-05-01T00:00:00Z',
|
|
cwd: '/repo',
|
|
message: {
|
|
role: 'user',
|
|
content: [
|
|
{ type: 'image', source: { data: 'x'.repeat(40_000) } },
|
|
{ type: 'text', text: 'hello ' + 'a'.repeat(3000) },
|
|
],
|
|
},
|
|
})
|
|
}
|
|
|
|
function largeAssistantLine(): string {
|
|
return JSON.stringify({
|
|
type: 'assistant',
|
|
sessionId: 's1',
|
|
timestamp: '2026-05-01T00:00:01Z',
|
|
cwd: '/repo',
|
|
message: {
|
|
id: 'm1',
|
|
type: 'message',
|
|
role: 'assistant',
|
|
model: 'claude-sonnet-4-5',
|
|
content: [
|
|
{ type: 'text', text: 'x'.repeat(40_000) },
|
|
{ type: 'tool_use', id: 'read1', name: 'Read', input: { file_path: '/tmp/file.ts', content: 'drop me' } },
|
|
{ type: 'tool_use', id: 'agent1', name: 'Agent', input: { subagent_type: 'reviewer', prompt: 'drop me' } },
|
|
],
|
|
usage: {
|
|
input_tokens: 100,
|
|
output_tokens: 20,
|
|
cache_read_input_tokens: 300,
|
|
},
|
|
},
|
|
})
|
|
}
|
|
|
|
describe('large JSONL compact scanner', () => {
|
|
it('extracts user text from array content without full JSON.parse', () => {
|
|
const parsed = parseJsonlLine(largeUserLine())
|
|
expect(parsed?.type).toBe('user')
|
|
const content = parsed?.message?.role === 'user' ? parsed.message.content : ''
|
|
expect(content).toBeTypeOf('string')
|
|
expect((content as string).startsWith('hello ')).toBe(true)
|
|
expect((content as string).length).toBe(2000)
|
|
})
|
|
|
|
it('extracts capped tool inputs needed by optimize', () => {
|
|
const parsed = parseJsonlLine(Buffer.from(largeAssistantLine()))
|
|
const msg = parsed?.message
|
|
expect(msg?.role).toBe('assistant')
|
|
if (msg?.role !== 'assistant') return
|
|
expect(msg.usage.input_tokens).toBe(100)
|
|
expect(msg.usage.output_tokens).toBe(20)
|
|
expect(msg.usage.cache_read_input_tokens).toBe(300)
|
|
expect(msg.content).toEqual([
|
|
{ type: 'tool_use', id: 'read1', name: 'Read', input: { file_path: '/tmp/file.ts' } },
|
|
{ type: 'tool_use', id: 'agent1', name: 'Agent', input: { subagent_type: 'reviewer' } },
|
|
])
|
|
})
|
|
|
|
it('extracts deferred MCP inventory from large attachment lines', () => {
|
|
const line = JSON.stringify({
|
|
type: 'attachment',
|
|
sessionId: 's1',
|
|
timestamp: '2026-05-01T00:00:02Z',
|
|
padding: 'x'.repeat(40_000),
|
|
attachment: {
|
|
type: 'deferred_tools_delta',
|
|
addedNames: ['Bash', 'mcp__svc__tool'],
|
|
},
|
|
})
|
|
const parsed = parseJsonlLine(Buffer.from(line)) as Record<string, unknown>
|
|
expect(parsed['attachment']).toEqual({
|
|
type: 'deferred_tools_delta',
|
|
addedNames: ['Bash', 'mcp__svc__tool'],
|
|
})
|
|
})
|
|
})
|