feat(compare): add self-correction JSONL scanner

Adds scanSelfCorrections(), which reads raw .jsonl session files (including those inside each session's subagents/ directory) and counts, per model, the assistant messages that match at least one self-correction pattern, for use in the model comparison metrics.
This commit is contained in:
iamtoruk 2026-04-19 05:25:31 -07:00 committed by AgentSeal
parent ac9afffed5
commit 3cb9a7a7bc
2 changed files with 258 additions and 2 deletions

View file

@ -1,3 +1,6 @@
import { readdir, readFile } from 'fs/promises'
import { join } from 'path'
import type { ProjectSummary } from './types.js'
export type ModelStats = {
@ -139,3 +142,111 @@ export function computeComparison(a: ModelStats, b: ModelStats): ComparisonRow[]
}
})
}
/**
 * Case-insensitive phrases that mark an assistant message as a self-correction or apology.
 *
 * Contractions accept both the ASCII apostrophe (') and the typographic one (U+2019),
 * so "I'm sorry" and "I’m sorry" are treated the same.
 * None of the patterns uses the `g` or `y` flag, so `RegExp.prototype.test` keeps no
 * `lastIndex` state between calls and the same instances can be reused safely.
 */
const SELF_CORRECTION_PATTERNS: readonly RegExp[] = [
  /\bI(['\u2019]m| am) sorry\b/i,
  /\bmy mistake\b/i,
  // Deliberately open-ended so it covers "apology", "apologies", "apologize", ...
  /\bmy apolog/i,
  /\bI made (a |an )?(error|mistake)\b/i,
  /\bI was wrong\b/i,
  /\bmy bad\b/i,
  /\bI apologize\b/i,
  /\bsorry about that\b/i,
  /\bsorry for (the|that|this)\b/i,
  /\bI should have\b/i,
  /\bI shouldn['\u2019]t have\b/i,
  /\bI incorrectly\b/i,
  /\bI mistakenly\b/i,
]
/**
 * Flattens a message `content` value into a single string.
 *
 * @param content Either a plain string or an array of content blocks; anything else yields ''.
 * @returns The string itself, or the `text` of every `{ type: 'text', text: string }` block
 *          joined with a single space. Blocks of any other shape are ignored.
 */
function extractText(content: unknown): string {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''

  const pieces: string[] = []
  for (const block of content as unknown[]) {
    // Skip null, primitives and holes before touching any property.
    if (block === null || typeof block !== 'object') continue
    const candidate = block as { type?: unknown; text?: unknown }
    if (candidate.type !== 'text') continue
    if (typeof candidate.text === 'string') pieces.push(candidate.text)
  }
  return pieces.join(' ')
}
/**
 * Lists the `.jsonl` files that belong to one session directory.
 *
 * Looks at the directory's immediate entries only, plus the immediate entries of a child
 * directory named exactly `subagents`. Other sub-directories are not descended into.
 *
 * @param sessionDir Directory to inspect.
 * @returns Paths built with `join` from `sessionDir`, in the order `readdir` reported them.
 * @throws Whatever `readdir` rejects with for `sessionDir` or its `subagents` child.
 */
async function collectJsonlFiles(sessionDir: string): Promise<string[]> {
  const isJsonlFile = (dirent: { isFile(): boolean; name: string }): boolean =>
    dirent.isFile() && dirent.name.endsWith('.jsonl')

  const found: string[] = []
  for (const dirent of await readdir(sessionDir, { withFileTypes: true })) {
    if (isJsonlFile(dirent)) {
      found.push(join(sessionDir, dirent.name))
      continue
    }
    // Only the conventional `subagents` directory is scanned one level deeper.
    if (!dirent.isDirectory() || dirent.name !== 'subagents') continue

    const nested = await readdir(join(sessionDir, dirent.name), { withFileTypes: true })
    for (const child of nested) {
      if (isJsonlFile(child)) found.push(join(sessionDir, dirent.name, child.name))
    }
  }
  return found
}
/**
 * Extracts the model name and flattened text from one raw JSONL line.
 *
 * Returns undefined for blank lines, lines that are not valid JSON, records whose `type`
 * is not 'assistant', records without an object `message` or a string `message.model`,
 * and records whose model is '<synthetic>'.
 */
function readAssistantRecord(line: string): { model: string; text: string } | undefined {
  const trimmed = line.trim()
  if (!trimmed) return undefined

  let parsed: unknown
  try {
    parsed = JSON.parse(trimmed)
  } catch {
    // Malformed lines are skipped rather than aborting the whole file.
    return undefined
  }
  if (parsed === null || typeof parsed !== 'object') return undefined

  const record = parsed as Record<string, unknown>
  if (record['type'] !== 'assistant') return undefined

  const msg = record['message']
  if (msg === null || typeof msg !== 'object') return undefined

  const message = msg as Record<string, unknown>
  const model = message['model']
  if (typeof model !== 'string' || model === '<synthetic>') return undefined

  return { model, text: extractText(message['content']) }
}

/**
 * Adds one to a model's tally in `counts` for every assistant record in `file` whose text
 * matches at least one self-correction pattern. A file that cannot be read is skipped.
 */
async function tallySelfCorrectionsInFile(file: string, counts: Map<string, number>): Promise<void> {
  let raw: string
  try {
    raw = await readFile(file, 'utf8')
  } catch {
    return
  }
  for (const line of raw.split('\n')) {
    const record = readAssistantRecord(line)
    if (record === undefined) continue
    // A record counts once no matter how many patterns it matches.
    if (SELF_CORRECTION_PATTERNS.some(p => p.test(record.text))) {
      counts.set(record.model, (counts.get(record.model) ?? 0) + 1)
    }
  }
}

/**
 * Counts, per model, the assistant messages that contain a self-correction phrase.
 *
 * Each entry of `sessionDirs` is treated as a parent directory: every immediate
 * sub-directory is scanned for `.jsonl` files via `collectJsonlFiles`. Files that sit
 * directly inside a `sessionDirs` entry are not read.
 *
 * The scan is best-effort: directories or files that cannot be read and lines that cannot
 * be parsed are skipped silently, so the function does not reject on I/O or JSON errors.
 *
 * @param sessionDirs Parent directories whose sub-directories hold session `.jsonl` files.
 * @returns Map from model name to the number of matching assistant messages. Models with
 *          no matches are absent from the map.
 */
export async function scanSelfCorrections(sessionDirs: string[]): Promise<Map<string, number>> {
  const counts = new Map<string, number>()
  for (const dir of sessionDirs) {
    let sessionEntries
    try {
      sessionEntries = await readdir(dir, { withFileTypes: true })
    } catch {
      continue
    }
    for (const entry of sessionEntries) {
      if (!entry.isDirectory()) continue
      let files: string[]
      try {
        files = await collectJsonlFiles(join(dir, entry.name))
      } catch {
        continue
      }
      for (const file of files) {
        await tallySelfCorrectionsInFile(file, counts)
      }
    }
  }
  return counts
}

View file

@ -1,5 +1,8 @@
import { describe, it, expect } from 'vitest'
import { aggregateModelStats, computeComparison, type ModelStats } from '../src/compare-stats.js'
import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises'
import { join } from 'path'
import { tmpdir } from 'os'
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
import { aggregateModelStats, computeComparison, scanSelfCorrections, type ModelStats } from '../src/compare-stats.js'
import type { ProjectSummary, SessionSummary, ClassifiedTurn } from '../src/types.js'
function makeTurn(model: string, cost: number, opts: { hasEdits?: boolean; retries?: number; outputTokens?: number; inputTokens?: number; cacheRead?: number; cacheWrite?: number; timestamp?: string } = {}): ClassifiedTurn {
@ -209,3 +212,145 @@ describe('computeComparison', () => {
expect(cacheRow.winner).toBe('a')
})
})
/**
 * Builds one serialized JSONL record for the scanner tests.
 *
 * @param type  'assistant' produces an assistant record; any other value produces a user record.
 * @param model Model name stored on the assistant message (unused for user records).
 * @param text  Message text: a single text block for assistant records, the raw content for user records.
 * @returns The record as a JSON string (no trailing newline).
 */
function jsonlLine(type: string, model: string, text: string): string {
  const timestamp = '2026-04-15T10:00:00Z'

  if (type !== 'assistant') {
    return JSON.stringify({ type: 'user', timestamp, message: { role: 'user', content: text } })
  }

  const message = {
    model,
    content: [{ type: 'text', text }],
    // Random suffix gives every generated assistant message a distinct id.
    id: `msg-${Math.random()}`,
    usage: { input_tokens: 0, output_tokens: 0 },
  }
  return JSON.stringify({ type: 'assistant', timestamp, message })
}
// Exercises scanSelfCorrections against throwaway directory trees created under the OS temp dir.
describe('scanSelfCorrections', () => {
  let tmpDir: string

  // Writes `lines` to `dir/fileName` as newline-terminated JSONL.
  const writeJsonl = (dir: string, fileName: string, lines: string[]): Promise<void> =>
    writeFile(join(dir, fileName), lines.join('\n') + '\n')

  beforeEach(async () => {
    tmpDir = await mkdtemp(join(tmpdir(), 'codeburn-test-'))
  })

  afterEach(async () => {
    await rm(tmpDir, { recursive: true, force: true })
  })

  it('counts apology patterns per model', async () => {
    const sessionDir = join(tmpDir, 'session-abc')
    await mkdir(sessionDir)
    await writeJsonl(sessionDir, 'session.jsonl', [
      jsonlLine('assistant', 'opus-4-6', 'I apologize for the confusion.'),
      jsonlLine('assistant', 'opus-4-6', 'Here is the result.'),
      jsonlLine('assistant', 'sonnet-4-6', 'I was wrong about that.'),
      jsonlLine('user', '', 'Do this'),
    ])

    const counts = await scanSelfCorrections([tmpDir])

    expect(counts.get('opus-4-6')).toBe(1)
    expect(counts.get('sonnet-4-6')).toBe(1)
  })

  it('does not count non-apology text', async () => {
    const sessionDir = join(tmpDir, 'session-xyz')
    await mkdir(sessionDir)
    await writeJsonl(sessionDir, 'session.jsonl', [
      jsonlLine('assistant', 'opus-4-6', 'Here is the updated code.'),
      jsonlLine('assistant', 'opus-4-6', 'Let me fix that for you.'),
    ])

    const counts = await scanSelfCorrections([tmpDir])

    expect(counts.get('opus-4-6')).toBeUndefined()
    expect(counts.size).toBe(0)
  })

  it('returns empty map for missing directory', async () => {
    const counts = await scanSelfCorrections([join(tmpDir, 'nonexistent')])
    expect(counts.size).toBe(0)
  })

  it('returns empty map for empty directory', async () => {
    const counts = await scanSelfCorrections([tmpDir])
    expect(counts.size).toBe(0)
  })

  it('scans subagent directories', async () => {
    const subagentsDir = join(join(tmpDir, 'session-sub'), 'subagents')
    await mkdir(subagentsDir, { recursive: true })
    await writeJsonl(subagentsDir, 'sub.jsonl', [
      jsonlLine('assistant', 'haiku-4-6', 'My mistake, let me redo that.'),
    ])

    const counts = await scanSelfCorrections([tmpDir])

    expect(counts.get('haiku-4-6')).toBe(1)
  })

  it('skips <synthetic> models', async () => {
    const sessionDir = join(tmpDir, 'session-synth')
    await mkdir(sessionDir)
    await writeJsonl(sessionDir, 'session.jsonl', [
      jsonlLine('assistant', '<synthetic>', 'I apologize for the error.'),
    ])

    const counts = await scanSelfCorrections([tmpDir])

    expect(counts.get('<synthetic>')).toBeUndefined()
    expect(counts.size).toBe(0)
  })

  it('accumulates counts across multiple sessions and directories', async () => {
    const sessionA = join(tmpDir, 'session-a')
    const sessionB = join(tmpDir, 'session-b')
    await mkdir(sessionA)
    await mkdir(sessionB)
    await writeJsonl(sessionA, 'a.jsonl', [
      jsonlLine('assistant', 'opus-4-6', 'I was wrong.'),
      jsonlLine('assistant', 'opus-4-6', 'My bad!'),
    ])
    await writeJsonl(sessionB, 'b.jsonl', [
      jsonlLine('assistant', 'opus-4-6', 'I apologize.'),
    ])

    const counts = await scanSelfCorrections([tmpDir])

    expect(counts.get('opus-4-6')).toBe(3)
  })

  it('handles malformed JSON lines gracefully', async () => {
    const sessionDir = join(tmpDir, 'session-bad')
    await mkdir(sessionDir)
    await writeJsonl(sessionDir, 'bad.jsonl', [
      'not valid json',
      jsonlLine('assistant', 'opus-4-6', 'I apologize.'),
    ])

    const counts = await scanSelfCorrections([tmpDir])

    expect(counts.get('opus-4-6')).toBe(1)
  })

  it('accepts multiple sessionDirs and merges counts', async () => {
    // Second root lives outside tmpDir, so it needs its own cleanup.
    const dir2 = await mkdtemp(join(tmpdir(), 'codeburn-test2-'))
    try {
      const sessionA = join(tmpDir, 'session-a')
      const sessionB = join(dir2, 'session-b')
      await mkdir(sessionA)
      await mkdir(sessionB)
      await writeJsonl(sessionA, 'a.jsonl', [
        jsonlLine('assistant', 'sonnet-4-6', 'My mistake.'),
      ])
      await writeJsonl(sessionB, 'b.jsonl', [
        jsonlLine('assistant', 'sonnet-4-6', 'I was wrong.'),
      ])

      const counts = await scanSelfCorrections([tmpDir, dir2])

      expect(counts.get('sonnet-4-6')).toBe(2)
    } finally {
      await rm(dir2, { recursive: true, force: true })
    }
  })
})