mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-04-28 06:59:37 +00:00
feat(compare): add self-correction JSONL scanner
Adds scanSelfCorrections() which reads raw .jsonl session files (including subagent dirs) and counts per-model self-correction patterns for use in the model comparison metrics.
This commit is contained in:
parent
ac9afffed5
commit
3cb9a7a7bc
2 changed files with 258 additions and 2 deletions
|
|
@ -1,3 +1,6 @@
|
|||
import { readdir, readFile } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
|
||||
import type { ProjectSummary } from './types.js'
|
||||
|
||||
export type ModelStats = {
|
||||
|
|
@ -139,3 +142,111 @@ export function computeComparison(a: ModelStats, b: ModelStats): ComparisonRow[]
|
|||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Case-insensitive phrases that mark an assistant message as a
// "self-correction": the model apologizing for, or acknowledging,
// an earlier mistake. Each assistant message's flattened text is
// tested against every pattern in scanSelfCorrections; a message
// counts at most once regardless of how many patterns match.
const SELF_CORRECTION_PATTERNS = [
  /\bI('m| am) sorry\b/i,
  /\bmy mistake\b/i,
  // Intentionally no trailing \b: matches "apology", "apologies", "apologizing", ...
  /\bmy apolog/i,
  /\bI made (a |an )?(error|mistake)\b/i,
  /\bI was wrong\b/i,
  /\bmy bad\b/i,
  /\bI apologize\b/i,
  /\bsorry about that\b/i,
  /\bsorry for (the|that|this)\b/i,
  /\bI should have\b/i,
  /\bI shouldn't have\b/i,
  /\bI incorrectly\b/i,
  /\bI mistakenly\b/i,
]
|
||||
|
||||
function extractText(content: unknown): string {
|
||||
if (typeof content === 'string') return content
|
||||
if (!Array.isArray(content)) return ''
|
||||
return content
|
||||
.filter((b): b is { type: string; text: string } => b !== null && typeof b === 'object' && b.type === 'text' && typeof b.text === 'string')
|
||||
.map(b => b.text)
|
||||
.join(' ')
|
||||
}
|
||||
|
||||
async function collectJsonlFiles(sessionDir: string): Promise<string[]> {
|
||||
const entries = await readdir(sessionDir, { withFileTypes: true })
|
||||
const files: string[] = []
|
||||
for (const entry of entries) {
|
||||
if (entry.isFile() && entry.name.endsWith('.jsonl')) {
|
||||
files.push(join(sessionDir, entry.name))
|
||||
} else if (entry.isDirectory() && entry.name === 'subagents') {
|
||||
const subEntries = await readdir(join(sessionDir, entry.name), { withFileTypes: true })
|
||||
for (const sub of subEntries) {
|
||||
if (sub.isFile() && sub.name.endsWith('.jsonl')) {
|
||||
files.push(join(sessionDir, entry.name, sub.name))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return files
|
||||
}
|
||||
|
||||
export async function scanSelfCorrections(sessionDirs: string[]): Promise<Map<string, number>> {
|
||||
const counts = new Map<string, number>()
|
||||
|
||||
for (const dir of sessionDirs) {
|
||||
let sessionEntries
|
||||
try {
|
||||
sessionEntries = await readdir(dir, { withFileTypes: true })
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
for (const entry of sessionEntries) {
|
||||
if (!entry.isDirectory()) continue
|
||||
const sessionDir = join(dir, entry.name)
|
||||
|
||||
let files: string[]
|
||||
try {
|
||||
files = await collectJsonlFiles(sessionDir)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
for (const file of files) {
|
||||
let raw: string
|
||||
try {
|
||||
raw = await readFile(file, 'utf8')
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
for (const line of raw.split('\n')) {
|
||||
const trimmed = line.trim()
|
||||
if (!trimmed) continue
|
||||
|
||||
let parsed: unknown
|
||||
try {
|
||||
parsed = JSON.parse(trimmed)
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
|
||||
if (
|
||||
parsed === null ||
|
||||
typeof parsed !== 'object' ||
|
||||
(parsed as Record<string, unknown>)['type'] !== 'assistant'
|
||||
) continue
|
||||
|
||||
const msg = (parsed as Record<string, unknown>)['message']
|
||||
if (msg === null || typeof msg !== 'object') continue
|
||||
|
||||
const model = (msg as Record<string, unknown>)['model']
|
||||
if (typeof model !== 'string' || model === '<synthetic>') continue
|
||||
|
||||
const text = extractText((msg as Record<string, unknown>)['content'])
|
||||
if (SELF_CORRECTION_PATTERNS.some(p => p.test(text))) {
|
||||
counts.set(model, (counts.get(model) ?? 0) + 1)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return counts
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,8 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { aggregateModelStats, computeComparison, type ModelStats } from '../src/compare-stats.js'
|
||||
import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises'
|
||||
import { join } from 'path'
|
||||
import { tmpdir } from 'os'
|
||||
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
|
||||
import { aggregateModelStats, computeComparison, scanSelfCorrections, type ModelStats } from '../src/compare-stats.js'
|
||||
import type { ProjectSummary, SessionSummary, ClassifiedTurn } from '../src/types.js'
|
||||
|
||||
function makeTurn(model: string, cost: number, opts: { hasEdits?: boolean; retries?: number; outputTokens?: number; inputTokens?: number; cacheRead?: number; cacheWrite?: number; timestamp?: string } = {}): ClassifiedTurn {
|
||||
|
|
@ -209,3 +212,145 @@ describe('computeComparison', () => {
|
|||
expect(cacheRow.winner).toBe('a')
|
||||
})
|
||||
})
|
||||
|
||||
function jsonlLine(type: string, model: string, text: string): string {
|
||||
if (type === 'assistant') {
|
||||
return JSON.stringify({
|
||||
type: 'assistant', timestamp: '2026-04-15T10:00:00Z',
|
||||
message: { model, content: [{ type: 'text', text }], id: `msg-${Math.random()}`, usage: { input_tokens: 0, output_tokens: 0 } },
|
||||
})
|
||||
}
|
||||
return JSON.stringify({ type: 'user', timestamp: '2026-04-15T10:00:00Z', message: { role: 'user', content: text } })
|
||||
}
|
||||
|
||||
// Integration tests for scanSelfCorrections: each test lays out a real
// on-disk session structure under a fresh temp directory, runs the scanner
// against it, and checks the per-model counts.
describe('scanSelfCorrections', () => {
  // Root directory handed to the scanner; recreated before every test.
  let tmpDir: string

  beforeEach(async () => {
    tmpDir = await mkdtemp(join(tmpdir(), 'codeburn-test-'))
  })

  afterEach(async () => {
    // force: true keeps cleanup from failing if a test already removed files
    await rm(tmpDir, { recursive: true, force: true })
  })

  it('counts apology patterns per model', async () => {
    const sessionDir = join(tmpDir, 'session-abc')
    await mkdir(sessionDir)
    const lines = [
      jsonlLine('assistant', 'opus-4-6', 'I apologize for the confusion.'),
      jsonlLine('assistant', 'opus-4-6', 'Here is the result.'), // no pattern — not counted
      jsonlLine('assistant', 'sonnet-4-6', 'I was wrong about that.'),
      jsonlLine('user', '', 'Do this'), // user lines are ignored entirely
    ]
    await writeFile(join(sessionDir, 'session.jsonl'), lines.join('\n') + '\n')

    const result = await scanSelfCorrections([tmpDir])
    expect(result.get('opus-4-6')).toBe(1)
    expect(result.get('sonnet-4-6')).toBe(1)
  })

  it('does not count non-apology text', async () => {
    const sessionDir = join(tmpDir, 'session-xyz')
    await mkdir(sessionDir)
    // Neither line matches any SELF_CORRECTION_PATTERNS entry.
    const lines = [
      jsonlLine('assistant', 'opus-4-6', 'Here is the updated code.'),
      jsonlLine('assistant', 'opus-4-6', 'Let me fix that for you.'),
    ]
    await writeFile(join(sessionDir, 'session.jsonl'), lines.join('\n') + '\n')

    const result = await scanSelfCorrections([tmpDir])
    expect(result.get('opus-4-6')).toBeUndefined()
    expect(result.size).toBe(0)
  })

  it('returns empty map for missing directory', async () => {
    // Scanner swallows the readdir failure rather than throwing.
    const result = await scanSelfCorrections([join(tmpDir, 'nonexistent')])
    expect(result.size).toBe(0)
  })

  it('returns empty map for empty directory', async () => {
    const result = await scanSelfCorrections([tmpDir])
    expect(result.size).toBe(0)
  })

  it('scans subagent directories', async () => {
    // .jsonl files one level down in `subagents` are picked up too.
    const sessionDir = join(tmpDir, 'session-sub')
    const subagentsDir = join(sessionDir, 'subagents')
    await mkdir(subagentsDir, { recursive: true })
    const lines = [
      jsonlLine('assistant', 'haiku-4-6', 'My mistake, let me redo that.'),
    ]
    await writeFile(join(subagentsDir, 'sub.jsonl'), lines.join('\n') + '\n')

    const result = await scanSelfCorrections([tmpDir])
    expect(result.get('haiku-4-6')).toBe(1)
  })

  it('skips <synthetic> models', async () => {
    // Matching text is still ignored when the model is the <synthetic> placeholder.
    const sessionDir = join(tmpDir, 'session-synth')
    await mkdir(sessionDir)
    const lines = [
      jsonlLine('assistant', '<synthetic>', 'I apologize for the error.'),
    ]
    await writeFile(join(sessionDir, 'session.jsonl'), lines.join('\n') + '\n')

    const result = await scanSelfCorrections([tmpDir])
    expect(result.get('<synthetic>')).toBeUndefined()
    expect(result.size).toBe(0)
  })

  it('accumulates counts across multiple sessions and directories', async () => {
    const sessionA = join(tmpDir, 'session-a')
    const sessionB = join(tmpDir, 'session-b')
    await mkdir(sessionA)
    await mkdir(sessionB)

    // Two matches in session A ...
    await writeFile(join(sessionA, 'a.jsonl'), [
      jsonlLine('assistant', 'opus-4-6', 'I was wrong.'),
      jsonlLine('assistant', 'opus-4-6', 'My bad!'),
    ].join('\n') + '\n')

    // ... plus one in session B should total 3 for the same model.
    await writeFile(join(sessionB, 'b.jsonl'), [
      jsonlLine('assistant', 'opus-4-6', 'I apologize.'),
    ].join('\n') + '\n')

    const result = await scanSelfCorrections([tmpDir])
    expect(result.get('opus-4-6')).toBe(3)
  })

  it('handles malformed JSON lines gracefully', async () => {
    // An unparseable line is skipped; the valid line after it still counts.
    const sessionDir = join(tmpDir, 'session-bad')
    await mkdir(sessionDir)
    await writeFile(join(sessionDir, 'bad.jsonl'), [
      'not valid json',
      jsonlLine('assistant', 'opus-4-6', 'I apologize.'),
    ].join('\n') + '\n')

    const result = await scanSelfCorrections([tmpDir])
    expect(result.get('opus-4-6')).toBe(1)
  })

  it('accepts multiple sessionDirs and merges counts', async () => {
    // Second root lives outside the beforeEach fixture, so clean it up here.
    const dir2 = await mkdtemp(join(tmpdir(), 'codeburn-test2-'))
    try {
      const sessionA = join(tmpDir, 'session-a')
      const sessionB = join(dir2, 'session-b')
      await mkdir(sessionA)
      await mkdir(sessionB)

      await writeFile(join(sessionA, 'a.jsonl'), [
        jsonlLine('assistant', 'sonnet-4-6', 'My mistake.'),
      ].join('\n') + '\n')

      await writeFile(join(sessionB, 'b.jsonl'), [
        jsonlLine('assistant', 'sonnet-4-6', 'I was wrong.'),
      ].join('\n') + '\n')

      const result = await scanSelfCorrections([tmpDir, dir2])
      expect(result.get('sonnet-4-6')).toBe(2)
    } finally {
      await rm(dir2, { recursive: true, force: true })
    }
  })
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue