Review notes (patch tools ignore text before the first "diff --git" line):
* Line breaks and 2-space indentation were rebuilt from a whitespace-collapsed
  copy of this patch; confirm context-line indentation against the target files.
* Generic type arguments missing from that copy (Promise, Map, Record) are restored.
* NOTE(review): the empty-string model literals (the `model === ''` check and the
  'skips models' test) are kept exactly as found. If the copy dropped
  angle-bracketed text there, restore the intended sentinel in both places.
* TSDoc was added to the new code, so the second src hunk now spans 144 lines;
  the "index" blob ids still refer to the original commit.

diff --git a/src/compare-stats.ts b/src/compare-stats.ts
index e01db6a..8a177b9 100644
--- a/src/compare-stats.ts
+++ b/src/compare-stats.ts
@@ -1,3 +1,6 @@
+import { readdir, readFile } from 'fs/promises'
+import { join } from 'path'
+
 import type { ProjectSummary } from './types.js'
 
 export type ModelStats = {
@@ -139,3 +142,144 @@ export function computeComparison(a: ModelStats, b: ModelStats): ComparisonRow[]
     }
   })
 }
+
+/**
+ * Case-insensitive phrases treated as an assistant apologizing for or
+ * retracting earlier output. None use the `g` flag, so `test()` is stateless.
+ */
+const SELF_CORRECTION_PATTERNS = [
+  /\bI('m| am) sorry\b/i,
+  /\bmy mistake\b/i,
+  /\bmy apolog/i,
+  /\bI made (a |an )?(error|mistake)\b/i,
+  /\bI was wrong\b/i,
+  /\bmy bad\b/i,
+  /\bI apologize\b/i,
+  /\bsorry about that\b/i,
+  /\bsorry for (the|that|this)\b/i,
+  /\bI should have\b/i,
+  /\bI shouldn't have\b/i,
+  /\bI incorrectly\b/i,
+  /\bI mistakenly\b/i,
+]
+
+/**
+ * Flattens a message `content` value to plain text.
+ *
+ * @param content - Either a string or an array of content blocks.
+ * @returns The string itself, or the `text` of every `{ type: 'text' }` block
+ *   joined with single spaces; `''` for any other shape.
+ */
+function extractText(content: unknown): string {
+  if (typeof content === 'string') return content
+  if (!Array.isArray(content)) return ''
+  return content
+    .filter((b): b is { type: string; text: string } => b !== null && typeof b === 'object' && b.type === 'text' && typeof b.text === 'string')
+    .map(b => b.text)
+    .join(' ')
+}
+
+/**
+ * Lists the `.jsonl` files directly inside a session directory plus those
+ * directly inside its `subagents/` child, if present.
+ *
+ * @param sessionDir - Directory to scan (not recursed beyond `subagents/`).
+ * @returns Paths of the matching files.
+ * @throws Whatever `readdir` rejects with; the caller decides how to recover.
+ */
+async function collectJsonlFiles(sessionDir: string): Promise<string[]> {
+  const entries = await readdir(sessionDir, { withFileTypes: true })
+  const files: string[] = []
+  for (const entry of entries) {
+    if (entry.isFile() && entry.name.endsWith('.jsonl')) {
+      files.push(join(sessionDir, entry.name))
+    } else if (entry.isDirectory() && entry.name === 'subagents') {
+      const subEntries = await readdir(join(sessionDir, entry.name), { withFileTypes: true })
+      for (const sub of subEntries) {
+        if (sub.isFile() && sub.name.endsWith('.jsonl')) {
+          files.push(join(sessionDir, entry.name, sub.name))
+        }
+      }
+    }
+  }
+  return files
+}
+
+/**
+ * Counts, per model, the assistant log entries whose text matches one of
+ * `SELF_CORRECTION_PATTERNS`.
+ *
+ * Every immediate sub-directory of each `sessionDirs` entry is scanned via
+ * `collectJsonlFiles`. The scan is best-effort: unreadable directories or
+ * files, unparsable lines, non-assistant entries and entries without a
+ * non-empty string `message.model` are skipped silently.
+ *
+ * @param sessionDirs - Directories whose sub-directories hold `.jsonl` logs.
+ * @returns Map from model name to number of matching entries; models with no
+ *   match are absent.
+ */
+export async function scanSelfCorrections(sessionDirs: string[]): Promise<Map<string, number>> {
+  const counts = new Map<string, number>()
+
+  for (const dir of sessionDirs) {
+    let sessionEntries
+    try {
+      sessionEntries = await readdir(dir, { withFileTypes: true })
+    } catch {
+      continue
+    }
+
+    for (const entry of sessionEntries) {
+      if (!entry.isDirectory()) continue
+      const sessionDir = join(dir, entry.name)
+
+      let files: string[]
+      try {
+        files = await collectJsonlFiles(sessionDir)
+      } catch {
+        continue
+      }
+
+      for (const file of files) {
+        let raw: string
+        try {
+          raw = await readFile(file, 'utf8')
+        } catch {
+          continue
+        }
+
+        for (const line of raw.split('\n')) {
+          const trimmed = line.trim()
+          if (!trimmed) continue
+
+          let parsed: unknown
+          try {
+            parsed = JSON.parse(trimmed)
+          } catch {
+            continue
+          }
+
+          if (
+            parsed === null ||
+            typeof parsed !== 'object' ||
+            (parsed as Record<string, unknown>)['type'] !== 'assistant'
+          ) continue
+
+          const msg = (parsed as Record<string, unknown>)['message']
+          if (msg === null || typeof msg !== 'object') continue
+
+          const model = (msg as Record<string, unknown>)['model']
+          if (typeof model !== 'string' || model === '') continue
+
+          const text = extractText((msg as Record<string, unknown>)['content'])
+          // One count per entry, no matter how many patterns match.
+          if (SELF_CORRECTION_PATTERNS.some(p => p.test(text))) {
+            counts.set(model, (counts.get(model) ?? 0) + 1)
+          }
+        }
+      }
+    }
+  }
+
+  return counts
+}
diff --git a/tests/compare-stats.test.ts b/tests/compare-stats.test.ts
index f40469a..ad1c461 100644
--- a/tests/compare-stats.test.ts
+++ b/tests/compare-stats.test.ts
@@ -1,5 +1,8 @@
-import { describe, it, expect } from 'vitest'
-import { aggregateModelStats, computeComparison, type ModelStats } from '../src/compare-stats.js'
+import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises'
+import { join } from 'path'
+import { tmpdir } from 'os'
+import { describe, it, expect, beforeEach, afterEach } from 'vitest'
+import { aggregateModelStats, computeComparison, scanSelfCorrections, type ModelStats } from '../src/compare-stats.js'
 import type { ProjectSummary, SessionSummary, ClassifiedTurn } from '../src/types.js'
 
 function makeTurn(model: string, cost: number, opts: { hasEdits?: boolean; retries?: number; outputTokens?: number; inputTokens?: number; cacheRead?: number; cacheWrite?: number; timestamp?: string } = {}): ClassifiedTurn {
@@ -209,3 +212,145 @@ describe('computeComparison', () => {
     expect(cacheRow.winner).toBe('a')
   })
 })
+
+function jsonlLine(type: string, model: string, text: string): string {
+  if (type === 'assistant') {
+    return JSON.stringify({
+      type: 'assistant', timestamp: '2026-04-15T10:00:00Z',
+      message: { model, content: [{ type: 'text', text }], id: `msg-${Math.random()}`, usage: { input_tokens: 0, output_tokens: 0 } },
+    })
+  }
+  return JSON.stringify({ type: 'user', timestamp: '2026-04-15T10:00:00Z', message: { role: 'user', content: text } })
+}
+
+describe('scanSelfCorrections', () => {
+  let tmpDir: string
+
+  beforeEach(async () => {
+    tmpDir = await mkdtemp(join(tmpdir(), 'codeburn-test-'))
+  })
+
+  afterEach(async () => {
+    await rm(tmpDir, { recursive: true, force: true })
+  })
+
+  it('counts apology patterns per model', async () => {
+    const sessionDir = join(tmpDir, 'session-abc')
+    await mkdir(sessionDir)
+    const lines = [
+      jsonlLine('assistant', 'opus-4-6', 'I apologize for the confusion.'),
+      jsonlLine('assistant', 'opus-4-6', 'Here is the result.'),
+      jsonlLine('assistant', 'sonnet-4-6', 'I was wrong about that.'),
+      jsonlLine('user', '', 'Do this'),
+    ]
+    await writeFile(join(sessionDir, 'session.jsonl'), lines.join('\n') + '\n')
+
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.get('opus-4-6')).toBe(1)
+    expect(result.get('sonnet-4-6')).toBe(1)
+  })
+
+  it('does not count non-apology text', async () => {
+    const sessionDir = join(tmpDir, 'session-xyz')
+    await mkdir(sessionDir)
+    const lines = [
+      jsonlLine('assistant', 'opus-4-6', 'Here is the updated code.'),
+      jsonlLine('assistant', 'opus-4-6', 'Let me fix that for you.'),
+    ]
+    await writeFile(join(sessionDir, 'session.jsonl'), lines.join('\n') + '\n')
+
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.get('opus-4-6')).toBeUndefined()
+    expect(result.size).toBe(0)
+  })
+
+  it('returns empty map for missing directory', async () => {
+    const result = await scanSelfCorrections([join(tmpDir, 'nonexistent')])
+    expect(result.size).toBe(0)
+  })
+
+  it('returns empty map for empty directory', async () => {
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.size).toBe(0)
+  })
+
+  it('scans subagent directories', async () => {
+    const sessionDir = join(tmpDir, 'session-sub')
+    const subagentsDir = join(sessionDir, 'subagents')
+    await mkdir(subagentsDir, { recursive: true })
+    const lines = [
+      jsonlLine('assistant', 'haiku-4-6', 'My mistake, let me redo that.'),
+    ]
+    await writeFile(join(subagentsDir, 'sub.jsonl'), lines.join('\n') + '\n')
+
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.get('haiku-4-6')).toBe(1)
+  })
+
+  it('skips models', async () => {
+    const sessionDir = join(tmpDir, 'session-synth')
+    await mkdir(sessionDir)
+    const lines = [
+      jsonlLine('assistant', '', 'I apologize for the error.'),
+    ]
+    await writeFile(join(sessionDir, 'session.jsonl'), lines.join('\n') + '\n')
+
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.get('')).toBeUndefined()
+    expect(result.size).toBe(0)
+  })
+
+  it('accumulates counts across multiple sessions and directories', async () => {
+    const sessionA = join(tmpDir, 'session-a')
+    const sessionB = join(tmpDir, 'session-b')
+    await mkdir(sessionA)
+    await mkdir(sessionB)
+
+    await writeFile(join(sessionA, 'a.jsonl'), [
+      jsonlLine('assistant', 'opus-4-6', 'I was wrong.'),
+      jsonlLine('assistant', 'opus-4-6', 'My bad!'),
+    ].join('\n') + '\n')
+
+    await writeFile(join(sessionB, 'b.jsonl'), [
+      jsonlLine('assistant', 'opus-4-6', 'I apologize.'),
+    ].join('\n') + '\n')
+
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.get('opus-4-6')).toBe(3)
+  })
+
+  it('handles malformed JSON lines gracefully', async () => {
+    const sessionDir = join(tmpDir, 'session-bad')
+    await mkdir(sessionDir)
+    await writeFile(join(sessionDir, 'bad.jsonl'), [
+      'not valid json',
+      jsonlLine('assistant', 'opus-4-6', 'I apologize.'),
+    ].join('\n') + '\n')
+
+    const result = await scanSelfCorrections([tmpDir])
+    expect(result.get('opus-4-6')).toBe(1)
+  })
+
+  it('accepts multiple sessionDirs and merges counts', async () => {
+    const dir2 = await mkdtemp(join(tmpdir(), 'codeburn-test2-'))
+    try {
+      const sessionA = join(tmpDir, 'session-a')
+      const sessionB = join(dir2, 'session-b')
+      await mkdir(sessionA)
+      await mkdir(sessionB)
+
+      await writeFile(join(sessionA, 'a.jsonl'), [
+        jsonlLine('assistant', 'sonnet-4-6', 'My mistake.'),
+      ].join('\n') + '\n')
+
+      await writeFile(join(sessionB, 'b.jsonl'), [
+        jsonlLine('assistant', 'sonnet-4-6', 'I was wrong.'),
+      ].join('\n') + '\n')
+
+      const result = await scanSelfCorrections([tmpDir, dir2])
+      expect(result.get('sonnet-4-6')).toBe(2)
+    } finally {
+      await rm(dir2, { recursive: true, force: true })
+    }
+  })
+})