mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-19 07:43:09 +00:00
feat(compare): add computeComparison with normalized metrics
This commit is contained in:
parent
9d119bfe40
commit
ac9afffed5
2 changed files with 153 additions and 1 deletions
|
|
@@ -61,3 +61,81 @@ export function aggregateModelStats(projects: ProjectSummary[]): ModelStats[] {
|
|||
|
||||
return [...byModel.values()].sort((a, b) => b.cost - a.cost)
|
||||
}
|
||||
|
||||
/**
 * One row of a side-by-side model comparison, as returned by `computeComparison`.
 */
export type ComparisonRow = {
  /** Metric label copied from the metric definition, e.g. 'Cost / call'. */
  label: string
  /** Metric value for the first model, or `null` when the metric yielded no value. */
  valueA: number | null
  /** Metric value for the second model, or `null` when the metric yielded no value. */
  valueB: number | null
  /** Formatting hint for both values; how it is rendered is up to the consumer (not visible here). */
  formatFn:
    | 'cost'
    | 'number'
    | 'percent'
    | 'decimal'
  /**
   * Which side has the better value: 'a', 'b', 'tie' when both are equal,
   * or 'none' when the two sides cannot be compared (for example when a value is `null`).
   */
  winner:
    | 'a'
    | 'b'
    | 'tie'
    | 'none'
}
|
||||
|
||||
/**
 * Internal description of a single comparison metric: how it is labelled,
 * how it is derived from one model's stats, and which direction counts as better.
 */
type MetricDef = {
  /** Label for the metric; copied onto the resulting comparison row. */
  label: string
  /** Formatting hint forwarded unchanged to the comparison row. */
  formatFn: ComparisonRow['formatFn']
  /** `true` when the larger value wins, `false` when the smaller value wins. */
  higherIsBetter: boolean
  /** Derives the metric from one model's stats; `null` means there is no value to report. */
  compute: (stats: ModelStats) => number | null
}
|
||||
|
||||
/**
 * Metric definitions used by the comparison, in the order rows are produced.
 * Each `compute` returns `null` rather than dividing when its denominator is not greater than zero.
 */
const METRICS: MetricDef[] = [
  {
    // Total cost divided by number of calls; the smaller value wins.
    label: 'Cost / call',
    formatFn: 'cost',
    higherIsBetter: false,
    compute: stats => {
      const { cost, calls } = stats
      if (calls > 0) return cost / calls
      return null
    },
  },
  {
    // Output tokens per call, rounded to a whole number; the smaller value wins.
    label: 'Output tok / call',
    formatFn: 'number',
    higherIsBetter: false,
    compute: stats => {
      const { outputTokens, calls } = stats
      if (calls > 0) return Math.round(outputTokens / calls)
      return null
    },
  },
  {
    // Cache reads as a percentage of input + cache-read + cache-write tokens; the larger value wins.
    label: 'Cache hit rate',
    formatFn: 'percent',
    higherIsBetter: true,
    compute: stats => {
      const { inputTokens, cacheReadTokens, cacheWriteTokens } = stats
      const tokenTotal = inputTokens + cacheReadTokens + cacheWriteTokens
      if (tokenTotal > 0) return (cacheReadTokens / tokenTotal) * 100
      return null
    },
  },
  {
    // One-shot turns as a percentage of edit turns; the larger value wins.
    label: 'One-shot rate',
    formatFn: 'percent',
    higherIsBetter: true,
    compute: stats => {
      const { oneShotTurns, editTurns } = stats
      if (editTurns > 0) return (oneShotTurns / editTurns) * 100
      return null
    },
  },
  {
    // Retries per edit turn, as a plain ratio (not a percentage); the smaller value wins.
    label: 'Retry rate',
    formatFn: 'decimal',
    higherIsBetter: false,
    compute: stats => {
      const { retries, editTurns } = stats
      if (editTurns > 0) return retries / editTurns
      return null
    },
  },
  {
    // Self-corrections as a percentage of all turns; the smaller value wins.
    label: 'Self-correction',
    formatFn: 'percent',
    higherIsBetter: false,
    compute: stats => {
      const { selfCorrections, totalTurns } = stats
      if (totalTurns > 0) return (selfCorrections / totalTurns) * 100
      return null
    },
  },
]
|
||||
|
||||
/**
 * Decides which side of a single metric comparison wins.
 *
 * @param valueA - Metric value for side A, or `null` when unavailable.
 * @param valueB - Metric value for side B, or `null` when unavailable.
 * @param higherIsBetter - `true` when the larger value wins, `false` when the smaller value wins.
 * @returns 'none' when either value is `null` or NaN, 'tie' when both values are strictly equal,
 *          otherwise 'a' or 'b' for the side with the better value.
 */
function pickWinner(valueA: number | null, valueB: number | null, higherIsBetter: boolean): ComparisonRow['winner'] {
  if (valueA === null || valueB === null) return 'none'
  // NaN fails every comparison (including ===), so without this guard the
  // ternaries below would always fall through to 'b' and report a bogus winner.
  if (Number.isNaN(valueA) || Number.isNaN(valueB)) return 'none'
  if (valueA === valueB) return 'tie'
  if (higherIsBetter) return valueA > valueB ? 'a' : 'b'
  return valueA < valueB ? 'a' : 'b'
}
|
||||
|
||||
/**
 * Builds one comparison row per entry in `METRICS` for two models.
 *
 * @param a - Stats for the model reported as side A.
 * @param b - Stats for the model reported as side B.
 * @returns Rows in `METRICS` order, each carrying both computed values,
 *          the metric's formatting hint and the winner chosen by `pickWinner`.
 */
export function computeComparison(a: ModelStats, b: ModelStats): ComparisonRow[] {
  const rows: ComparisonRow[] = []
  for (const metric of METRICS) {
    const valueA = metric.compute(a)
    const valueB = metric.compute(b)
    const winner = pickWinner(valueA, valueB, metric.higherIsBetter)
    rows.push({
      label: metric.label,
      valueA,
      valueB,
      formatFn: metric.formatFn,
      winner,
    })
  }
  return rows
}
|
||||
|
|
|
|||
|
|
@@ -1,5 +1,5 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
import { aggregateModelStats, type ModelStats } from '../src/compare-stats.js'
|
||||
import { aggregateModelStats, computeComparison, type ModelStats } from '../src/compare-stats.js'
|
||||
import type { ProjectSummary, SessionSummary, ClassifiedTurn } from '../src/types.js'
|
||||
|
||||
function makeTurn(model: string, cost: number, opts: { hasEdits?: boolean; retries?: number; outputTokens?: number; inputTokens?: number; cacheRead?: number; cacheWrite?: number; timestamp?: string } = {}): ClassifiedTurn {
|
||||
|
|
@@ -135,3 +135,77 @@ describe('aggregateModelStats', () => {
|
|||
expect(stats[1].model).toBe('cheap-model')
|
||||
})
|
||||
})
|
||||
|
||||
/**
 * Creates a `ModelStats` fixture for tests.
 *
 * @param overrides - Fields that replace the baseline values below; omitted fields keep the baseline.
 * @returns A fresh object on every call, with baseline values first and overrides applied on top.
 */
function makeStats(overrides: Partial<ModelStats> = {}): ModelStats {
  const baseline: ModelStats = {
    model: 'test-model',
    calls: 100,
    cost: 10,
    outputTokens: 50000,
    inputTokens: 10000,
    cacheReadTokens: 20000,
    cacheWriteTokens: 5000,
    totalTurns: 200,
    editTurns: 80,
    oneShotTurns: 60,
    retries: 20,
    selfCorrections: 10,
    firstSeen: '2026-04-01T00:00:00Z',
    lastSeen: '2026-04-15T00:00:00Z',
  }
  return { ...baseline, ...overrides }
}
|
||||
|
||||
describe('computeComparison', () => {
  // Looks up the row for a metric label. The non-null assertion is deliberate:
  // a missing row should blow up on the first property access and fail the test.
  const rowFor = (rows: ReturnType<typeof computeComparison>, label: string) =>
    rows.find(row => row.label === label)!

  it('computes normalized metrics and picks winners correctly', () => {
    // Both sides are identical except for cost and output tokens.
    const shared = {
      calls: 100,
      inputTokens: 10000,
      cacheReadTokens: 20000,
      cacheWriteTokens: 5000,
      editTurns: 80,
      oneShotTurns: 60,
      retries: 20,
      selfCorrections: 10,
      totalTurns: 200,
    }
    const rows = computeComparison(
      makeStats({ ...shared, cost: 10, outputTokens: 50000 }),
      makeStats({ ...shared, cost: 8, outputTokens: 40000 }),
    )

    // Lower cost per call wins.
    const cost = rowFor(rows, 'Cost / call')
    expect(cost.valueA).toBeCloseTo(0.1)
    expect(cost.valueB).toBeCloseTo(0.08)
    expect(cost.winner).toBe('b')

    // Fewer output tokens per call wins.
    const output = rowFor(rows, 'Output tok / call')
    expect(output.valueA).toBe(500)
    expect(output.valueB).toBe(400)
    expect(output.winner).toBe('b')
  })

  it('returns null values for one-shot rate and retry rate when editTurns is zero', () => {
    const withoutEdits = makeStats({ editTurns: 0, oneShotTurns: 0, retries: 0 })
    const withEdits = makeStats({ editTurns: 80, oneShotTurns: 60, retries: 20 })
    const rows = computeComparison(withoutEdits, withEdits)

    const oneShot = rowFor(rows, 'One-shot rate')
    expect(oneShot.valueA).toBeNull()
    expect(oneShot.winner).toBe('none')

    const retry = rowFor(rows, 'Retry rate')
    expect(retry.valueA).toBeNull()
    expect(retry.winner).toBe('none')
  })

  it('returns tie when values are equal', () => {
    const rows = computeComparison(
      makeStats({ calls: 100, cost: 10 }),
      makeStats({ calls: 100, cost: 10 }),
    )

    expect(rowFor(rows, 'Cost / call').winner).toBe('tie')
  })

  it('picks higher value as winner for cache hit rate', () => {
    const rows = computeComparison(
      makeStats({ inputTokens: 5000, cacheReadTokens: 30000, cacheWriteTokens: 5000 }),
      makeStats({ inputTokens: 10000, cacheReadTokens: 10000, cacheWriteTokens: 5000 }),
    )

    // Expected rates are recomputed here from the same token counts.
    const totalA = 5000 + 30000 + 5000
    const totalB = 10000 + 10000 + 5000
    const cache = rowFor(rows, 'Cache hit rate')
    expect(cache.valueA).toBeCloseTo(30000 / totalA * 100)
    expect(cache.valueB).toBeCloseTo(10000 / totalB * 100)
    expect(cache.winner).toBe('a')
  })
})
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue