feat(compare): add computeComparison with normalized metrics

2026-05-19 07:43:09 +00:00 · 2026-04-19 05:22:34 -07:00 · 2026-04-19 05:22:34 -07:00 · ac9afffed5
commit ac9afffed5
parent 9d119bfe40
2 changed files with 153 additions and 1 deletions
--- a/src/compare-stats.ts
+++ b/src/compare-stats.ts
@ -61,3 +61,81 @@ export function aggregateModelStats(projects: ProjectSummary[]): ModelStats[] {

  return [...byModel.values()].sort((a, b) => b.cost - a.cost)
 }
+
+export type ComparisonRow = {
+  label: string
+  valueA: number | null
+  valueB: number | null
+  formatFn: 'cost' | 'number' | 'percent' | 'decimal'
+  winner: 'a' | 'b' | 'tie' | 'none'
+}
+
+type MetricDef = {
+  label: string
+  formatFn: ComparisonRow['formatFn']
+  higherIsBetter: boolean
+  compute: (s: ModelStats) => number | null
+}
+
+const METRICS: MetricDef[] = [
+  {
+    label: 'Cost / call',
+    formatFn: 'cost',
+    higherIsBetter: false,
+    compute: s => s.calls > 0 ? s.cost / s.calls : null,
+  },
+  {
+    label: 'Output tok / call',
+    formatFn: 'number',
+    higherIsBetter: false,
+    compute: s => s.calls > 0 ? Math.round(s.outputTokens / s.calls) : null,
+  },
+  {
+    label: 'Cache hit rate',
+    formatFn: 'percent',
+    higherIsBetter: true,
+    compute: s => {
+      const total = s.inputTokens + s.cacheReadTokens + s.cacheWriteTokens
+      return total > 0 ? (s.cacheReadTokens / total) * 100 : null
+    },
+  },
+  {
+    label: 'One-shot rate',
+    formatFn: 'percent',
+    higherIsBetter: true,
+    compute: s => s.editTurns > 0 ? (s.oneShotTurns / s.editTurns) * 100 : null,
+  },
+  {
+    label: 'Retry rate',
+    formatFn: 'decimal',
+    higherIsBetter: false,
+    compute: s => s.editTurns > 0 ? s.retries / s.editTurns : null,
+  },
+  {
+    label: 'Self-correction',
+    formatFn: 'percent',
+    higherIsBetter: false,
+    compute: s => s.totalTurns > 0 ? (s.selfCorrections / s.totalTurns) * 100 : null,
+  },
+]
+
+function pickWinner(valueA: number | null, valueB: number | null, higherIsBetter: boolean): ComparisonRow['winner'] {
+  if (valueA === null || valueB === null) return 'none'
+  if (valueA === valueB) return 'tie'
+  if (higherIsBetter) return valueA > valueB ? 'a' : 'b'
+  return valueA < valueB ? 'a' : 'b'
+}
+
+export function computeComparison(a: ModelStats, b: ModelStats): ComparisonRow[] {
+  return METRICS.map(m => {
+    const valueA = m.compute(a)
+    const valueB = m.compute(b)
+    return {
+      label: m.label,
+      valueA,
+      valueB,
+      formatFn: m.formatFn,
+      winner: pickWinner(valueA, valueB, m.higherIsBetter),
+    }
+  })
+}
--- a/tests/compare-stats.test.ts
+++ b/tests/compare-stats.test.ts
@ -1,5 +1,5 @@
 import { describe, it, expect } from 'vitest'
-import { aggregateModelStats, type ModelStats } from '../src/compare-stats.js'
+import { aggregateModelStats, computeComparison, type ModelStats } from '../src/compare-stats.js'
 import type { ProjectSummary, SessionSummary, ClassifiedTurn } from '../src/types.js'

 function makeTurn(model: string, cost: number, opts: { hasEdits?: boolean; retries?: number; outputTokens?: number; inputTokens?: number; cacheRead?: number; cacheWrite?: number; timestamp?: string } = {}): ClassifiedTurn {
@ -135,3 +135,77 @@ describe('aggregateModelStats', () => {
    expect(stats[1].model).toBe('cheap-model')
  })
 })
+
+function makeStats(overrides: Partial<ModelStats> = {}): ModelStats {
+  return {
+    model: 'test-model',
+    calls: 100,
+    cost: 10,
+    outputTokens: 50000,
+    inputTokens: 10000,
+    cacheReadTokens: 20000,
+    cacheWriteTokens: 5000,
+    totalTurns: 200,
+    editTurns: 80,
+    oneShotTurns: 60,
+    retries: 20,
+    selfCorrections: 10,
+    firstSeen: '2026-04-01T00:00:00Z',
+    lastSeen: '2026-04-15T00:00:00Z',
+    ...overrides,
+  }
+}
+
+describe('computeComparison', () => {
+  it('computes normalized metrics and picks winners correctly', () => {
+    const a = makeStats({ calls: 100, cost: 10, outputTokens: 50000, inputTokens: 10000, cacheReadTokens: 20000, cacheWriteTokens: 5000, editTurns: 80, oneShotTurns: 60, retries: 20, selfCorrections: 10, totalTurns: 200 })
+    const b = makeStats({ calls: 100, cost: 8, outputTokens: 40000, inputTokens: 10000, cacheReadTokens: 20000, cacheWriteTokens: 5000, editTurns: 80, oneShotTurns: 60, retries: 20, selfCorrections: 10, totalTurns: 200 })
+    const rows = computeComparison(a, b)
+
+    const costRow = rows.find(r => r.label === 'Cost / call')!
+    expect(costRow.valueA).toBeCloseTo(0.1)
+    expect(costRow.valueB).toBeCloseTo(0.08)
+    expect(costRow.winner).toBe('b')
+
+    const outputRow = rows.find(r => r.label === 'Output tok / call')!
+    expect(outputRow.valueA).toBe(500)
+    expect(outputRow.valueB).toBe(400)
+    expect(outputRow.winner).toBe('b')
+  })
+
+  it('returns null values for one-shot rate and retry rate when editTurns is zero', () => {
+    const a = makeStats({ editTurns: 0, oneShotTurns: 0, retries: 0 })
+    const b = makeStats({ editTurns: 80, oneShotTurns: 60, retries: 20 })
+    const rows = computeComparison(a, b)
+
+    const oneShotRow = rows.find(r => r.label === 'One-shot rate')!
+    expect(oneShotRow.valueA).toBeNull()
+    expect(oneShotRow.winner).toBe('none')
+
+    const retryRow = rows.find(r => r.label === 'Retry rate')!
+    expect(retryRow.valueA).toBeNull()
+    expect(retryRow.winner).toBe('none')
+  })
+
+  it('returns tie when values are equal', () => {
+    const a = makeStats({ calls: 100, cost: 10 })
+    const b = makeStats({ calls: 100, cost: 10 })
+    const rows = computeComparison(a, b)
+
+    const costRow = rows.find(r => r.label === 'Cost / call')!
+    expect(costRow.winner).toBe('tie')
+  })
+
+  it('picks higher value as winner for cache hit rate', () => {
+    const a = makeStats({ inputTokens: 5000, cacheReadTokens: 30000, cacheWriteTokens: 5000 })
+    const b = makeStats({ inputTokens: 10000, cacheReadTokens: 10000, cacheWriteTokens: 5000 })
+    const rows = computeComparison(a, b)
+
+    const cacheRow = rows.find(r => r.label === 'Cache hit rate')!
+    const totalA = 5000 + 30000 + 5000
+    const totalB = 10000 + 10000 + 5000
+    expect(cacheRow.valueA).toBeCloseTo(30000 / totalA * 100)
+    expect(cacheRow.valueB).toBeCloseTo(10000 / totalB * 100)
+    expect(cacheRow.winner).toBe('a')
+  })
+})