From ac9afffed56591a9e85534ce9126135f2c1c9c72 Mon Sep 17 00:00:00 2001
From: iamtoruk
Date: Sun, 19 Apr 2026 05:22:34 -0700
Subject: [PATCH] feat(compare): add computeComparison with normalized metrics

---
Review notes (this section is dropped by `git am`; it is not part of the commit):
  * The patch is laid out with one diff line per physical line. Indentation was
    reconstructed as two spaces -- TODO confirm the context lines against the
    target files before applying.
  * src/compare-stats.ts: computeComparison(a, b) returns one ComparisonRow per
    METRICS entry, in METRICS order. Each metric's compute() yields null when
    its denominator (calls, token total, editTurns or totalTurns) is not
    greater than zero.
  * pickWinner returns 'none' when either value is null, 'tie' when the values
    are strictly equal, and otherwise the side that higherIsBetter favours.
  * tests/compare-stats.test.ts: makeStats takes `Partial<ModelStats>`; a bare
    `Partial` has no type argument and is rejected by tsc (TS2314).

 src/compare-stats.ts        | 78 +++++++++++++++++++++++++++++++++++++
 tests/compare-stats.test.ts | 76 +++++++++++++++++++++++++++++++++++-
 2 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/src/compare-stats.ts b/src/compare-stats.ts
index 1bd5359..e01db6a 100644
--- a/src/compare-stats.ts
+++ b/src/compare-stats.ts
@@ -61,3 +61,81 @@ export function aggregateModelStats(projects: ProjectSummary[]): ModelStats[] {
 
   return [...byModel.values()].sort((a, b) => b.cost - a.cost)
 }
+
+export type ComparisonRow = {
+  label: string
+  valueA: number | null
+  valueB: number | null
+  formatFn: 'cost' | 'number' | 'percent' | 'decimal'
+  winner: 'a' | 'b' | 'tie' | 'none'
+}
+
+type MetricDef = {
+  label: string
+  formatFn: ComparisonRow['formatFn']
+  higherIsBetter: boolean
+  compute: (s: ModelStats) => number | null
+}
+
+const METRICS: MetricDef[] = [
+  {
+    label: 'Cost / call',
+    formatFn: 'cost',
+    higherIsBetter: false,
+    compute: s => s.calls > 0 ? s.cost / s.calls : null,
+  },
+  {
+    label: 'Output tok / call',
+    formatFn: 'number',
+    higherIsBetter: false,
+    compute: s => s.calls > 0 ? Math.round(s.outputTokens / s.calls) : null,
+  },
+  {
+    label: 'Cache hit rate',
+    formatFn: 'percent',
+    higherIsBetter: true,
+    compute: s => {
+      const total = s.inputTokens + s.cacheReadTokens + s.cacheWriteTokens
+      return total > 0 ? (s.cacheReadTokens / total) * 100 : null
+    },
+  },
+  {
+    label: 'One-shot rate',
+    formatFn: 'percent',
+    higherIsBetter: true,
+    compute: s => s.editTurns > 0 ? (s.oneShotTurns / s.editTurns) * 100 : null,
+  },
+  {
+    label: 'Retry rate',
+    formatFn: 'decimal',
+    higherIsBetter: false,
+    compute: s => s.editTurns > 0 ? s.retries / s.editTurns : null,
+  },
+  {
+    label: 'Self-correction',
+    formatFn: 'percent',
+    higherIsBetter: false,
+    compute: s => s.totalTurns > 0 ? (s.selfCorrections / s.totalTurns) * 100 : null,
+  },
+]
+
+function pickWinner(valueA: number | null, valueB: number | null, higherIsBetter: boolean): ComparisonRow['winner'] {
+  if (valueA === null || valueB === null) return 'none'
+  if (valueA === valueB) return 'tie'
+  if (higherIsBetter) return valueA > valueB ? 'a' : 'b'
+  return valueA < valueB ? 'a' : 'b'
+}
+
+export function computeComparison(a: ModelStats, b: ModelStats): ComparisonRow[] {
+  return METRICS.map(m => {
+    const valueA = m.compute(a)
+    const valueB = m.compute(b)
+    return {
+      label: m.label,
+      valueA,
+      valueB,
+      formatFn: m.formatFn,
+      winner: pickWinner(valueA, valueB, m.higherIsBetter),
+    }
+  })
+}
diff --git a/tests/compare-stats.test.ts b/tests/compare-stats.test.ts
index 82af105..f40469a 100644
--- a/tests/compare-stats.test.ts
+++ b/tests/compare-stats.test.ts
@@ -1,5 +1,5 @@
 import { describe, it, expect } from 'vitest'
-import { aggregateModelStats, type ModelStats } from '../src/compare-stats.js'
+import { aggregateModelStats, computeComparison, type ModelStats } from '../src/compare-stats.js'
 import type { ProjectSummary, SessionSummary, ClassifiedTurn } from '../src/types.js'
 
 function makeTurn(model: string, cost: number, opts: { hasEdits?: boolean; retries?: number; outputTokens?: number; inputTokens?: number; cacheRead?: number; cacheWrite?: number; timestamp?: string } = {}): ClassifiedTurn {
@@ -135,3 +135,77 @@ describe('aggregateModelStats', () => {
     expect(stats[1].model).toBe('cheap-model')
   })
 })
+
+function makeStats(overrides: Partial<ModelStats> = {}): ModelStats {
+  return {
+    model: 'test-model',
+    calls: 100,
+    cost: 10,
+    outputTokens: 50000,
+    inputTokens: 10000,
+    cacheReadTokens: 20000,
+    cacheWriteTokens: 5000,
+    totalTurns: 200,
+    editTurns: 80,
+    oneShotTurns: 60,
+    retries: 20,
+    selfCorrections: 10,
+    firstSeen: '2026-04-01T00:00:00Z',
+    lastSeen: '2026-04-15T00:00:00Z',
+    ...overrides,
+  }
+}
+
+describe('computeComparison', () => {
+  it('computes normalized metrics and picks winners correctly', () => {
+    const a = makeStats({ calls: 100, cost: 10, outputTokens: 50000, inputTokens: 10000, cacheReadTokens: 20000, cacheWriteTokens: 5000, editTurns: 80, oneShotTurns: 60, retries: 20, selfCorrections: 10, totalTurns: 200 })
+    const b = makeStats({ calls: 100, cost: 8, outputTokens: 40000, inputTokens: 10000, cacheReadTokens: 20000, cacheWriteTokens: 5000, editTurns: 80, oneShotTurns: 60, retries: 20, selfCorrections: 10, totalTurns: 200 })
+    const rows = computeComparison(a, b)
+
+    const costRow = rows.find(r => r.label === 'Cost / call')!
+    expect(costRow.valueA).toBeCloseTo(0.1)
+    expect(costRow.valueB).toBeCloseTo(0.08)
+    expect(costRow.winner).toBe('b')
+
+    const outputRow = rows.find(r => r.label === 'Output tok / call')!
+    expect(outputRow.valueA).toBe(500)
+    expect(outputRow.valueB).toBe(400)
+    expect(outputRow.winner).toBe('b')
+  })
+
+  it('returns null values for one-shot rate and retry rate when editTurns is zero', () => {
+    const a = makeStats({ editTurns: 0, oneShotTurns: 0, retries: 0 })
+    const b = makeStats({ editTurns: 80, oneShotTurns: 60, retries: 20 })
+    const rows = computeComparison(a, b)
+
+    const oneShotRow = rows.find(r => r.label === 'One-shot rate')!
+    expect(oneShotRow.valueA).toBeNull()
+    expect(oneShotRow.winner).toBe('none')
+
+    const retryRow = rows.find(r => r.label === 'Retry rate')!
+    expect(retryRow.valueA).toBeNull()
+    expect(retryRow.winner).toBe('none')
+  })
+
+  it('returns tie when values are equal', () => {
+    const a = makeStats({ calls: 100, cost: 10 })
+    const b = makeStats({ calls: 100, cost: 10 })
+    const rows = computeComparison(a, b)
+
+    const costRow = rows.find(r => r.label === 'Cost / call')!
+    expect(costRow.winner).toBe('tie')
+  })
+
+  it('picks higher value as winner for cache hit rate', () => {
+    const a = makeStats({ inputTokens: 5000, cacheReadTokens: 30000, cacheWriteTokens: 5000 })
+    const b = makeStats({ inputTokens: 10000, cacheReadTokens: 10000, cacheWriteTokens: 5000 })
+    const rows = computeComparison(a, b)
+
+    const cacheRow = rows.find(r => r.label === 'Cache hit rate')!
+    const totalA = 5000 + 30000 + 5000
+    const totalB = 10000 + 10000 + 5000
+    expect(cacheRow.valueA).toBeCloseTo(30000 / totalA * 100)
+    expect(cacheRow.valueB).toBeCloseTo(10000 / totalB * 100)
+    expect(cacheRow.winner).toBe('a')
+  })
+})