Merge d719d80cc5 into 4737bfb1fa

2026-05-16 19:44:14 +00:00 · 2026-05-12 03:06:33 +00:00 · 2026-05-12 03:06:33 +00:00 · e4f541f996
commit e4f541f996
parent 4737bfb1fa d719d80cc5
5 changed files with 483 additions and 2 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -93,6 +93,11 @@
 ## 0.9.7 - 2026-05-07

 ### Added (CLI)
+- **Counterfactual model pricing.** `codeburn compare --reprice <model>` now
+  recalculates the selected period as if every recorded call used the target
+  model's pricing. The output includes actual vs what-if spend, savings or
+  added cost, and the largest session/project/source-model impacts, with
+  `--json` for scripts.
 - **MCP tool coverage detector.** New `optimize` finding flags MCP servers
  whose tool inventory is largely unused. Inventory is observed from the
  Claude `deferred_tools_delta` JSONL attachments (exact tool names per
--- a/README.md
+++ b/README.md
@ -81,6 +81,7 @@ codeburn export -f json         # JSON export
 codeburn optimize               # find waste, get copy-paste fixes
 codeburn optimize -p week       # scope the scan to last 7 days
 codeburn compare                # side-by-side model comparison
+codeburn compare --reprice claude-sonnet-4-5  # what-if pricing
 codeburn yield                  # track productive vs reverted/abandoned spend
 codeburn yield -p 30days        # yield analysis for last 30 days
 codeburn models                 # per-model token + cost table (last 30 days)
@ -214,6 +215,8 @@ codeburn compare                        # interactive model picker (default: las
 codeburn compare -p week                # last 7 days
 codeburn compare -p today               # today only
 codeburn compare --provider claude      # Claude Code sessions only
+codeburn compare --reprice gpt-5.3-codex
+codeburn compare --reprice claude-sonnet-4-5 --json
 ```

 Or press `c` in the dashboard to enter compare mode. Arrow keys switch periods, `b` to return.
@ -230,6 +233,8 @@ Or press `c` in the dashboard to enter compare mode. Arrow keys switch periods,

 Also compares per-category one-shot rates, delegation rate, planning rate, average tools per turn, and fast mode usage.

+For pricing-only what-if analysis, `--reprice <model>` recalculates the selected period as if every recorded call used the target model's current pricing. It keeps the original token counts, cache reads/writes, web-search usage, and fast-mode flags, then shows total difference plus the biggest session, project, and source-model impacts. It does not simulate quality changes or different output lengths.
+
 ### Yield

 ```bash
--- a/src/cli.ts
+++ b/src/cli.ts
@ -15,6 +15,7 @@ import { renderDashboard } from './dashboard.js'
 import { formatDateRangeLabel, parseDateRangeFlags, getDateRange, toPeriod, type Period } from './cli-date.js'
 import { runOptimize, scanAndDetect } from './optimize.js'
 import { renderCompare } from './compare.js'
+import { analyzeReprice, canRepriceToModel, renderRepriceText } from './reprice.js'
 import { getAllProviders } from './providers/index.js'
 import { clearPlan, readConfig, readPlan, saveConfig, savePlan, getConfigFilePath, type PlanId } from './config.js'
 import { clampResetDay, getPlanUsageOrNull, type PlanUsage } from './plan-usage.js'
@ -894,13 +895,38 @@ program

 program
  .command('compare')
-  .description('Compare two AI models side-by-side')
+  .description('Compare models and run what-if pricing')
  .option('-p, --period <period>', 'Analysis period: today, week, 30days, month, all', 'all')
  .option('--provider <provider>', 'Filter by provider (e.g. claude, gemini, cursor, copilot)', 'all')
+  .option('--reprice <model>', 'Recalculate spend as if every call used this model')
+  .option('--json', 'Print machine-readable JSON for --reprice')
  .action(async (opts) => {
    await loadPricing()
+    const { range, label } = getDateRange(opts.period)
+
+    if (opts.reprice) {
+      if (!canRepriceToModel(opts.reprice)) {
+        console.error(`\n  Unknown pricing model: ${opts.reprice}\n`)
+        process.exitCode = 1
+        return
+      }
+      const projects = await parseAllSessions(range, opts.provider)
+      const result = analyzeReprice(projects, label, opts.reprice)
+      if (opts.json) {
+        console.log(JSON.stringify(result, null, 2))
+      } else {
+        console.log(renderRepriceText(result))
+      }
+      return
+    }
+
+    if (opts.json) {
+      console.error('\n  --json is only supported with --reprice.\n')
+      process.exitCode = 1
+      return
+    }
+
    await hydrateCache()
-    const { range } = getDateRange(opts.period)
    await renderCompare(range, opts.provider)
  })

--- a/src/reprice.ts
+++ b/src/reprice.ts
@ -0,0 +1,242 @@
+import chalk from 'chalk'
+
+import { formatCost } from './currency.js'
+import { calculateCost, getModelCosts, getShortModelName } from './models.js'
+import type { ParsedApiCall, ProjectSummary } from './types.js'
+
+const ORANGE = '#ff8c42'
+const GREEN = '#5bf5a0'
+const RED = '#ff6b6b'
+const GOLD = '#ffd700'
+const DIM = '#888888'
+const PANEL_WIDTH = 76
+const TOP_LIMIT = 10
+
+export type RepriceSummary = {
+  targetModel: string
+  actualCostUSD: number
+  repricedCostUSD: number
+  savingsUSD: number
+  savingsPercent: number | null
+  projects: number
+  sessions: number
+  calls: number
+}
+
+export type RepriceBreakdownRow = {
+  name: string
+  actualCostUSD: number
+  repricedCostUSD: number
+  savingsUSD: number
+  calls: number
+}
+
+export type RepriceSessionImpact = {
+  project: string
+  sessionId: string
+  firstTimestamp: string
+  lastTimestamp: string
+  actualCostUSD: number
+  repricedCostUSD: number
+  savingsUSD: number
+  calls: number
+}
+
+export type RepriceResult = {
+  label: string
+  summary: RepriceSummary
+  topSessions: RepriceSessionImpact[]
+  projects: RepriceBreakdownRow[]
+  sourceModels: RepriceBreakdownRow[]
+}
+
+type MutableBreakdown = {
+  actualCostUSD: number
+  repricedCostUSD: number
+  calls: number
+}
+
+function addBreakdown(map: Map<string, MutableBreakdown>, name: string, actualCostUSD: number, repricedCostUSD: number): void {
+  const row = map.get(name) ?? { actualCostUSD: 0, repricedCostUSD: 0, calls: 0 }
+  row.actualCostUSD += actualCostUSD
+  row.repricedCostUSD += repricedCostUSD
+  row.calls += 1
+  map.set(name, row)
+}
+
+function toBreakdownRows(map: Map<string, MutableBreakdown>): RepriceBreakdownRow[] {
+  return [...map.entries()]
+    .map(([name, row]) => ({
+      name,
+      actualCostUSD: row.actualCostUSD,
+      repricedCostUSD: row.repricedCostUSD,
+      savingsUSD: row.actualCostUSD - row.repricedCostUSD,
+      calls: row.calls,
+    }))
+    .sort((a, b) => Math.abs(b.savingsUSD) - Math.abs(a.savingsUSD))
+}
+
+function finiteCost(value: number): number {
+  return Number.isFinite(value) ? value : 0
+}
+
+function repriceCall(targetModel: string, call: ParsedApiCall): number {
+  if (call.model === '<synthetic>') return 0
+  return calculateCost(
+    targetModel,
+    call.usage.inputTokens,
+    call.usage.outputTokens,
+    call.usage.cacheCreationInputTokens,
+    call.usage.cacheReadInputTokens,
+    call.usage.webSearchRequests,
+    call.speed,
+  )
+}
+
+export function canRepriceToModel(model: string): boolean {
+  return getModelCosts(model) !== null
+}
+
+export function analyzeReprice(projects: ProjectSummary[], label: string, targetModel: string): RepriceResult {
+  const projectBreakdown = new Map<string, MutableBreakdown>()
+  const sourceModelBreakdown = new Map<string, MutableBreakdown>()
+  const topSessions: RepriceSessionImpact[] = []
+  let actualCostUSD = 0
+  let repricedCostUSD = 0
+  let sessions = 0
+  let calls = 0
+
+  for (const project of projects) {
+    for (const session of project.sessions) {
+      let sessionActualCostUSD = 0
+      let sessionRepricedCostUSD = 0
+      let sessionCalls = 0
+
+      for (const turn of session.turns) {
+        for (const call of turn.assistantCalls) {
+          if (call.model === '<synthetic>') continue
+          const actual = finiteCost(call.costUSD)
+          const repriced = repriceCall(targetModel, call)
+
+          actualCostUSD += actual
+          repricedCostUSD += repriced
+          sessionActualCostUSD += actual
+          sessionRepricedCostUSD += repriced
+          sessionCalls += 1
+          calls += 1
+
+          addBreakdown(projectBreakdown, project.project, actual, repriced)
+          addBreakdown(sourceModelBreakdown, call.model, actual, repriced)
+        }
+      }
+
+      if (sessionCalls > 0) {
+        sessions += 1
+        topSessions.push({
+          project: project.project,
+          sessionId: session.sessionId,
+          firstTimestamp: session.firstTimestamp,
+          lastTimestamp: session.lastTimestamp,
+          actualCostUSD: sessionActualCostUSD,
+          repricedCostUSD: sessionRepricedCostUSD,
+          savingsUSD: sessionActualCostUSD - sessionRepricedCostUSD,
+          calls: sessionCalls,
+        })
+      }
+    }
+  }
+
+  topSessions.sort((a, b) => Math.abs(b.savingsUSD) - Math.abs(a.savingsUSD))
+
+  const savingsUSD = actualCostUSD - repricedCostUSD
+  return {
+    label,
+    summary: {
+      targetModel,
+      actualCostUSD,
+      repricedCostUSD,
+      savingsUSD,
+      savingsPercent: actualCostUSD > 0 ? (savingsUSD / actualCostUSD) * 100 : null,
+      projects: projects.length,
+      sessions,
+      calls,
+    },
+    topSessions: topSessions.slice(0, TOP_LIMIT),
+    projects: toBreakdownRows(projectBreakdown),
+    sourceModels: toBreakdownRows(sourceModelBreakdown),
+  }
+}
+
+function plural(count: number, singular: string, pluralForm = `${singular}s`): string {
+  return `${count} ${count === 1 ? singular : pluralForm}`
+}
+
+function formatPercent(value: number | null): string {
+  if (value === null) return '-'
+  return `${Math.abs(value).toFixed(1)}%`
+}
+
+function formatSignedCost(value: number): string {
+  const abs = formatCost(Math.abs(value))
+  if (value > 0) return chalk.hex(GREEN)(`save ${abs}`)
+  if (value < 0) return chalk.hex(RED)(`cost +${abs}`)
+  return chalk.dim('no change')
+}
+
+function truncate(value: string, maxLength: number): string {
+  if (value.length <= maxLength) return value
+  return `${value.slice(0, Math.max(0, maxLength - 3))}...`
+}
+
+function renderBreakdown(title: string, rows: RepriceBreakdownRow[], limit: number): string[] {
+  if (rows.length === 0) return []
+  const lines: string[] = []
+  lines.push(`  ${chalk.bold(title)}`)
+  for (const row of rows.slice(0, limit)) {
+    lines.push(`  ${formatSignedCost(row.savingsUSD)}  ${chalk.bold(truncate(row.name, 34))}  ${chalk.dim(`${formatCost(row.actualCostUSD)} -> ${formatCost(row.repricedCostUSD)}`)}${chalk.dim(`  ${plural(row.calls, 'call')}`)}`)
+  }
+  lines.push('')
+  return lines
+}
+
+export function renderRepriceText(result: RepriceResult): string {
+  const { summary } = result
+  const lines: string[] = []
+  const targetLabel = getShortModelName(summary.targetModel)
+  lines.push('')
+  lines.push(`  ${chalk.bold.hex(ORANGE)('CodeBurn what-if pricing')}${chalk.dim('  ' + result.label)}`)
+  lines.push(chalk.hex(DIM)('  ' + '-'.repeat(PANEL_WIDTH)))
+  lines.push(`  Reprice target: ${chalk.bold(targetLabel)}${targetLabel !== summary.targetModel ? chalk.dim(` (${summary.targetModel})`) : ''}`)
+  lines.push('  ' + [
+    plural(summary.projects, 'project'),
+    plural(summary.sessions, 'session'),
+    plural(summary.calls, 'call'),
+  ].join(chalk.hex(DIM)('   ')))
+  lines.push('')
+
+  if (summary.calls === 0) {
+    lines.push(chalk.dim('  No usage data found for this period.'))
+    lines.push('')
+    return lines.join('\n')
+  }
+
+  lines.push(`  Actual spend:     ${chalk.hex(GOLD)(formatCost(summary.actualCostUSD))}`)
+  lines.push(`  What-if spend:    ${chalk.hex(GOLD)(formatCost(summary.repricedCostUSD))}`)
+  lines.push(`  Difference:       ${formatSignedCost(summary.savingsUSD)} ${chalk.dim(`(${formatPercent(summary.savingsPercent)})`)}`)
+  lines.push('')
+
+  if (result.topSessions.length > 0) {
+    lines.push(`  ${chalk.bold('Top session impacts')}`)
+    for (const session of result.topSessions) {
+      const target = `${session.project}/${session.sessionId}`
+      lines.push(`  ${formatSignedCost(session.savingsUSD)}  ${chalk.bold(truncate(target, 38))}  ${chalk.dim(`${formatCost(session.actualCostUSD)} -> ${formatCost(session.repricedCostUSD)}`)}${chalk.dim(`  ${plural(session.calls, 'call')}`)}`)
+    }
+    lines.push('')
+  }
+
+  lines.push(...renderBreakdown('By project', result.projects, 5))
+  lines.push(...renderBreakdown('By source model', result.sourceModels, 5))
+  lines.push(chalk.dim('  Uses recorded token counts and tool/web-search usage; model quality and output length are not simulated.'))
+  lines.push('')
+  return lines.join('\n')
+}
--- a/tests/reprice.test.ts
+++ b/tests/reprice.test.ts
@ -0,0 +1,203 @@
+import { describe, expect, it } from 'vitest'
+
+import { calculateCost } from '../src/models.js'
+import { analyzeReprice, canRepriceToModel, renderRepriceText } from '../src/reprice.js'
+import type { ClassifiedTurn, ParsedApiCall, ProjectSummary, SessionSummary } from '../src/types.js'
+
+function makeCall(model: string, costUSD: number, opts: {
+  inputTokens?: number
+  outputTokens?: number
+  cacheRead?: number
+  cacheWrite?: number
+  webSearch?: number
+  speed?: 'standard' | 'fast'
+  timestamp?: string
+} = {}): ParsedApiCall {
+  const timestamp = opts.timestamp ?? '2026-05-05T10:00:00Z'
+  return {
+    provider: 'claude',
+    model,
+    usage: {
+      inputTokens: opts.inputTokens ?? 1000,
+      outputTokens: opts.outputTokens ?? 500,
+      cacheCreationInputTokens: opts.cacheWrite ?? 100,
+      cacheReadInputTokens: opts.cacheRead ?? 2000,
+      cachedInputTokens: 0,
+      reasoningTokens: 0,
+      webSearchRequests: opts.webSearch ?? 0,
+    },
+    costUSD,
+    tools: [],
+    mcpTools: [],
+    skills: [],
+    hasAgentSpawn: false,
+    hasPlanMode: false,
+    speed: opts.speed ?? 'standard',
+    timestamp,
+    bashCommands: [],
+    deduplicationKey: `${model}:${timestamp}:${costUSD}`,
+  }
+}
+
+function makeTurn(calls: ParsedApiCall[], timestamp = '2026-05-05T10:00:00Z'): ClassifiedTurn {
+  return {
+    userMessage: 'test',
+    assistantCalls: calls,
+    timestamp,
+    sessionId: 's1',
+    category: 'coding',
+    retries: 0,
+    hasEdits: true,
+  }
+}
+
+function makeSession(project: string, sessionId: string, turns: ClassifiedTurn[]): SessionSummary {
+  const costs = turns.flatMap(turn => turn.assistantCalls).reduce((sum, call) => sum + call.costUSD, 0)
+  return {
+    sessionId,
+    project,
+    firstTimestamp: turns[0]?.timestamp ?? '',
+    lastTimestamp: turns[turns.length - 1]?.timestamp ?? '',
+    totalCostUSD: costs,
+    totalInputTokens: 0,
+    totalOutputTokens: 0,
+    totalCacheReadTokens: 0,
+    totalCacheWriteTokens: 0,
+    apiCalls: turns.reduce((sum, turn) => sum + turn.assistantCalls.length, 0),
+    turns,
+    modelBreakdown: {},
+    toolBreakdown: {},
+    mcpBreakdown: {},
+    bashBreakdown: {},
+    categoryBreakdown: {} as SessionSummary['categoryBreakdown'],
+    skillBreakdown: {},
+  }
+}
+
+function makeProject(project: string, sessions: SessionSummary[]): ProjectSummary {
+  return {
+    project,
+    projectPath: `/tmp/${project}`,
+    sessions,
+    totalCostUSD: sessions.reduce((sum, session) => sum + session.totalCostUSD, 0),
+    totalApiCalls: sessions.reduce((sum, session) => sum + session.apiCalls, 0),
+  }
+}
+
+describe('canRepriceToModel', () => {
+  it('recognizes known pricing models and aliases', () => {
+    expect(canRepriceToModel('gpt-4o-mini')).toBe(true)
+    expect(canRepriceToModel('claude-sonnet-4.5')).toBe(true)
+    expect(canRepriceToModel('not-a-real-model')).toBe(false)
+  })
+})
+
+describe('analyzeReprice', () => {
+  it('recalculates every real call with the target model price', () => {
+    const targetModel = 'gpt-4o-mini'
+    const call = makeCall('claude-opus-4-7', 1.5, {
+      inputTokens: 1200,
+      outputTokens: 300,
+      cacheWrite: 80,
+      cacheRead: 400,
+      webSearch: 2,
+    })
+    const expected = calculateCost(
+      targetModel,
+      call.usage.inputTokens,
+      call.usage.outputTokens,
+      call.usage.cacheCreationInputTokens,
+      call.usage.cacheReadInputTokens,
+      call.usage.webSearchRequests,
+      call.speed,
+    )
+
+    const result = analyzeReprice([
+      makeProject('api', [makeSession('api', 's1', [makeTurn([call])])]),
+    ], 'Today', targetModel)
+
+    expect(result.summary.actualCostUSD).toBeCloseTo(1.5)
+    expect(result.summary.repricedCostUSD).toBeCloseTo(expected)
+    expect(result.summary.savingsUSD).toBeCloseTo(1.5 - expected)
+    expect(result.summary.calls).toBe(1)
+    expect(result.summary.sessions).toBe(1)
+  })
+
+  it('preserves fast-mode pricing semantics for the target model', () => {
+    const targetModel = 'claude-opus-4-7'
+    const call = makeCall('claude-sonnet-4-5', 0.5, {
+      speed: 'fast',
+      inputTokens: 1000,
+      outputTokens: 1000,
+      cacheWrite: 0,
+      cacheRead: 0,
+    })
+
+    const result = analyzeReprice([
+      makeProject('api', [makeSession('api', 's1', [makeTurn([call])])]),
+    ], 'Today', targetModel)
+
+    expect(result.summary.repricedCostUSD).toBeCloseTo(calculateCost(targetModel, 1000, 1000, 0, 0, 0, 'fast'))
+  })
+
+  it('breaks impact down by project and source model', () => {
+    const targetModel = 'gpt-4o-mini'
+    const apiCall = makeCall('claude-opus-4-7', 2, { inputTokens: 1000 })
+    const webCall = makeCall('claude-sonnet-4-5', 1, { inputTokens: 500 })
+
+    const result = analyzeReprice([
+      makeProject('api', [makeSession('api', 's1', [makeTurn([apiCall])])]),
+      makeProject('web', [makeSession('web', 's2', [makeTurn([webCall])])]),
+    ], 'Today', targetModel)
+
+    expect(result.projects.map(row => row.name).sort()).toEqual(['api', 'web'])
+    expect(result.sourceModels.map(row => row.name).sort()).toEqual(['claude-opus-4-7', 'claude-sonnet-4-5'])
+    expect(result.projects.reduce((sum, row) => sum + row.calls, 0)).toBe(2)
+    expect(result.sourceModels.reduce((sum, row) => sum + row.calls, 0)).toBe(2)
+  })
+
+  it('skips synthetic calls', () => {
+    const targetModel = 'gpt-4o-mini'
+    const result = analyzeReprice([
+      makeProject('api', [
+        makeSession('api', 's1', [makeTurn([makeCall('<synthetic>', 10)])]),
+      ]),
+    ], 'Today', targetModel)
+
+    expect(result.summary.calls).toBe(0)
+    expect(result.summary.actualCostUSD).toBe(0)
+    expect(result.summary.repricedCostUSD).toBe(0)
+    expect(result.topSessions).toEqual([])
+  })
+
+  it('sorts top sessions by absolute impact', () => {
+    const targetModel = 'gpt-4o-mini'
+    const result = analyzeReprice([
+      makeProject('api', [
+        makeSession('api', 'small', [makeTurn([makeCall('claude-opus-4-7', 1)])]),
+        makeSession('api', 'large', [makeTurn([makeCall('claude-opus-4-7', 5)])]),
+      ]),
+    ], 'Today', targetModel)
+
+    expect(result.topSessions[0]?.sessionId).toBe('large')
+  })
+})
+
+describe('renderRepriceText', () => {
+  it('renders empty usage clearly', () => {
+    const result = analyzeReprice([], 'Today', 'gpt-4o-mini')
+    expect(renderRepriceText(result)).toContain('No usage data found for this period.')
+  })
+
+  it('renders summary and target model', () => {
+    const result = analyzeReprice([
+      makeProject('api', [makeSession('api', 's1', [makeTurn([makeCall('claude-opus-4-7', 1)])])]),
+    ], 'Today', 'gpt-4o-mini')
+
+    const text = renderRepriceText(result)
+    expect(text).toContain('CodeBurn what-if pricing')
+    expect(text).toContain('gpt-4o-mini')
+    expect(text).toContain('Actual spend')
+    expect(text).toContain('What-if spend')
+  })
+})