diff --git a/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift b/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift index 4e9d07b..65811e4 100644 --- a/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift +++ b/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift @@ -31,6 +31,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate { /// Held for the lifetime of the app to opt out of App Nap and Automatic Termination. private var backgroundActivity: NSObjectProtocol? private var pendingRefreshWork: DispatchWorkItem? + private var refreshLoopTask: Task? func applicationWillFinishLaunching(_ notification: Notification) { // Set accessory policy before the app's focus chain forms. On macOS Tahoe @@ -60,12 +61,34 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate { } private func setupWakeObservers() { + // Pause the refresh loop while the machine is asleep. Without this, + // Task.sleep keeps a wakeup pending across the suspension and the + // loop tick fires the same instant the wake notifications do, + // producing 2-3 concurrent CLI spawns within ms of every wake. + NSWorkspace.shared.notificationCenter.addObserver( + forName: NSWorkspace.willSleepNotification, + object: nil, + queue: .main + ) { [weak self] _ in + Task { @MainActor in + self?.refreshLoopTask?.cancel() + self?.refreshLoopTask = nil + } + } + + // didWakeNotification + screensDidWakeNotification can both fire on + // the same wake. forceRefresh has a 5-second rate-limit gate so the + // duplicate is squashed there. Restart the refresh loop too, since + // we cancelled it on willSleep. NSWorkspace.shared.notificationCenter.addObserver( forName: NSWorkspace.didWakeNotification, object: nil, queue: .main ) { [weak self] _ in - Task { @MainActor in self?.forceRefresh() } + Task { @MainActor in + self?.forceRefresh() + if self?.refreshLoopTask == nil { self?.startRefreshLoop() } + } } NSWorkspace.shared.notificationCenter.addObserver( @@ -211,26 +234,42 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate { } private func startRefreshLoop() { - Task { [weak self] in + refreshLoopTask?.cancel() + refreshLoopTask = Task { [weak self] in while !Task.isCancelled { guard let self else { return } - if self.store.selectedPeriod != .today || self.store.selectedProvider != .all { - await self.store.refreshQuietly(period: .today) + // Skip the loop's tick if a wake / manual / distributed- + // notification refresh just ran. Without this gate, every + // wake produced two refreshes (forceRefresh from the wake + // observer plus the loop's natural tick). + let sinceLast = Date().timeIntervalSince(self.lastRefreshTime) + if sinceLast >= 5 { + if self.store.selectedPeriod != .today || self.store.selectedProvider != .all { + await self.store.refreshQuietly(period: .today) + } + await self.store.refresh(includeOptimize: false, force: true) + self.lastRefreshTime = Date() + self.refreshStatusButton() } - await self.store.refresh(includeOptimize: false, force: true) - self.refreshStatusButton() try? await Task.sleep(nanoseconds: refreshIntervalNanos) } } } private func observeStore() { - withObservationTracking { - _ = store.payload - _ = store.todayPayload - // Track currency too so the menubar title catches up immediately on + // Read closure uses [weak self] so the implicit self capture from + // accessing store.* doesn't pin self for the lifetime of an + // unfired observation. withObservationTracking is one-shot per + // call: once any read property changes, onChange fires and the + // registration is consumed, then we re-arm. There is at most one + // active subscription at a time. + withObservationTracking { [weak self] in + guard let self else { return } + _ = self.store.payload + _ = self.store.todayPayload + // Track currency so the menubar title catches up immediately on // currency switch instead of waiting for the next 30s payload tick. - _ = store.currency + _ = self.store.currency } onChange: { [weak self] in DispatchQueue.main.async { guard let self else { return } diff --git a/mac/Sources/CodeBurnMenubar/Views/HeatmapSection.swift b/mac/Sources/CodeBurnMenubar/Views/HeatmapSection.swift index ec27b48..2e2dc3a 100644 --- a/mac/Sources/CodeBurnMenubar/Views/HeatmapSection.swift +++ b/mac/Sources/CodeBurnMenubar/Views/HeatmapSection.swift @@ -5,6 +5,36 @@ private let trendBarWidth: CGFloat = 13 private let trendBarGap: CGFloat = 4 private let trendChartHeight: CGFloat = 90 +// Cached formatters and a calendar to avoid allocating fresh ones on every +// SwiftUI body re-eval. Hover scrubbing on the trend bars triggers many +// re-evals per second; a fresh DateFormatter / Calendar each time was a +// measurable hot spot. +private let yyyymmdd: DateFormatter = { + let f = DateFormatter() + f.dateFormat = "yyyy-MM-dd" + f.timeZone = .current + return f +}() + +private let prettyDayFormat: DateFormatter = { + let f = DateFormatter() + f.dateFormat = "EEE MMM d" + return f +}() + +private let mmmDayFormat: DateFormatter = { + let f = DateFormatter() + f.dateFormat = "MMM d" + f.timeZone = .current + return f +}() + +private let gregorianCalendar: Calendar = { + var c = Calendar(identifier: .gregorian) + c.timeZone = .current + return c +}() + /// Three switchable insight visualizations: Calendar (this month), Forecast (burn rate), /// Pulse (efficiency KPIs). Pills at top toggle between them. struct HeatmapSection: View { @@ -342,13 +372,8 @@ private struct BarTooltipCard: View { } private func prettyDate(_ ymd: String) -> String { - let parser = DateFormatter() - parser.dateFormat = "yyyy-MM-dd" - parser.timeZone = .current - guard let date = parser.date(from: ymd) else { return ymd } - let display = DateFormatter() - display.dateFormat = "EEE MMM d" - return display.string(from: date) + guard let date = yyyymmdd.date(from: ymd) else { return ymd } + return prettyDayFormat.string(from: date) } private struct MiniStat: View { @@ -391,14 +416,8 @@ private struct TrendStats { } private func buildTrendBars(from days: [DailyHistoryEntry]) -> [TrendBar] { - var calendar = Calendar(identifier: .gregorian) - calendar.timeZone = .current - let formatter: DateFormatter = { - let f = DateFormatter() - f.dateFormat = "yyyy-MM-dd" - f.timeZone = .current - return f - }() + let calendar = gregorianCalendar + let formatter = yyyymmdd let entryByDate = Dictionary(days.map { ($0.date, $0) }, uniquingKeysWith: { _, new in new }) let today = calendar.startOfDay(for: Date()) let todayKey = formatter.string(from: today) @@ -426,14 +445,8 @@ private func computeTrendStats(bars: [TrendBar], allDays: [DailyHistoryEntry]) - let avg = bars.isEmpty ? 0 : total / Double(bars.count) let peak = bars.filter { $0.cost > 0 }.max(by: { $0.cost < $1.cost }) - var calendar = Calendar(identifier: .gregorian) - calendar.timeZone = .current - let formatter: DateFormatter = { - let f = DateFormatter() - f.dateFormat = "yyyy-MM-dd" - f.timeZone = .current - return f - }() + let calendar = gregorianCalendar + let formatter = yyyymmdd let today = calendar.startOfDay(for: Date()) let priorWindowStart = calendar.date(byAdding: .day, value: -(2 * trendDays - 1), to: today) let thisWindowStart = calendar.date(byAdding: .day, value: -(trendDays - 1), to: today) @@ -546,14 +559,8 @@ private struct ForecastStats { } private func computeForecast(days: [DailyHistoryEntry]) -> ForecastStats { - var calendar = Calendar(identifier: .gregorian) - calendar.timeZone = .current - let formatter: DateFormatter = { - let f = DateFormatter() - f.dateFormat = "yyyy-MM-dd" - f.timeZone = .current - return f - }() + let calendar = gregorianCalendar + let formatter = yyyymmdd let now = Date() let comps = calendar.dateComponents([.year, .month, .day], from: now) guard @@ -797,20 +804,9 @@ private struct AllStats { let history = payload.history.daily let favoriteModel = payload.current.topModels.first?.name ?? "—" - var calendar = Calendar(identifier: .gregorian) - calendar.timeZone = .current - let formatter: DateFormatter = { - let f = DateFormatter() - f.dateFormat = "yyyy-MM-dd" - f.timeZone = .current - return f - }() - let displayFormatter: DateFormatter = { - let f = DateFormatter() - f.dateFormat = "MMM d" - f.timeZone = .current - return f - }() + let calendar = gregorianCalendar + let formatter = yyyymmdd + let displayFormatter = mmmDayFormat let now = Date() let today = calendar.startOfDay(for: now) diff --git a/src/cli-date.ts b/src/cli-date.ts index 2adfe97..f62b401 100644 --- a/src/cli-date.ts +++ b/src/cli-date.ts @@ -29,12 +29,17 @@ export const PERIOD_LABELS: Record = { all: '6 Months', } +const VALID_PERIODS: ReadonlyArray = ['today', 'week', '30days', 'month', 'all'] + export function toPeriod(s: string): Period { - if (s === 'today') return 'today' - if (s === 'month') return 'month' - if (s === '30days') return '30days' - if (s === 'all') return 'all' - return 'week' + if ((VALID_PERIODS as readonly string[]).includes(s)) return s as Period + // Fail loudly instead of silently coercing to 'week'. Previously a typo + // like `-p mounth` produced a quiet 7-day report and the user thought + // they were viewing the month. + process.stderr.write( + `codeburn: unknown period "${s}". Valid values: ${VALID_PERIODS.join(', ')}.\n` + ) + process.exit(1) } function parseLocalDate(s: string): Date { @@ -49,7 +54,14 @@ export function parseDateRangeFlags(from: string | undefined, to: string | undef if (from === undefined && to === undefined) return null const now = new Date() - const start = from !== undefined ? parseLocalDate(from) : new Date(0) + // When --from is omitted, default to 6 months back (the same window the + // dashboard's "all" period uses) instead of epoch. Previously a bare + // `--to 2026-01-01` opened a 55-year scan from 1970 which is rarely what + // the user meant and is expensive on machines with many session files. + const ALL_TIME_FALLBACK_MS = 6 * 31 * 24 * 60 * 60 * 1000 + const start = from !== undefined + ? parseLocalDate(from) + : new Date(now.getTime() - ALL_TIME_FALLBACK_MS) const endDate = to !== undefined ? parseLocalDate(to) : new Date(now.getFullYear(), now.getMonth(), now.getDate()) const end = new Date( diff --git a/src/cli.ts b/src/cli.ts index 396e811..470614d 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -271,7 +271,7 @@ program .option('--format ', 'Output format: tui, json', 'tui') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) - .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInt, 30) + .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInteger, 30) .action(async (opts) => { let customRange: DateRange | null = null try { @@ -515,7 +515,7 @@ program .option('--format ', 'Output format: tui, json', 'tui') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) - .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInt, 30) + .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInteger, 30) .action(async (opts) => { if (opts.format === 'json') { await runJsonReport('today', opts.provider, opts.project, opts.exclude) @@ -532,7 +532,7 @@ program .option('--format ', 'Output format: tui, json', 'tui') .option('--project ', 'Show only projects matching name (repeatable)', collect, []) .option('--exclude ', 'Exclude projects matching name (repeatable)', collect, []) - .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInt, 30) + .option('--refresh ', 'Auto-refresh interval in seconds (0 to disable)', parseInteger, 30) .action(async (opts) => { if (opts.format === 'json') { await runJsonReport('month', opts.provider, opts.project, opts.exclude) diff --git a/src/config.ts b/src/config.ts index 47a2b50..12fec8f 100644 --- a/src/config.ts +++ b/src/config.ts @@ -1,6 +1,7 @@ import { readFile, writeFile, mkdir, rename } from 'fs/promises' import { join } from 'path' import { homedir } from 'os' +import { randomBytes } from 'crypto' export type PlanId = 'claude-pro' | 'claude-max' | 'claude-max-5x' | 'cursor-pro' | 'custom' | 'none' export type PlanProvider = 'claude' | 'codex' | 'cursor' | 'all' @@ -42,7 +43,11 @@ export async function readConfig(): Promise { export async function saveConfig(config: CodeburnConfig): Promise { await mkdir(getConfigDir(), { recursive: true }) const configPath = getConfigPath() - const tmpPath = `${configPath}.tmp` + // Randomize the temp path so two simultaneous saveConfig calls (from + // overlapping menubar + CLI runs, for example) do not race on the same + // staging file. The previous fixed `.tmp` suffix could leave one + // process reading partial bytes the other was mid-writing. + const tmpPath = `${configPath}.${randomBytes(8).toString('hex')}.tmp` await writeFile(tmpPath, JSON.stringify(config, null, 2) + '\n', 'utf-8') await rename(tmpPath, configPath) } diff --git a/src/currency.ts b/src/currency.ts index 9901698..bc2e792 100644 --- a/src/currency.ts +++ b/src/currency.ts @@ -98,13 +98,19 @@ async function getExchangeRate(code: string): Promise { const cached = await loadCachedRate(code) if (cached) return cached + let rate: number try { - const rate = await fetchRate(code) - await cacheRate(code, rate) - return rate + rate = await fetchRate(code) } catch { return 1 } + // Persist the rate, but never let a cache-write failure (disk full, no + // permissions, etc.) cause us to return the USD-equivalent fallback. + // The original code wrapped fetch + cacheRate in one try/catch, so a + // disk-full at write time would discard a perfectly good rate and silently + // make every cost render as if the user had selected USD. + cacheRate(code, rate).catch(() => {}) + return rate } export async function loadCurrency(): Promise { @@ -137,9 +143,13 @@ export function getCostColumnHeader(): string { } export function convertCost(costUSD: number): number { - const digits = getFractionDigits(active.code) - const factor = 10 ** digits - return Math.round(costUSD * active.rate * factor) / factor + // Return the unrounded converted cost. Rounding here meant zero-fraction + // currencies (JPY, KRW, CLP) clamped every per-session cost to the nearest + // whole unit before aggregation; a project with 1000 sessions averaging + // ¥0.4 each would aggregate to ¥0 instead of ¥400 because each row was + // rounded independently. formatCost (and the export rowsToCsv path) round + // at the display boundary instead. + return costUSD * active.rate } export function formatCost(costUSD: number): string { diff --git a/src/cursor-cache.ts b/src/cursor-cache.ts index 62cc394..cbdf9c5 100644 --- a/src/cursor-cache.ts +++ b/src/cursor-cache.ts @@ -1,6 +1,7 @@ -import { readFile, writeFile, mkdir, stat } from 'fs/promises' +import { readFile, writeFile, mkdir, rename, stat, unlink } from 'fs/promises' import { join } from 'path' import { homedir } from 'os' +import { randomBytes } from 'crypto' import type { ParsedProviderCall } from './providers/types.js' @@ -50,18 +51,30 @@ export async function readCachedResults(dbPath: string): Promise { - try { - const fp = await getDbFingerprint(dbPath) - if (!fp) return + const fp = await getDbFingerprint(dbPath) + if (!fp) return - const dir = getCacheDir() - await mkdir(dir, { recursive: true }) - const cache: ResultCache = { - version: CURSOR_CACHE_VERSION, - dbMtimeMs: fp.mtimeMs, - dbSizeBytes: fp.size, - calls, - } - await writeFile(getCachePath(), JSON.stringify(cache), 'utf-8') - } catch {} + const dir = getCacheDir() + await mkdir(dir, { recursive: true }).catch(() => {}) + const cache: ResultCache = { + version: CURSOR_CACHE_VERSION, + dbMtimeMs: fp.mtimeMs, + dbSizeBytes: fp.size, + calls, + } + + // Atomic write: stage to a randomized temp file in the same directory, + // then rename onto the final path. rename() is atomic on POSIX, so a + // crash mid-write never leaves a half-written cache, and concurrent + // CLI invocations using their own random temp names cannot interleave + // bytes in the destination file (they only race on the final rename, + // last-writer-wins, both with valid content). + const target = getCachePath() + const tempPath = `${target}.${randomBytes(8).toString('hex')}.tmp` + try { + await writeFile(tempPath, JSON.stringify(cache), 'utf-8') + await rename(tempPath, target) + } catch { + await unlink(tempPath).catch(() => {}) + } } diff --git a/src/daily-cache.ts b/src/daily-cache.ts index 096e2c6..3455662 100644 --- a/src/daily-cache.ts +++ b/src/daily-cache.ts @@ -133,10 +133,24 @@ export function addNewDays(cache: DailyCache, incoming: DailyEntry[], newestDate byDate.set(day.date, day) } const merged = Array.from(byDate.values()).sort((a, b) => a.date.localeCompare(b.date)) + // Prune entries older than the BACKFILL window so the cache file does not + // grow unbounded over years of daily use. The "all time" / 6-month period + // and the BACKFILL_DAYS bootstrap both fit comfortably inside this cap. + // Anchor the cap on the newestDate boundary so a stale or stuck clock + // can't accidentally evict everything. Skip the prune entirely if + // newestDate is malformed — an invalid Date would produce a NaN cutoff + // and `d.date >= "Invalid Date"` would silently drop every entry. + const cutoffDate = new Date(`${newestDate}T00:00:00Z`) + let pruned = merged + if (!isNaN(cutoffDate.getTime())) { + cutoffDate.setUTCDate(cutoffDate.getUTCDate() - DAILY_CACHE_RETENTION_DAYS) + const cutoff = toDateString(cutoffDate) + pruned = merged.filter(d => d.date >= cutoff) + } const nextLast = cache.lastComputedDate && cache.lastComputedDate > newestDate ? cache.lastComputedDate : newestDate - return { version: DAILY_CACHE_VERSION, lastComputedDate: nextLast, days: merged } + return { version: DAILY_CACHE_VERSION, lastComputedDate: nextLast, days: pruned } } export function getDaysInRange(cache: DailyCache, start: string, end: string): DailyEntry[] { @@ -153,6 +167,10 @@ export function withDailyCacheLock(fn: () => Promise): Promise { export const MS_PER_DAY = 24 * 60 * 60 * 1000 export const BACKFILL_DAYS = 365 +// Keep 2 years of history so the longest UI-exposed period (6 months +// today, with headroom for future longer windows) always reads from +// cache while old entries get pruned. +export const DAILY_CACHE_RETENTION_DAYS = 730 export function toDateString(date: Date): string { return `${date.getFullYear()}-${String(date.getMonth() + 1).padStart(2, '0')}-${String(date.getDate()).padStart(2, '0')}` diff --git a/src/dashboard.tsx b/src/dashboard.tsx index f9efe6a..f1e53ba 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -760,12 +760,18 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, if (input === 'o' && findingCount > 0 && view === 'dashboard' && optimizeAvailable) { setView('optimize'); return } if ((input === 'b' || key.escape) && view === 'optimize') { setView('dashboard'); return } if (input === 'c' && compareAvailable && view === 'dashboard') { setView('compare'); return } + if ((input === 'b' || key.escape) && view === 'compare') { setView('dashboard'); return } if (input === 'p' && multipleProviders && view !== 'compare') { const opts = ['all', ...detectedProviders]; const next = opts[(opts.indexOf(activeProvider) + 1) % opts.length] setActiveProvider(next); setView('dashboard') if (debounceRef.current) clearTimeout(debounceRef.current) reloadData(period, next); return } + // Period switches reload the underlying data. Disable them while the + // compare view is mounted; the compare view re-aggregates from + // `projects` and would visibly change underneath the user without any + // affordance back to the dashboard. Press `b` or Esc to return first. + if (view === 'compare') return const idx = PERIODS.indexOf(period) if (key.leftArrow) switchPeriod(PERIODS[(idx - 1 + PERIODS.length) % PERIODS.length]!) else if (key.rightArrow || key.tab) switchPeriod(PERIODS[(idx + 1) % PERIODS.length]!) diff --git a/src/export.ts b/src/export.ts index d51406e..b7533fd 100644 --- a/src/export.ts +++ b/src/export.ts @@ -1,4 +1,4 @@ -import { writeFile, mkdir, readdir, stat, rm } from 'fs/promises' +import { writeFile, mkdir, readdir, open, stat, rm } from 'fs/promises' import { dirname, join, resolve } from 'path' import { CATEGORY_LABELS, type ProjectSummary, type TaskCategory } from './types.js' @@ -357,6 +357,33 @@ export async function exportJson(periods: PeriodExport[], outputPath: string): P } const target = resolve(outputPath.toLowerCase().endsWith('.json') ? outputPath : `${outputPath}.json`) + // Refuse to overwrite an existing file that wasn't produced by codeburn + // export. CSV path has the same guard via the .codeburn-export marker; JSON + // was missing it, so a stray `-o ~/important.json` would silently clobber. + const existing = await stat(target).catch(() => null) + if (existing?.isFile()) { + // Read just the first 4KB to look for the schema marker. The schema key + // is the first field in the JSON object so a partial read is enough; + // loading the whole file (potentially gigabytes) into memory could OOM + // on Node's ~512MB string limit. + const fh = await open(target, 'r') + try { + const buf = Buffer.alloc(4096) + const { bytesRead } = await fh.read(buf, 0, buf.length, 0) + const head = buf.toString('utf-8', 0, bytesRead) + if (!head.includes('"schema": "codeburn.export.v')) { + throw new Error( + `Refusing to overwrite ${target}: file does not look like a codeburn export. ` + + `Delete it manually or pick a different -o path.` + ) + } + } finally { + await fh.close() + } + } + if (existing?.isDirectory()) { + throw new Error(`Refusing to overwrite directory at ${target}. Pass a file path instead.`) + } await mkdir(dirname(target), { recursive: true }) await writeFile(target, JSON.stringify(data, null, 2), 'utf-8') return target diff --git a/src/format.ts b/src/format.ts index ee44619..826c04c 100644 --- a/src/format.ts +++ b/src/format.ts @@ -8,9 +8,13 @@ import { formatCost } from './currency.js' export { formatCost } export function formatTokens(n: number): string { + // Guard against Infinity / NaN / negatives that would otherwise leak into + // the UI as "Infinity" or "NaN" strings when an upstream calculation glitches. + if (!Number.isFinite(n)) return '?' + if (n < 0) return '0' if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M` if (n >= 1_000) return `${(n / 1_000).toFixed(1)}K` - return n.toString() + return Math.round(n).toString() } /// Returns YYYY-MM-DD for the given date in the process-local timezone. Cheaper than shelling diff --git a/src/models.ts b/src/models.ts index f338c14..626bf60 100644 --- a/src/models.ts +++ b/src/models.ts @@ -48,6 +48,14 @@ function loadSnapshot(): Map { } let pricingCache: Map = loadSnapshot() +let sortedPricingKeys: string[] | null = null + +function getSortedPricingKeys(): string[] { + if (sortedPricingKeys === null) { + sortedPricingKeys = Array.from(pricingCache.keys()).sort((a, b) => b.length - a.length) + } + return sortedPricingKeys +} function getCacheDir(): string { return join(homedir(), '.cache', 'codeburn') @@ -110,11 +118,13 @@ export async function loadPricing(): Promise { const cached = await loadCachedPricing() if (cached) { pricingCache = cached + sortedPricingKeys = null return } try { pricingCache = await fetchAndCachePricing() + sortedPricingKeys = null } catch { // snapshot already loaded at init; nothing more to do } @@ -192,13 +202,23 @@ export function getModelCosts(model: string): ModelCosts | null { const canonical = resolveAlias(getCanonicalName(model)) if (pricingCache.has(canonical)) return pricingCache.get(canonical)! - for (const [key, costs] of pricingCache) { - if (canonical.startsWith(key + '-') || canonical.startsWith(key)) return costs + // Iterate keys longest-first so a model id like `gpt-5-mini` matches the + // `gpt-5-mini` entry rather than collapsing to the shorter `gpt-5` entry + // due to dictionary insertion order. + for (const key of getSortedPricingKeys()) { + if (canonical.startsWith(key + '-') || canonical === key) { + return pricingCache.get(key)! + } } return null } +// Warn at most once per unknown model name per process. Without this, a model +// missing from the pricing snapshot would silently price at $0 for every +// session that used it, hiding real spend until the user noticed. +const warnedUnknownModels = new Set() + export function calculateCost( model: string, inputTokens: number, @@ -209,16 +229,39 @@ export function calculateCost( speed: 'standard' | 'fast' = 'standard', ): number { const costs = getModelCosts(model) - if (!costs) return 0 + if (!costs) { + // Skip the synthetic placeholder and the auto-router pseudo-models that + // intentionally have no direct pricing entry; calculateCost callers + // resolve those through aliasing first, so an unknown here is genuinely + // an unmapped real model. + if (model && model !== '' && !warnedUnknownModels.has(model)) { + warnedUnknownModels.add(model) + // Strip control characters and cap length: model names come from JSONL + // payloads written by external tools, so a hostile or corrupt file + // could embed terminal escape sequences here. + const safeName = model.replace(/[\x00-\x1F\x7F-\x9F]/g, '?').slice(0, 200) + process.stderr.write( + `codeburn: no pricing data for model "${safeName}" — costs for this model will show $0. ` + + `Update with: npx codeburn@latest, or report at https://github.com/getagentseal/codeburn/issues.\n` + ) + } + return 0 + } const multiplier = speed === 'fast' ? costs.fastMultiplier : 1 + // Clamp negative inputs to 0. A corrupt JSONL that emits a negative token + // count would otherwise produce a negative cost that silently subtracts + // from real spend in aggregate totals. NaN is also handled here; the + // arithmetic below short-circuits to 0 when any operand is non-finite. + const safe = (n: number) => (Number.isFinite(n) && n > 0 ? n : 0) + return multiplier * ( - inputTokens * costs.inputCostPerToken + - outputTokens * costs.outputCostPerToken + - cacheCreationTokens * costs.cacheWriteCostPerToken + - cacheReadTokens * costs.cacheReadCostPerToken + - webSearchRequests * costs.webSearchCostPerRequest + safe(inputTokens) * costs.inputCostPerToken + + safe(outputTokens) * costs.outputCostPerToken + + safe(cacheCreationTokens) * costs.cacheWriteCostPerToken + + safe(cacheReadTokens) * costs.cacheReadCostPerToken + + safe(webSearchRequests) * costs.webSearchCostPerRequest ) } @@ -234,59 +277,67 @@ const autoModelNames: Record = { 'qwen-auto': 'Qwen (auto)', } +const SHORT_NAMES: Record = { + 'claude-opus-4-7': 'Opus 4.7', + 'claude-opus-4-6': 'Opus 4.6', + 'claude-opus-4-5': 'Opus 4.5', + 'claude-opus-4-1': 'Opus 4.1', + 'claude-opus-4': 'Opus 4', + 'claude-sonnet-4-6': 'Sonnet 4.6', + 'claude-sonnet-4-5': 'Sonnet 4.5', + 'claude-sonnet-4': 'Sonnet 4', + 'claude-3-7-sonnet': 'Sonnet 3.7', + 'claude-3-5-sonnet': 'Sonnet 3.5', + 'claude-haiku-4-5': 'Haiku 4.5', + 'claude-3-5-haiku': 'Haiku 3.5', + 'gpt-4o-mini': 'GPT-4o Mini', + 'gpt-4o': 'GPT-4o', + 'gpt-4.1-nano': 'GPT-4.1 Nano', + 'gpt-4.1-mini': 'GPT-4.1 Mini', + 'gpt-4.1': 'GPT-4.1', + 'codex-auto-review': 'Codex Auto Review', + 'gpt-5.5-pro': 'GPT-5.5 Pro', + 'gpt-5.5': 'GPT-5.5', + 'gpt-5.4-pro': 'GPT-5.4 Pro', + 'gpt-5.4-nano': 'GPT-5.4 Nano', + 'gpt-5.4-mini': 'GPT-5.4 Mini', + 'gpt-5.4': 'GPT-5.4', + 'gpt-5.3-codex': 'GPT-5.3 Codex', + 'gpt-5.3': 'GPT-5.3', + 'gpt-5.2-pro': 'GPT-5.2 Pro', + 'gpt-5.2-low': 'GPT-5.2 Low', + 'gpt-5.2': 'GPT-5.2', + 'gpt-5.1-codex-mini': 'GPT-5.1 Codex Mini', + 'gpt-5.1-codex': 'GPT-5.1 Codex', + 'gpt-5.1': 'GPT-5.1', + 'gpt-5-pro': 'GPT-5 Pro', + 'gpt-5-nano': 'GPT-5 Nano', + 'gpt-5-mini': 'GPT-5 Mini', + 'gpt-5': 'GPT-5', + 'gemini-3.1-pro-preview': 'Gemini 3.1 Pro', + 'gemini-3-flash-preview': 'Gemini 3 Flash', + 'gemini-2.5-pro': 'Gemini 2.5 Pro', + 'gemini-2.5-flash': 'Gemini 2.5 Flash', + 'deepseek-coder-max': 'DeepSeek Coder Max', + 'deepseek-coder': 'DeepSeek Coder', + 'deepseek-r1': 'DeepSeek R1', + 'o4-mini': 'o4-mini', + 'o3': 'o3', + 'MiniMax-M2.7-highspeed': 'MiniMax M2.7 Highspeed', + 'MiniMax-M2.7': 'MiniMax M2.7', +} + +// Sorted longest-first so more-specific prefixes match before shorter ones. +// Without this, `gpt-5-mini` could resolve to "GPT-5" (the entry for `gpt-5`) +// if it happened to be iterated before `gpt-5-mini`, hiding a distinct model +// behind the wrong display name and pricing tier. +const SORTED_SHORT_NAMES: [string, string][] = Object.entries(SHORT_NAMES) + .sort((a, b) => b[0].length - a[0].length) + export function getShortModelName(model: string): string { if (autoModelNames[model]) return autoModelNames[model] const canonical = resolveAlias(getCanonicalName(model)) - const shortNames: Record = { - 'claude-opus-4-7': 'Opus 4.7', - 'claude-opus-4-6': 'Opus 4.6', - 'claude-opus-4-5': 'Opus 4.5', - 'claude-opus-4-1': 'Opus 4.1', - 'claude-opus-4': 'Opus 4', - 'claude-sonnet-4-6': 'Sonnet 4.6', - 'claude-sonnet-4-5': 'Sonnet 4.5', - 'claude-sonnet-4': 'Sonnet 4', - 'claude-3-7-sonnet': 'Sonnet 3.7', - 'claude-3-5-sonnet': 'Sonnet 3.5', - 'claude-haiku-4-5': 'Haiku 4.5', - 'claude-3-5-haiku': 'Haiku 3.5', - 'gpt-4o-mini': 'GPT-4o Mini', - 'gpt-4o': 'GPT-4o', - 'gpt-4.1-nano': 'GPT-4.1 Nano', - 'gpt-4.1-mini': 'GPT-4.1 Mini', - 'gpt-4.1': 'GPT-4.1', - 'codex-auto-review': 'Codex Auto Review', - 'gpt-5.5-pro': 'GPT-5.5 Pro', - 'gpt-5.5': 'GPT-5.5', - 'gpt-5.4-pro': 'GPT-5.4 Pro', - 'gpt-5.4-nano': 'GPT-5.4 Nano', - 'gpt-5.4-mini': 'GPT-5.4 Mini', - 'gpt-5.4': 'GPT-5.4', - 'gpt-5.3-codex': 'GPT-5.3 Codex', - 'gpt-5.3': 'GPT-5.3', - 'gpt-5.2-pro': 'GPT-5.2 Pro', - 'gpt-5.2-low': 'GPT-5.2 Low', - 'gpt-5.2': 'GPT-5.2', - 'gpt-5.1-codex-mini': 'GPT-5.1 Codex Mini', - 'gpt-5.1-codex': 'GPT-5.1 Codex', - 'gpt-5.1': 'GPT-5.1', - 'gpt-5-pro': 'GPT-5 Pro', - 'gpt-5-nano': 'GPT-5 Nano', - 'gpt-5-mini': 'GPT-5 Mini', - 'gpt-5': 'GPT-5', - 'gemini-3.1-pro-preview': 'Gemini 3.1 Pro', - 'gemini-3-flash-preview': 'Gemini 3 Flash', - 'gemini-2.5-pro': 'Gemini 2.5 Pro', - 'gemini-2.5-flash': 'Gemini 2.5 Flash', - 'deepseek-coder-max': 'DeepSeek Coder Max', - 'deepseek-coder': 'DeepSeek Coder', - 'deepseek-r1': 'DeepSeek R1', - 'o4-mini': 'o4-mini', - 'o3': 'o3', - 'MiniMax-M2.7-highspeed': 'MiniMax M2.7 Highspeed', - 'MiniMax-M2.7': 'MiniMax M2.7', - } - for (const [key, name] of Object.entries(shortNames)) { + for (const [key, name] of SORTED_SHORT_NAMES) { if (canonical.startsWith(key)) return name } return canonical diff --git a/src/optimize.ts b/src/optimize.ts index 7974c3f..4bb1bab 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -111,9 +111,15 @@ const GRADE_A_MIN = 90 const GRADE_B_MIN = 75 const GRADE_C_MIN = 55 const GRADE_D_MIN = 30 -const URGENCY_IMPACT_WEIGHT = 0.7 -const URGENCY_TOKEN_WEIGHT = 0.3 -const URGENCY_TOKEN_NORMALIZE = 500_000 +// Rebalanced so a high-impact finding with zero observed tokens (e.g. +// detectGhostAgents firing on five files but tokensSaved=400) cannot +// outrank a medium-impact finding with many millions of tokens. +// Old: 0.7/0.3 → high+0 = 0.70, medium+1B = 0.65 (high+0 won). +// New: 0.5/0.5 → high+0 = 0.50, medium+1B = 0.75 (medium+1B wins). +// Token normalize lifted to 5M so the rank scales over a realistic range. +const URGENCY_IMPACT_WEIGHT = 0.5 +const URGENCY_TOKEN_WEIGHT = 0.5 +const URGENCY_TOKEN_NORMALIZE = 5_000_000 // ============================================================================ // File system constants diff --git a/src/providers/antigravity.ts b/src/providers/antigravity.ts index f048313..3f9667e 100644 --- a/src/providers/antigravity.ts +++ b/src/providers/antigravity.ts @@ -87,13 +87,22 @@ async function loadCache(): Promise { } async function flushCache(liveCascadeIds?: Set): Promise { - if (!memCache || !cacheDirty) return - try { - if (liveCascadeIds) { - for (const id of Object.keys(memCache.cascades)) { - if (!liveCascadeIds.has(id)) delete memCache.cascades[id] + if (!memCache) return + // If the caller supplied liveCascadeIds, we must run the eviction step + // even when no cascade was added or updated this run; otherwise deleted + // .pb files would persist in the cache forever once it stops getting + // dirty writes. Mark the cache dirty when an eviction happens so the + // file write below proceeds. + if (liveCascadeIds) { + for (const id of Object.keys(memCache.cascades)) { + if (!liveCascadeIds.has(id)) { + delete memCache.cascades[id] + cacheDirty = true } } + } + if (!cacheDirty) return + try { const dir = getCacheDir() await mkdir(dir, { recursive: true }) diff --git a/src/providers/codex.ts b/src/providers/codex.ts index 83d81eb..13e4482 100644 --- a/src/providers/codex.ts +++ b/src/providers/codex.ts @@ -338,14 +338,19 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars reasoningTokens = (total.reasoning_output_tokens ?? 0) - prevReasoning } - if (!last) { - const total = info.total_token_usage - if (total) { - prevInput = total.input_tokens ?? 0 - prevCached = total.cached_input_tokens ?? 0 - prevOutput = total.output_tokens ?? 0 - prevReasoning = total.reasoning_output_tokens ?? 0 - } + // Always advance the prev counters to track the cumulative state. + // Previously prev was only updated on the fallback branch, so a + // session with mixed last_token_usage / no-last events would + // compute the next fallback delta against a stale prev=0 baseline, + // double-counting the entire cumulative window. The prev value + // must mirror what cumulative reports regardless of whether this + // event used `last` or fell back to deltas. + const total = info.total_token_usage + if (total) { + prevInput = total.input_tokens ?? 0 + prevCached = total.cached_input_tokens ?? 0 + prevOutput = total.output_tokens ?? 0 + prevReasoning = total.reasoning_output_tokens ?? 0 } const totalTokens = inputTokens + cachedInputTokens + outputTokens + reasoningTokens diff --git a/src/providers/cursor.ts b/src/providers/cursor.ts index e9619b8..a96abf9 100644 --- a/src/providers/cursor.ts +++ b/src/providers/cursor.ts @@ -1,4 +1,4 @@ -import { existsSync } from 'fs' +import { existsSync, statSync } from 'fs' import { join } from 'path' import { homedir } from 'os' @@ -27,6 +27,7 @@ const modelDisplayNames: Record = { } type BubbleRow = { + bubble_key: string input_tokens: number | null output_tokens: number | null model: string | null @@ -100,6 +101,7 @@ function modelForDisplay(raw: string | null): string { const BUBBLE_QUERY_BASE = ` SELECT + key as bubble_key, json_extract(value, '$.tokenCount.inputTokens') as input_tokens, json_extract(value, '$.tokenCount.outputTokens') as output_tokens, json_extract(value, '$.modelInfo.modelName') as model, @@ -204,7 +206,12 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set): { calls: Parse const createdAt = row.created_at ?? '' const conversationId = row.conversation_id ?? 'unknown' - const dedupKey = `cursor:${conversationId}:${createdAt}:${inputTokens}:${outputTokens}` + // Use the SQLite row key (bubbleId:) as the dedup key. + // Cursor mutates token counts on the row in place when streaming + // completes — including tokens in the dedup key (the previous + // implementation) caused the same bubble to be counted twice once + // its tokens stabilized. + const dedupKey = `cursor:bubble:${row.bubble_key}` if (seenKeys.has(dedupKey)) continue seenKeys.add(dedupKey) @@ -273,9 +280,21 @@ function extractTextLength(content: AgentKvContent[]): number { return total } -function parseAgentKv(db: SqliteDatabase, seenKeys: Set): { calls: ParsedProviderCall[] } { +function parseAgentKv(db: SqliteDatabase, seenKeys: Set, dbPath: string): { calls: ParsedProviderCall[] } { const results: ParsedProviderCall[] = [] + // Cursor's agentKv schema does not record per-message timestamps. Use the + // SQLite file's mtime as a bounded "last write" timestamp for all calls; + // it's at least honest (no future time, no always-now). Users running + // codeburn against an idle Cursor install will see agentKv calls land at + // the actual last activity time rather than today's date. + let agentKvTimestamp: string + try { + agentKvTimestamp = new Date(statSync(dbPath).mtimeMs).toISOString() + } catch { + agentKvTimestamp = new Date().toISOString() + } + let rows: AgentKvRow[] try { rows = db.query(AGENTKV_QUERY) @@ -362,7 +381,7 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set): { calls: Parse costUSD, tools: [], bashCommands: [], - timestamp: new Date().toISOString(), + timestamp: agentKvTimestamp, speed: 'standard', deduplicationKey: dedupKey, userMessage: session.userText, @@ -406,7 +425,7 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars } const { calls: bubbleCalls } = parseBubbles(db, seenKeys) - const { calls: agentKvCalls } = parseAgentKv(db, seenKeys) + const { calls: agentKvCalls } = parseAgentKv(db, seenKeys, source.path) const calls = [...bubbleCalls, ...agentKvCalls] await writeCachedResults(source.path, calls) diff --git a/src/providers/droid.ts b/src/providers/droid.ts index d744040..2b351a5 100644 --- a/src/providers/droid.ts +++ b/src/providers/droid.ts @@ -206,7 +206,12 @@ function createParser( if (assistantCalls.length === 0) return - // Distribute session-level token usage across calls + // KNOWN LIMITATION: Droid records token usage only at session level + // (settings.tokenUsage), not per-message. We split evenly across the + // emitted assistant calls and price all of them at settings.model + // (the latest model the session used). For sessions where the user + // switched models mid-stream, costs are approximate — we have no + // ground-truth breakdown to attribute tokens per model. const totalTokens = settings.tokenUsage if (!totalTokens) return diff --git a/src/providers/gemini.ts b/src/providers/gemini.ts index d00f0dc..87517d8 100644 --- a/src/providers/gemini.ts +++ b/src/providers/gemini.ts @@ -84,7 +84,7 @@ function parseSession(data: GeminiSession, seenKeys: Set): ParsedProvide for (const msg of geminiMessages) { const t = msg.tokens! totalInput += t.input ?? 0 - totalOutput += (t.output ?? 0) + (t.thoughts ?? 0) + totalOutput += t.output ?? 0 totalCached += t.cached ?? 0 totalThoughts += t.thoughts ?? 0 if (msg.model && !model) model = msg.model @@ -119,7 +119,10 @@ function parseSession(data: GeminiSession, seenKeys: Set): ParsedProvide const tsDate = new Date(data.startTime) if (isNaN(tsDate.getTime()) || tsDate.getTime() < 1_000_000_000_000) return results - const costUSD = calculateCost(model, freshInput, totalOutput, 0, totalCached, 0) + // Gemini bills thoughts at the output token rate; calculateCost does not + // accept a reasoning parameter, so fold thoughts into the output count for + // pricing while keeping outputTokens / reasoningTokens reported separately. + const costUSD = calculateCost(model, freshInput, totalOutput + totalThoughts, 0, totalCached, 0) results.push({ provider: 'gemini', diff --git a/src/providers/pi.ts b/src/providers/pi.ts index 8b75a31..7b4a94b 100644 --- a/src/providers/pi.ts +++ b/src/providers/pi.ts @@ -149,7 +149,14 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars if (msg.role !== 'assistant' || !msg.usage) continue - const { input, output, cacheRead, cacheWrite } = msg.usage + // Coerce undefined/null token fields to 0. Pi/OMP session files + // sometimes omit individual usage fields; the destructure used to + // pass undefined into calculateCost which then returned NaN, and + // that NaN propagated into every aggregate cost total. + const input = msg.usage.input ?? 0 + const output = msg.usage.output ?? 0 + const cacheRead = msg.usage.cacheRead ?? 0 + const cacheWrite = msg.usage.cacheWrite ?? 0 if (input === 0 && output === 0) continue const model = msg.model ?? 'gpt-5' diff --git a/src/yield.ts b/src/yield.ts index 1dda256..c26a18f 100644 --- a/src/yield.ts +++ b/src/yield.ts @@ -50,8 +50,35 @@ function getMainBranch(cwd: string): string { type CommitInfo = { sha: string timestamp: Date - isRevert: boolean inMain: boolean + /** Set when a LATER commit's body says "This reverts commit " — i.e. the work in this commit was reverted out of main. */ + wasReverted: boolean +} + +/** + * Find SHAs that were the target of a `git revert` ANYWHERE in the repo's + * history (not just the time window). The standard `git revert` body + * format is "This reverts commit ." which we grep out. + * + * The previous implementation flagged a commit as `isRevert` based on the + * substring "revert" appearing in its OWN subject. Two bugs there: + * 1. Subjects like "Add revert button" matched. + * 2. The session that PERFORMED the revert was tagged "reverted", not the + * session whose work was being reverted — so the original session always + * looked productive even after its work was thrown away. + */ +function getRevertedShas(cwd: string): Set { + const bodies = runGit( + ['log', '--all', '--grep=^This reverts commit', '--format=%B%x1e'], + cwd, + ) ?? '' + const set = new Set() + const re = /This reverts commit ([0-9a-f]{7,40})/g + let m: RegExpExecArray | null + while ((m = re.exec(bodies)) !== null) { + set.add(m[1].toLowerCase()) + } + return set } function getCommitsInRange(cwd: string, since: Date, until: Date, mainBranch: string): CommitInfo[] { @@ -68,14 +95,21 @@ function getCommitsInRange(cwd: string, since: Date, until: Date, mainBranch: st const mainCommits = new Set( (runGit(['log', mainBranch, '--format=%H'], cwd) ?? '').split('\n').filter(Boolean) ) + const revertedShas = getRevertedShas(cwd) return log.split('\n').filter(Boolean).map(line => { - const [sha, timestamp, subject] = line.split('|') + const [sha] = line.split('|') + const timestamp = line.split('|')[1] ?? '' return { sha, timestamp: new Date(timestamp), - isRevert: subject.toLowerCase().includes('revert'), inMain: mainCommits.has(sha), + // wasReverted: matches when ANY later commit's body says + // "This reverts commit ". Compare against the full SHA AND its + // 7-char short prefix to be safe; git revert sometimes records the + // short form. + wasReverted: revertedShas.has(sha.toLowerCase()) || + revertedShas.has(sha.toLowerCase().slice(0, 7)), } }) } @@ -101,7 +135,10 @@ function categorizeSession( } const inMainCount = relevantCommits.filter(c => c.inMain).length - const revertedCount = relevantCommits.filter(c => c.isRevert && c.inMain).length + // A session is "reverted" when at least half of its in-main commits were + // later reverted out (revert detected via "This reverts commit " + // anywhere later in history, not in the same time window). + const revertedCount = relevantCommits.filter(c => c.inMain && c.wasReverted).length if (revertedCount > 0 && revertedCount >= inMainCount / 2) { return { category: 'reverted', commitCount: relevantCommits.length } diff --git a/tests/cli-date.test.ts b/tests/cli-date.test.ts index e30096d..45578b7 100644 --- a/tests/cli-date.test.ts +++ b/tests/cli-date.test.ts @@ -114,8 +114,20 @@ describe('toPeriod', () => { } }) - it('falls back to "week" for unknown input', () => { - expect(toPeriod('garbage')).toBe('week') - expect(toPeriod('')).toBe('week') + it('exits with an error on unknown input instead of silently falling back', () => { + // Previously toPeriod silently fell back to 'week' for any unrecognized + // value, which let typos like `-p mounth` produce a quiet 7-day report + // while the user thought they were viewing the month. The new behavior + // is to fail loudly via process.exit(1) after writing to stderr. + const exitSpy = vi.spyOn(process, 'exit').mockImplementation(() => { throw new Error('exit') }) as unknown as ReturnType + const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true) + try { + expect(() => toPeriod('garbage')).toThrow('exit') + expect(exitSpy).toHaveBeenCalledWith(1) + expect(stderrSpy).toHaveBeenCalled() + } finally { + exitSpy.mockRestore() + stderrSpy.mockRestore() + } }) }) diff --git a/tests/daily-cache.test.ts b/tests/daily-cache.test.ts index 3582e8a..199d7a4 100644 --- a/tests/daily-cache.test.ts +++ b/tests/daily-cache.test.ts @@ -163,6 +163,34 @@ describe('addNewDays', () => { const updated = addNewDays(base, [emptyDay('2026-04-05', 3)], '2026-04-05') expect(updated.lastComputedDate).toBe('2026-04-10') }) + + it('skips prune when newestDate is malformed (does not silently drop all days)', () => { + // Regression guard: a corrupt newestDate string used to produce a NaN + // cutoff, which made `d.date >= "Invalid Date"` always false and + // wiped every cached day on the next merge. The guard now leaves + // the entries untouched so the next valid run can prune normally. + const base: DailyCache = { + version: DAILY_CACHE_VERSION, + lastComputedDate: '2026-04-10', + days: [emptyDay('2026-04-08', 1), emptyDay('2026-04-09', 2), emptyDay('2026-04-10', 3)], + } + const updated = addNewDays(base, [], 'not-a-date') + expect(updated.days.map(d => d.date)).toEqual(['2026-04-08', '2026-04-09', '2026-04-10']) + }) + + it('still prunes when newestDate is valid', () => { + const old = '2020-01-01' + const recent = '2026-04-10' + const base: DailyCache = { + version: DAILY_CACHE_VERSION, + lastComputedDate: recent, + days: [emptyDay(old, 1), emptyDay(recent, 2)], + } + const updated = addNewDays(base, [], recent) + // 730-day retention from 2026-04-10 → cutoff ~2024-04-11; 2020-01-01 must be gone. + expect(updated.days.find(d => d.date === old)).toBeUndefined() + expect(updated.days.find(d => d.date === recent)).toBeDefined() + }) }) describe('getDaysInRange', () => { diff --git a/tests/date-range-filter.test.ts b/tests/date-range-filter.test.ts index 76a3118..b2b6fba 100644 --- a/tests/date-range-filter.test.ts +++ b/tests/date-range-filter.test.ts @@ -26,10 +26,18 @@ describe('parseDateRangeFlags', () => { expect(range!.end.getHours()).toBe(23) }) - it('accepts --to alone (start = epoch)', () => { + it('accepts --to alone with a 6-month default start', () => { + // Previously the missing --from defaulted to epoch (1970), opening a + // 55-year scan window that was almost never what the user meant. The + // default is now 6 months back from now, matching the dashboard's + // "6 Months" period boundary. const range = parseDateRangeFlags(undefined, '2026-04-10') expect(range).not.toBeNull() - expect(range!.start.getTime()).toBe(new Date(0).getTime()) + expect(range!.start.getTime()).toBeGreaterThan(new Date(0).getTime()) + const sixMonthsMs = 6 * 31 * 24 * 60 * 60 * 1000 + const ageMs = Date.now() - range!.start.getTime() + expect(ageMs).toBeLessThanOrEqual(sixMonthsMs + 1000) + expect(ageMs).toBeGreaterThanOrEqual(sixMonthsMs - 1000) expect(range!.end.getDate()).toBe(10) }) diff --git a/tests/models-hoist.test.ts b/tests/models-hoist.test.ts new file mode 100644 index 0000000..13af3e5 --- /dev/null +++ b/tests/models-hoist.test.ts @@ -0,0 +1,120 @@ +import { describe, it, expect } from 'vitest' +import { calculateCost, getModelCosts, getShortModelName } from '../src/models.js' + +// Lock down the post-hoist refactor: every model name a real user has +// emitted in the last year should resolve to the same display name and +// the same costs as before. If this list grows or shrinks, the refactor +// is fine — it's the per-name resolution that must stay stable. +const KNOWN_NAMES = [ + 'claude-opus-4-7', + 'claude-opus-4-6', + 'claude-opus-4-5', + 'claude-sonnet-4-6', + 'claude-sonnet-4-5', + 'claude-haiku-4-5', + 'claude-3-5-sonnet', + 'claude-3-5-haiku', + 'claude-opus-4-7-20250101', + 'claude-sonnet-4-6-20250929', + 'anthropic/claude-opus-4-7', + 'anthropic--claude-4.6-opus', + 'anthropic--claude-4.6-sonnet', + 'claude-4.6-sonnet', + 'gpt-5', + 'gpt-5-mini', + 'gpt-5-nano', + 'gpt-5-pro', + 'gpt-5.1', + 'gpt-5.1-codex', + 'gpt-5.1-codex-mini', + 'gpt-5.2', + 'gpt-5.2-low', + 'gpt-5.3-codex', + 'gpt-5.4', + 'gpt-5.4-mini', + 'gpt-4o', + 'gpt-4o-mini', + 'gpt-4.1', + 'gpt-4.1-mini', + 'gpt-4.1-nano', + 'gemini-2.5-pro', + 'gemini-2.5-flash', + 'gemini-3.1-pro-preview', + 'gemini-3-flash-preview', + 'gemini-3.1-pro', + 'gemini-3-flash', + 'cursor-auto', + 'cursor-agent-auto', + 'copilot-auto', + 'copilot-openai-auto', + 'kiro-auto', + 'cline-auto', + 'qwen-auto', + 'o3', + 'o4-mini', + 'deepseek-coder', + 'deepseek-coder-max', + 'deepseek-r1', + 'MiniMax-M2.7', + 'MiniMax-M2.7-highspeed', +] + +describe('post-hoist resolution stability', () => { + it('every known model resolves to a non-empty short name', () => { + for (const name of KNOWN_NAMES) { + const short = getShortModelName(name) + expect(short, `short name for ${name}`).toBeTruthy() + expect(typeof short, `short name for ${name}`).toBe('string') + } + }) + + it('gpt-5-mini does NOT collide with gpt-5 (longest-prefix wins)', () => { + expect(getShortModelName('gpt-5-mini')).toBe('GPT-5 Mini') + expect(getShortModelName('gpt-5')).toBe('GPT-5') + expect(getShortModelName('gpt-5-nano')).toBe('GPT-5 Nano') + expect(getShortModelName('gpt-5-pro')).toBe('GPT-5 Pro') + }) + + it('gpt-5.1-codex-mini does NOT collapse to gpt-5.1-codex or gpt-5', () => { + expect(getShortModelName('gpt-5.1-codex-mini')).toBe('GPT-5.1 Codex Mini') + expect(getShortModelName('gpt-5.1-codex')).toBe('GPT-5.1 Codex') + expect(getShortModelName('gpt-5.1')).toBe('GPT-5.1') + }) + + it('claude-haiku-4-5 does NOT collapse to claude-haiku-4 or claude-3-5-haiku', () => { + expect(getShortModelName('claude-haiku-4-5')).toBe('Haiku 4.5') + expect(getShortModelName('claude-3-5-haiku')).toBe('Haiku 3.5') + }) + + it('getModelCosts returns positive token costs for every known name', () => { + for (const name of KNOWN_NAMES) { + const c = getModelCosts(name) + expect(c, `costs for ${name}`).not.toBeNull() + expect(c!.inputCostPerToken).toBeGreaterThan(0) + expect(c!.outputCostPerToken).toBeGreaterThan(0) + } + }) + + it('calculateCost is stable for a typical Sonnet 4.6 turn', () => { + // 1k input, 2k output, 50k cache read — common Claude Code shape. + const cost = calculateCost('claude-sonnet-4-6', 1000, 2000, 0, 50_000, 0) + expect(cost).toBeGreaterThan(0) + expect(Number.isFinite(cost)).toBe(true) + }) + + it('calculateCost clamps NaN/negative inputs to 0', () => { + const c1 = calculateCost('claude-sonnet-4-6', NaN, 1000, 0, 0, 0) + const c2 = calculateCost('claude-sonnet-4-6', 0, 1000, 0, 0, 0) + expect(c1).toBe(c2) + const c3 = calculateCost('claude-sonnet-4-6', -1000, 1000, 0, 0, 0) + expect(c3).toBe(c2) + }) + + it('repeated calls return the same cost (memoized sort cache is consistent)', () => { + const a = getModelCosts('gpt-5-mini') + const b = getModelCosts('gpt-5-mini') + const c = getModelCosts('gpt-5-mini') + expect(a).toEqual(b) + expect(b).toEqual(c) + }) +})