codeburn/src/models.ts
iamtoruk 4f1138290e Merge main into feat/omp-support-model-aliases
Second merge of main since the PR was opened. Main moved 30+ commits
(0.8.5 bump, plan tracking feature, MiniMax pricing, menubar
prefetchAll walk-back, aicrowd cache rewrite revert) so the branch
needed another reconciliation before merging to main.

Two new conflicts resolved. Took main's text in both cases per the
policy of favoring main when the feature work is neutral:

  README.md             Kept main's Node 20+ / better-sqlite3
                        Requirements wording and main's shorter src/
                        tree listing. Added OMP to the Requirements
                        line.

  src/providers/pi.ts   Main dropped the discovery-cache snapshot and
                        the rich source-metadata fields as part of the
                        aicrowd revert. Took main's simpler structure
                        and only kept the providerName parameter so
                        OMP sources still report the correct provider
                        in the session source and dedup key.

Earlier fixups carried forward from the prior merge commit:
  - Object.hasOwn guards in resolveAlias against prototype-pollution
    via a model literally named '__proto__'.
  - source.provider in the dedup key prefix so OMP rows no longer
    stamp 'pi:'.
  - Combined pi.js imports in providers/index.ts.
  - Trailing newline on pi.ts.
  - Unknown-model fallback in cursor-agent.ts from yesterday's PR #117
    fixup (preserved via main).

353 tests pass (count dropped from 378 because main deleted the
parse-progress / parser-cache / provider-colors / source-cache test
files alongside the cache-rewrite revert).

Feature work by @cgrossde.
2026-04-21 11:51:20 -07:00

239 lines
12 KiB
TypeScript

import { readFile, writeFile, mkdir } from 'fs/promises'
import { join } from 'path'
import { homedir } from 'os'
// Per-model pricing. All per-token rates are USD per single token
// (e.g. 5e-6 == $5 per million tokens).
export type ModelCosts = {
// USD per uncached input token.
inputCostPerToken: number
// USD per generated output token.
outputCostPerToken: number
// USD per token written into the prompt cache.
cacheWriteCostPerToken: number
// USD per token served from the prompt cache.
cacheReadCostPerToken: number
// Flat USD charge per web-search request.
webSearchCostPerRequest: number
// Price multiplier applied when speed === 'fast' (1 = no surcharge).
fastMultiplier: number
}
// Shape of one entry in LiteLLM's model_prices_and_context_window.json.
// Only the fields this module consumes; everything else in the JSON is ignored.
type LiteLLMEntry = {
input_cost_per_token?: number
output_cost_per_token?: number
cache_creation_input_token_cost?: number
cache_read_input_token_cost?: number
// NOTE(review): presumably a provider-specific extension carrying a
// fast-mode price multiplier — confirm against the upstream JSON.
provider_specific_entry?: { fast?: number }
}
// Upstream community-maintained pricing table (LiteLLM project).
const LITELLM_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
// On-disk pricing snapshot is considered fresh for 24 hours.
const CACHE_TTL_MS = 24 * 60 * 60 * 1000
// Flat USD cost applied per web-search request, for every model.
const WEB_SEARCH_COST = 0.01
// Static pricing used when the LiteLLM fetch fails and no fresh disk cache
// exists. Also consulted by getModelCosts as a prefix-match tier even when
// the live table is loaded. Rates are USD per token (5e-6 == $5/MTok).
// NOTE: key order matters downstream — getModelCosts scans these entries in
// insertion order, so more specific keys should precede their prefixes.
const FALLBACK_PRICING: Record<string, ModelCosts> = {
'claude-opus-4-7': { inputCostPerToken: 5e-6, outputCostPerToken: 25e-6, cacheWriteCostPerToken: 6.25e-6, cacheReadCostPerToken: 0.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 6 },
'claude-opus-4-6': { inputCostPerToken: 5e-6, outputCostPerToken: 25e-6, cacheWriteCostPerToken: 6.25e-6, cacheReadCostPerToken: 0.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 6 },
'claude-opus-4-5': { inputCostPerToken: 5e-6, outputCostPerToken: 25e-6, cacheWriteCostPerToken: 6.25e-6, cacheReadCostPerToken: 0.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-opus-4-1': { inputCostPerToken: 15e-6, outputCostPerToken: 75e-6, cacheWriteCostPerToken: 18.75e-6, cacheReadCostPerToken: 1.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-opus-4': { inputCostPerToken: 15e-6, outputCostPerToken: 75e-6, cacheWriteCostPerToken: 18.75e-6, cacheReadCostPerToken: 1.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-sonnet-4-6': { inputCostPerToken: 3e-6, outputCostPerToken: 15e-6, cacheWriteCostPerToken: 3.75e-6, cacheReadCostPerToken: 0.3e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-sonnet-4-5': { inputCostPerToken: 3e-6, outputCostPerToken: 15e-6, cacheWriteCostPerToken: 3.75e-6, cacheReadCostPerToken: 0.3e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-sonnet-4': { inputCostPerToken: 3e-6, outputCostPerToken: 15e-6, cacheWriteCostPerToken: 3.75e-6, cacheReadCostPerToken: 0.3e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-3-7-sonnet': { inputCostPerToken: 3e-6, outputCostPerToken: 15e-6, cacheWriteCostPerToken: 3.75e-6, cacheReadCostPerToken: 0.3e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-3-5-sonnet': { inputCostPerToken: 3e-6, outputCostPerToken: 15e-6, cacheWriteCostPerToken: 3.75e-6, cacheReadCostPerToken: 0.3e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-haiku-4-5': { inputCostPerToken: 1e-6, outputCostPerToken: 5e-6, cacheWriteCostPerToken: 1.25e-6, cacheReadCostPerToken: 0.1e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'claude-3-5-haiku': { inputCostPerToken: 0.8e-6, outputCostPerToken: 4e-6, cacheWriteCostPerToken: 1e-6, cacheReadCostPerToken: 0.08e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-4o': { inputCostPerToken: 2.5e-6, outputCostPerToken: 10e-6, cacheWriteCostPerToken: 2.5e-6, cacheReadCostPerToken: 1.25e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-4o-mini': { inputCostPerToken: 0.15e-6, outputCostPerToken: 0.6e-6, cacheWriteCostPerToken: 0.15e-6, cacheReadCostPerToken: 0.075e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gemini-2.5-pro': { inputCostPerToken: 1.25e-6, outputCostPerToken: 10e-6, cacheWriteCostPerToken: 1.25e-6, cacheReadCostPerToken: 0.315e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-5.3-codex': { inputCostPerToken: 2.5e-6, outputCostPerToken: 10e-6, cacheWriteCostPerToken: 2.5e-6, cacheReadCostPerToken: 1.25e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-5.4': { inputCostPerToken: 2.5e-6, outputCostPerToken: 10e-6, cacheWriteCostPerToken: 2.5e-6, cacheReadCostPerToken: 1.25e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-5.4-mini': { inputCostPerToken: 0.4e-6, outputCostPerToken: 1.6e-6, cacheWriteCostPerToken: 0.4e-6, cacheReadCostPerToken: 0.2e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-5': { inputCostPerToken: 2.5e-6, outputCostPerToken: 10e-6, cacheWriteCostPerToken: 2.5e-6, cacheReadCostPerToken: 1.25e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-5-mini': { inputCostPerToken: 0.4e-6, outputCostPerToken: 1.6e-6, cacheWriteCostPerToken: 0.4e-6, cacheReadCostPerToken: 0.2e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-4.1': { inputCostPerToken: 2e-6, outputCostPerToken: 8e-6, cacheWriteCostPerToken: 2e-6, cacheReadCostPerToken: 0.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-4.1-mini': { inputCostPerToken: 0.4e-6, outputCostPerToken: 1.6e-6, cacheWriteCostPerToken: 0.4e-6, cacheReadCostPerToken: 0.1e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'gpt-4.1-nano': { inputCostPerToken: 0.1e-6, outputCostPerToken: 0.4e-6, cacheWriteCostPerToken: 0.1e-6, cacheReadCostPerToken: 0.025e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'o3': { inputCostPerToken: 10e-6, outputCostPerToken: 40e-6, cacheWriteCostPerToken: 10e-6, cacheReadCostPerToken: 2.5e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'o4-mini': { inputCostPerToken: 1.1e-6, outputCostPerToken: 4.4e-6, cacheWriteCostPerToken: 1.1e-6, cacheReadCostPerToken: 0.275e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'MiniMax-M2.7-highspeed': { inputCostPerToken: 0.6e-6, outputCostPerToken: 2.4e-6, cacheWriteCostPerToken: 0.375e-6, cacheReadCostPerToken: 0.06e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
'MiniMax-M2.7': { inputCostPerToken: 0.3e-6, outputCostPerToken: 1.2e-6, cacheWriteCostPerToken: 0.375e-6, cacheReadCostPerToken: 0.06e-6, webSearchCostPerRequest: WEB_SEARCH_COST, fastMultiplier: 1 },
}
// In-memory pricing table, populated by loadPricing(); null until then.
let pricingCache: Map<string, ModelCosts> | null = null
// Per-user cache directory for codeburn (e.g. ~/.cache/codeburn).
function getCacheDir(): string {
  const home = homedir()
  return join(home, '.cache', 'codeburn')
}
// Location of the serialized pricing snapshot inside the cache dir.
function getCachePath(): string {
  const fileName = 'litellm-pricing.json'
  return join(getCacheDir(), fileName)
}
// Convert one LiteLLM JSON entry into our ModelCosts shape.
// Entries missing either base rate are unusable and yield null.
function parseLiteLLMEntry(entry: LiteLLMEntry): ModelCosts | null {
  const input = entry.input_cost_per_token
  const output = entry.output_cost_per_token
  if (input === undefined || output === undefined) return null
  return {
    inputCostPerToken: input,
    outputCostPerToken: output,
    // When the entry omits cache rates, default to 1.25x input for writes
    // and 0.1x input for reads.
    cacheWriteCostPerToken: entry.cache_creation_input_token_cost ?? input * 1.25,
    cacheReadCostPerToken: entry.cache_read_input_token_cost ?? input * 0.1,
    webSearchCostPerRequest: WEB_SEARCH_COST,
    fastMultiplier: entry.provider_specific_entry?.fast ?? 1,
  }
}
// Download the LiteLLM pricing table, index it under both the full and the
// provider-stripped names, persist a timestamped snapshot to disk, and
// return the resulting Map. Throws on any non-2xx HTTP response.
async function fetchAndCachePricing(): Promise<Map<string, ModelCosts>> {
  const response = await fetch(LITELLM_URL)
  if (!response.ok) throw new Error(`HTTP ${response.status}`)
  const data = await response.json() as Record<string, LiteLLMEntry>

  const pricing = new Map<string, ModelCosts>()
  for (const [name, entry] of Object.entries(data)) {
    const costs = parseLiteLLMEntry(entry)
    if (costs === null) continue
    pricing.set(name, costs)
    // Also index by stripped name so lookups work without provider prefix:
    // 'anthropic/claude-opus-4-6' is also queryable as 'claude-opus-4-6'.
    // First write wins so direct-provider entries take precedence over re-hosters.
    const stripped = name.replace(/^[^/]+\//, '')
    if (stripped !== name && !pricing.has(stripped)) pricing.set(stripped, costs)
  }

  const snapshot = { timestamp: Date.now(), data: Object.fromEntries(pricing) }
  await mkdir(getCacheDir(), { recursive: true })
  await writeFile(getCachePath(), JSON.stringify(snapshot))
  return pricing
}
// Read the on-disk pricing snapshot.
// Returns null when the file is missing, unparsable, or older than CACHE_TTL_MS.
async function loadCachedPricing(): Promise<Map<string, ModelCosts> | null> {
  let snapshot: { timestamp: number; data: Record<string, ModelCosts> }
  try {
    const raw = await readFile(getCachePath(), 'utf-8')
    snapshot = JSON.parse(raw) as { timestamp: number; data: Record<string, ModelCosts> }
  } catch {
    // Missing or corrupt cache file — treat as no cache.
    return null
  }
  const age = Date.now() - snapshot.timestamp
  return age > CACHE_TTL_MS ? null : new Map(Object.entries(snapshot.data))
}
// Populate the module-level pricing table: fresh disk cache first, then
// the network, then the baked-in FALLBACK_PRICING when both are unavailable.
export async function loadPricing(): Promise<void> {
  const fromDisk = await loadCachedPricing()
  if (fromDisk !== null) {
    pricingCache = fromDisk
    return
  }
  try {
    pricingCache = await fetchAndCachePricing()
  } catch {
    // Offline or fetch failure — fall back to the static table.
    pricingCache = new Map(Object.entries(FALLBACK_PRICING))
  }
}
// Known model name variants that providers emit but LiteLLM/fallback don't index under.
// OMP emits 'anthropic--claude-4.6-opus' (double-dash, dot version, tier-last).
// getCanonicalName strips any 'provider/' prefix first, so only the post-strip
// forms need to be listed here. Values must match the spelling used by
// FALLBACK_PRICING / the stripped LiteLLM index.
const BUILTIN_ALIASES: Record<string, string> = {
'anthropic--claude-4.6-opus': 'claude-opus-4-6',
'anthropic--claude-4.6-sonnet': 'claude-sonnet-4-6',
'anthropic--claude-4.5-opus': 'claude-opus-4-5',
'anthropic--claude-4.5-sonnet': 'claude-sonnet-4-5',
'anthropic--claude-4.5-haiku': 'claude-haiku-4-5',
}
// User-configured alias table; consulted before BUILTIN_ALIASES.
let userAliases: Record<string, string> = {}

// Called once during CLI startup after config is loaded.
// User aliases take precedence over built-ins.
// Stores a shallow copy so the caller's retained reference can't mutate
// alias resolution behind our back; spread also turns a literal '__proto__'
// key into an own data property, which resolveAlias's Object.hasOwn guard
// handles correctly.
export function setModelAliases(aliases: Record<string, string>): void {
  userAliases = { ...aliases }
}
// Map a canonical model name through the alias tables, user-configured
// aliases first. Object.hasOwn guards against prototype-chain hits
// (e.g. a model literally named '__proto__'). Unknown names pass through.
function resolveAlias(model: string): string {
  for (const table of [userAliases, BUILTIN_ALIASES]) {
    if (Object.hasOwn(table, model)) return table[model]!
  }
  return model
}
// Normalize a raw model id to its canonical lookup key.
// Applied in order: strip '@pin' suffix, strip trailing 8-digit date,
// strip leading 'provider/' prefix.
function getCanonicalName(model: string): string {
  const transforms: ReadonlyArray<RegExp> = [
    /@.*$/,      // pin: claude-sonnet-4-6@20250929 -> claude-sonnet-4-6
    /-\d{8}$/,   // date: claude-sonnet-4-20250514 -> claude-sonnet-4
    /^[^/]+\//,  // provider prefix: anthropic/foo -> foo
  ]
  let canonical = model
  for (const pattern of transforms) canonical = canonical.replace(pattern, '')
  return canonical
}
// Resolve pricing for a model id, or null when nothing matches.
// Lookup tiers, in order:
//   1. exact hit in the live LiteLLM table;
//   2. fallback table with a '-' boundary (exact key or 'key-variant');
//   3. loose prefix match against the live table;
//   4. loose prefix match against the fallback table.
// Fix: the old tier-3 iterated `pricingCache ?? new Map()`, allocating a
// throwaway untyped Map<any, any> that made `costs` implicitly `any`;
// guard on pricingCache instead (same behavior, properly typed).
export function getModelCosts(model: string): ModelCosts | null {
  const canonical = resolveAlias(getCanonicalName(model))
  if (pricingCache?.has(canonical)) return pricingCache.get(canonical)!
  for (const [key, costs] of Object.entries(FALLBACK_PRICING)) {
    if (canonical === key || canonical.startsWith(key + '-')) return costs
  }
  if (pricingCache) {
    for (const [key, costs] of pricingCache) {
      if (canonical.startsWith(key)) return costs
    }
  }
  for (const [key, costs] of Object.entries(FALLBACK_PRICING)) {
    if (canonical.startsWith(key)) return costs
  }
  return null
}
// USD cost of one usage event for `model`; 0 when the model is unknown.
// 'fast' speed scales the whole total by the model's fastMultiplier
// (multiplication order is irrelevant for the result).
export function calculateCost(
  model: string,
  inputTokens: number,
  outputTokens: number,
  cacheCreationTokens: number,
  cacheReadTokens: number,
  webSearchRequests: number,
  speed: 'standard' | 'fast' = 'standard',
): number {
  const costs = getModelCosts(model)
  if (costs === null) return 0
  const baseTotal =
    inputTokens * costs.inputCostPerToken +
    outputTokens * costs.outputCostPerToken +
    cacheCreationTokens * costs.cacheWriteCostPerToken +
    cacheReadTokens * costs.cacheReadCostPerToken +
    webSearchRequests * costs.webSearchCostPerRequest
  return speed === 'fast' ? baseTotal * costs.fastMultiplier : baseTotal
}
// Display labels keyed by canonical model id. Order matters: more specific
// keys (e.g. 'gpt-5.4-mini') must precede their prefixes ('gpt-5') because
// lookup is a first-match startsWith scan. Hoisted to module scope so the
// table is built once instead of on every call.
const SHORT_MODEL_NAMES: Record<string, string> = {
  'claude-opus-4-7': 'Opus 4.7',
  'claude-opus-4-6': 'Opus 4.6',
  'claude-opus-4-5': 'Opus 4.5',
  'claude-opus-4-1': 'Opus 4.1',
  'claude-opus-4': 'Opus 4',
  'claude-sonnet-4-6': 'Sonnet 4.6',
  'claude-sonnet-4-5': 'Sonnet 4.5',
  'claude-sonnet-4': 'Sonnet 4',
  'claude-3-7-sonnet': 'Sonnet 3.7',
  'claude-3-5-sonnet': 'Sonnet 3.5',
  'claude-haiku-4-5': 'Haiku 4.5',
  'claude-3-5-haiku': 'Haiku 3.5',
  'gpt-4o-mini': 'GPT-4o Mini',
  'gpt-4o': 'GPT-4o',
  'gpt-4.1-nano': 'GPT-4.1 Nano',
  'gpt-4.1-mini': 'GPT-4.1 Mini',
  'gpt-4.1': 'GPT-4.1',
  'gpt-5.4-mini': 'GPT-5.4 Mini',
  'gpt-5.4': 'GPT-5.4',
  'gpt-5.3-codex': 'GPT-5.3 Codex',
  'gpt-5-mini': 'GPT-5 Mini',
  'gpt-5': 'GPT-5',
  'gemini-2.5-pro': 'Gemini 2.5 Pro',
  'o4-mini': 'o4-mini',
  'o3': 'o3',
  'MiniMax-M2.7-highspeed': 'MiniMax M2.7 Highspeed',
  'MiniMax-M2.7': 'MiniMax M2.7',
}

// Human-friendly short label for a model id (aliases and provider prefixes
// resolved first). Falls back to the canonical id when no label matches.
export function getShortModelName(model: string): string {
  const canonical = resolveAlias(getCanonicalName(model))
  for (const [key, name] of Object.entries(SHORT_MODEL_NAMES)) {
    if (canonical.startsWith(key)) return name
  }
  return canonical
}