Fix Claude 1h cache write pricing

This commit is contained in:
ozymandiashh 2026-05-11 18:32:00 +03:00
parent d9acd8c4cd
commit 3c790adb23
9 changed files with 170 additions and 23 deletions

View file

@@ -1,5 +1,18 @@
# Changelog
## Unreleased
### Fixed (CLI)
- **Claude 1-hour cache writes use the correct price.** Claude Code records
5-minute and 1-hour prompt-cache writes separately in
`usage.cache_creation`. CodeBurn now prices the 1-hour portion at 2x base
input cost (1.6x the LiteLLM 5-minute cache-write rate) while preserving the
existing legacy fallback when only `cache_creation_input_tokens` is present.
Daily cache version bumped to v6 so previously cached under-reported costs
are recomputed from raw sessions.
This fixes under-reporting for plan-mode and long agent sessions that rely on
1-hour cache writes. Closes #276.
## 0.9.8 - 2026-05-10
### Added (CLI)

View file

@@ -25,6 +25,17 @@ JSONL, one event per line, per session file. Sessions live under `<project>/<ses
`createSessionParser` returns an empty async generator (`claude.ts:101-105`). Claude is a special case: `src/parser.ts` reads Claude JSONL files directly with full turn grouping, dedup of streaming message IDs, and MCP tool inventory extraction. The provider object exists only so `discoverSessions` can return Claude session sources alongside the others.
## Pricing
Claude Code reports total cache-write tokens in `usage.cache_creation_input_tokens`.
When available, it also splits those writes by duration in
`usage.cache_creation.ephemeral_5m_input_tokens` and
`usage.cache_creation.ephemeral_1h_input_tokens`. CodeBurn keeps the existing
aggregate cache-write token total for reports, but prices the 1-hour portion at
2x base input cost (1.6x the 5-minute cache-write rate exposed by LiteLLM).
If the split fields are missing, the parser falls back to the legacy behavior
and prices every cache write at the 5-minute rate.
## Caching
None at the provider level. The daily aggregation cache (`src/daily-cache.ts`) reuses prior computed days.

View file

@@ -5,24 +5,19 @@ import { homedir } from 'os'
import { join } from 'path'
import type { DateRange, ProjectSummary } from './types.js'
// Bumped to 5 alongside the Cursor per-project breakdown: prior daily
// entries recorded every Cursor session under a single 'cursor' project
// label. After the upgrade, the breakdown produces per-workspace project
// labels for new days; without invalidation the dashboard would show
// 'cursor' for historical days and `-Users-you-myproject` for new ones
// in the same window, producing a confusing mixed projection.
export const DAILY_CACHE_VERSION = 5
// MIN_SUPPORTED_VERSION bumped to 5 too. The migration path
// Bumped to 6 alongside the Claude 1-hour cache-write pricing fix: prior
// daily entries priced all Claude cache writes at the 5-minute rate, so
// cached historical cost/model/provider/category totals would remain
// under-reported unless discarded and recomputed from raw sessions.
export const DAILY_CACHE_VERSION = 6
// MIN_SUPPORTED_VERSION bumped to 6 too. The migration path
// (isMigratableCache + migrateDays) only fills in missing default fields;
// it does NOT recompute the providers / categories / models rollups from
// session data, because those raw sessions are not stored in the cache.
// So a migrated v2/v3/v4 cache would carry forward stale provider totals
// (single 'cursor' bucket instead of per-workspace) for the full cache
// retention window. Setting the floor to 5 forces those older caches to
// be discarded and recomputed cleanly. Confirmed by live test:
// menubar-json --period all reported cursor=$3.78 against a migrated
// v4 cache but $4.08 (correct) after the cache was discarded.
const MIN_SUPPORTED_VERSION = 5
// So a migrated v5 cache would carry forward stale pricing totals for
// the full cache retention window. Setting the floor to 6 forces older
// caches to be discarded and recomputed cleanly.
const MIN_SUPPORTED_VERSION = 6
const DAILY_CACHE_FILENAME = 'daily-cache.json'
export type DailyEntry = {

View file

@@ -25,6 +25,7 @@ type SnapshotEntry = [number, number, number | null, number | null]
const LITELLM_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
const CACHE_TTL_MS = 24 * 60 * 60 * 1000
const WEB_SEARCH_COST = 0.01
const ONE_HOUR_CACHE_WRITE_MULTIPLIER_FROM_FIVE_MINUTE_RATE = 1.6
const FAST_MULTIPLIERS: Record<string, number> = {
'claude-opus-4-7': 6,
@@ -310,6 +311,7 @@ export function calculateCost(
cacheReadTokens: number,
webSearchRequests: number,
speed: 'standard' | 'fast' = 'standard',
oneHourCacheCreationTokens = 0,
): number {
const costs = getModelCosts(model)
if (!costs) {
@@ -335,11 +337,15 @@
// from real spend in aggregate totals. NaN is also handled here; the
// arithmetic below short-circuits to 0 when any operand is non-finite.
const safe = (n: number) => (Number.isFinite(n) && n > 0 ? n : 0)
const safeOneHourCacheCreation = safe(oneHourCacheCreationTokens)
const safeCacheCreation = Math.max(safe(cacheCreationTokens), safeOneHourCacheCreation)
const safeFiveMinuteCacheCreation = Math.max(0, safeCacheCreation - safeOneHourCacheCreation)
return multiplier * (
safe(inputTokens) * costs.inputCostPerToken +
safe(outputTokens) * costs.outputCostPerToken +
safe(cacheCreationTokens) * costs.cacheWriteCostPerToken +
safeFiveMinuteCacheCreation * costs.cacheWriteCostPerToken +
safeOneHourCacheCreation * costs.cacheWriteCostPerToken * ONE_HOUR_CACHE_WRITE_MULTIPLIER_FROM_FIVE_MINUTE_RATE +
safe(cacheReadTokens) * costs.cacheReadCostPerToken +
safe(webSearchRequests) * costs.webSearchCostPerRequest
)

View file

@@ -92,16 +92,39 @@ function getMessageId(entry: JournalEntry): string | null {
return msg?.id ?? null
}
// Returns n unchanged when it is a finite number greater than zero;
// every other value (undefined, NaN, Infinity, zero, negatives) maps to 0.
function positiveNumber(n: number | undefined): number {
  if (n === undefined) return 0
  if (!Number.isFinite(n)) return 0
  return n > 0 ? n : 0
}
// Derives the aggregate cache-write token count and its 1-hour portion
// from a Claude usage payload. Prefers the per-duration split reported in
// `usage.cache_creation`; when that split is absent (or all zero) it falls
// back to the legacy `cache_creation_input_tokens` total with no 1-hour share.
function extractClaudeCacheCreation(usage: AssistantMessageContent['usage']): { totalTokens: number; oneHourTokens: number } {
  const legacyTotal = positiveNumber(usage.cache_creation_input_tokens)
  const split = usage.cache_creation
  const fiveMinute = positiveNumber(split?.ephemeral_5m_input_tokens)
  const oneHour = positiveNumber(split?.ephemeral_1h_input_tokens)
  if (fiveMinute + oneHour === 0) {
    return { totalTokens: legacyTotal, oneHourTokens: 0 }
  }
  // Valid Claude usage reports the legacy total and split total as equal.
  // Keep the larger value so malformed partial splits do not drop tokens.
  const totalTokens = Math.max(legacyTotal, fiveMinute + oneHour)
  return { totalTokens, oneHourTokens: Math.min(oneHour, totalTokens) }
}
function parseApiCall(entry: JournalEntry): ParsedApiCall | null {
if (entry.type !== 'assistant') return null
const msg = entry.message as AssistantMessageContent | undefined
if (!msg?.usage || !msg?.model) return null
const usage = msg.usage
const cacheCreation = extractClaudeCacheCreation(usage)
const tokens: TokenUsage = {
inputTokens: usage.input_tokens ?? 0,
outputTokens: usage.output_tokens ?? 0,
cacheCreationInputTokens: usage.cache_creation_input_tokens ?? 0,
cacheCreationInputTokens: cacheCreation.totalTokens,
cacheReadInputTokens: usage.cache_read_input_tokens ?? 0,
cachedInputTokens: 0,
reasoningTokens: 0,
@@ -118,6 +141,7 @@ function parseApiCall(entry: JournalEntry): ParsedApiCall | null {
tokens.cacheReadInputTokens,
tokens.webSearchRequests,
usage.speed ?? 'standard',
cacheCreation.oneHourTokens,
)
const bashCmds = extractBashCommandsFromContent(msg.content ?? [])

View file

@@ -25,6 +25,10 @@ export type ApiUsage = {
input_tokens: number
output_tokens: number
cache_creation_input_tokens?: number
cache_creation?: {
ephemeral_5m_input_tokens?: number
ephemeral_1h_input_tokens?: number
}
cache_read_input_tokens?: number
server_tool_use?: {
web_search_requests?: number

View file

@@ -104,6 +104,36 @@ describe('loadDailyCache', () => {
expect(existsSync(join(TMP_CACHE_ROOT, 'daily-cache.json.v2.bak'))).toBe(true)
})
// A v5 cache predates the 1-hour cache-write pricing fix, so its stored cost
// totals may be under-reported. With MIN_SUPPORTED_VERSION at 6, loadDailyCache
// must discard it entirely (empty days, null lastComputedDate) rather than
// migrate it, while keeping a versioned backup of the old file on disk.
it('discards a v5 cache because cached Claude costs predate 1-hour cache pricing', async () => {
const saved = {
version: 5,
lastComputedDate: '2026-05-01',
days: [{
date: '2026-05-01',
// 0.37575 = 60_120 cache-write tokens priced entirely at the old
// 5-minute rate — the under-reported figure the v6 bump invalidates.
cost: 0.37575,
calls: 1,
sessions: 1,
inputTokens: 0,
outputTokens: 0,
cacheReadTokens: 0,
cacheWriteTokens: 60_120,
editTurns: 0,
oneShotTurns: 0,
models: { 'Opus 4.7': { calls: 1, cost: 0.37575, inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 60_120 } },
categories: {},
providers: { claude: { calls: 1, cost: 0.37575 } },
}],
}
const { writeFile, mkdir } = await import('fs/promises')
await mkdir(TMP_CACHE_ROOT, { recursive: true })
await writeFile(join(TMP_CACHE_ROOT, 'daily-cache.json'), JSON.stringify(saved), 'utf-8')
// Version 5 is below the supported floor: expect a clean reset, not a migration.
const cache = await loadDailyCache()
expect(cache.version).toBe(DAILY_CACHE_VERSION)
expect(cache.days).toEqual([])
expect(cache.lastComputedDate).toBeNull()
// The stale cache is preserved as a versioned backup rather than deleted.
expect(existsSync(join(TMP_CACHE_ROOT, 'daily-cache.json.v5.bak'))).toBe(true)
})
it('round-trips a valid cache through save and load', async () => {
const saved: DailyCache = {
version: DAILY_CACHE_VERSION,

View file

@@ -158,6 +158,18 @@ describe('calculateCost - OMP names produce non-zero cost', () => {
})
})
// Pins the duration-split cache-write pricing in calculateCost: the trailing
// oneHourCacheCreationTokens argument carves the 1-hour portion out of the
// aggregate cacheCreationTokens total and prices it at 1.6x the 5-minute rate.
describe('calculateCost - Claude cache write durations', () => {
it('prices 1-hour cache writes at 1.6x the 5-minute cache write rate', () => {
// Baseline: 1M cache-write tokens, no 1-hour portion (legacy behavior).
const fiveMinute = calculateCost('claude-opus-4-7', 0, 0, 1_000_000, 0, 0)
// The entire 1M write total attributed to the 1-hour duration.
const oneHour = calculateCost('claude-opus-4-7', 0, 0, 1_000_000, 0, 0, 'standard', 1_000_000)
// Mixed: 100k total writes, 60k of them 1-hour (the remaining 40k stay 5-minute).
const mixed = calculateCost('claude-opus-4-7', 0, 0, 100_000, 0, 0, 'standard', 60_000)
// $6.25/MTok at the 5-minute rate; 10 = 6.25 * 1.6 at the 1-hour rate.
expect(fiveMinute).toBeCloseTo(6.25, 6)
expect(oneHour).toBeCloseTo(10, 6)
// 40_000 * 6.25e-6 + 60_000 * 10e-6 = 0.25 + 0.60 = 0.85
expect(mixed).toBeCloseTo(0.85, 6)
})
})
describe('existing model names still resolve', () => {
it('canonical claude-opus-4-6', () => {
expect(getModelCosts('claude-opus-4-6')).not.toBeNull()

View file

@@ -31,7 +31,14 @@ function dayRange(day: string): DateRange {
}
}
async function writeClaudeSession(projectSlug: string, sessionId: string, cwd: string, timestamp: string): Promise<void> {
async function writeClaudeSession(
projectSlug: string,
sessionId: string,
cwd: string,
timestamp: string,
usage: Record<string, unknown> = { input_tokens: 100, output_tokens: 50 },
model = 'claude-sonnet-4-5',
): Promise<void> {
const projectDir = join(tmpDir, 'projects', projectSlug)
await mkdir(projectDir, { recursive: true })
const filePath = join(projectDir, `${sessionId}.jsonl`)
@@ -44,12 +51,9 @@ async function writeClaudeSession(projectSlug: string, sessionId: string, cwd: s
id: `msg-${sessionId}`,
type: 'message',
role: 'assistant',
model: 'claude-sonnet-4-5',
model,
content: [],
usage: {
input_tokens: 100,
output_tokens: 50,
},
usage,
},
}) + '\n')
@@ -158,3 +162,51 @@ describe('Claude cwd project paths', () => {
expect(projects[0]!.projectPath).toBe('fallback/slug')
})
})
// End-to-end pricing through the parser: writes a Claude session JSONL file
// with the given usage payload and checks the cost parseAllSessions computes,
// covering both the new duration split and the legacy fallback path.
describe('Claude cache creation pricing', () => {
it('prices 1-hour cache writes from usage.cache_creation at the 2x input rate', async () => {
await writeClaudeSession(
'cache-pricing',
'one-hour-cache',
'/tmp/cache-pricing',
'2099-05-05T10:00:00.000Z',
{
input_tokens: 0,
output_tokens: 0,
// Aggregate total and split agree; the whole write is 1-hour duration.
cache_creation_input_tokens: 60_120,
cache_creation: {
ephemeral_5m_input_tokens: 0,
ephemeral_1h_input_tokens: 60_120,
},
},
'claude-opus-4-7',
)
const projects = await parseAllSessions(dayRange('2099-05-05'), 'claude')
expect(projects).toHaveLength(1)
// Reports still show the aggregate cache-write token total.
expect(projects[0]!.sessions[0]!.totalCacheWriteTokens).toBe(60_120)
// 0.6012 = 60_120 * 10e-6 ($10/MTok, the 1-hour = 2x-input rate).
expect(projects[0]!.totalCostUSD).toBeCloseTo(0.6012, 6)
})
it('falls back to the legacy 5-minute cache write rate when split fields are absent', async () => {
await writeClaudeSession(
'legacy-cache-pricing',
'legacy-cache',
'/tmp/legacy-cache-pricing',
'2099-05-06T10:00:00.000Z',
{
input_tokens: 0,
output_tokens: 0,
// No usage.cache_creation split: legacy pricing must apply unchanged.
cache_creation_input_tokens: 60_120,
},
'claude-opus-4-7',
)
const projects = await parseAllSessions(dayRange('2099-05-06'), 'claude')
expect(projects).toHaveLength(1)
expect(projects[0]!.sessions[0]!.totalCacheWriteTokens).toBe(60_120)
// 0.37575 = 60_120 * 6.25e-6 ($6.25/MTok, the 5-minute rate).
expect(projects[0]!.totalCostUSD).toBeCloseTo(0.37575, 6)
})
})