Fix Claude 1h cache write pricing

This commit is contained in:
ozymandiashh 2026-05-11 18:32:00 +03:00
parent d9acd8c4cd
commit 3c790adb23
9 changed files with 170 additions and 23 deletions

View file

@@ -1,5 +1,18 @@
# Changelog
## Unreleased
### Fixed (CLI)
- **Claude 1-hour cache writes use the correct price.** Claude Code records
5-minute and 1-hour prompt-cache writes separately in
`usage.cache_creation`. CodeBurn now prices the 1-hour portion at 2x base
input cost (1.6x the LiteLLM 5-minute cache-write rate) while preserving the
existing legacy fallback when only `cache_creation_input_tokens` is present.
Daily cache version bumped to v6 so previously cached under-reported costs
are recomputed from raw sessions.
This fixes under-reporting for plan-mode and long agent sessions that rely on
1-hour cache writes. Closes #276.
## 0.9.8 - 2026-05-10
### Added (CLI)

View file

@@ -25,6 +25,17 @@ JSONL, one event per line, per session file. Sessions live under `<project>/<ses
`createSessionParser` returns an empty async generator (`claude.ts:101-105`). Claude is a special case: `src/parser.ts` reads Claude JSONL files directly with full turn grouping, dedup of streaming message IDs, and MCP tool inventory extraction. The provider object exists only so `discoverSessions` can return Claude session sources alongside the others.
## Pricing
Claude Code reports total cache-write tokens in `usage.cache_creation_input_tokens`.
When available, it also splits those writes by duration in
`usage.cache_creation.ephemeral_5m_input_tokens` and
`usage.cache_creation.ephemeral_1h_input_tokens`. CodeBurn keeps the existing
aggregate cache-write token total for reports, but prices the 1-hour portion at
2x base input cost (1.6x the 5-minute cache-write rate exposed by LiteLLM).
If the split fields are missing, the parser falls back to the legacy behavior
and prices every cache write at the 5-minute rate.
## Caching
None at the provider level. The daily aggregation cache (`src/daily-cache.ts`) reuses prior computed days.

View file

@@ -5,24 +5,19 @@ import { homedir } from 'os'
import { join } from 'path'
import type { DateRange, ProjectSummary } from './types.js'
// Bumped to 5 alongside the Cursor per-project breakdown: prior daily
// entries recorded every Cursor session under a single 'cursor' project
// label. After the upgrade, the breakdown produces per-workspace project
// labels for new days; without invalidation the dashboard would show
// 'cursor' for historical days and `-Users-you-myproject` for new ones
// in the same window, producing a confusing mixed projection.
export const DAILY_CACHE_VERSION = 5
// MIN_SUPPORTED_VERSION bumped to 5 too. The migration path
// Bumped to 6 alongside the Claude 1-hour cache-write pricing fix: prior
// daily entries priced all Claude cache writes at the 5-minute rate, so
// cached historical cost/model/provider/category totals would remain
// under-reported unless discarded and recomputed from raw sessions.
export const DAILY_CACHE_VERSION = 6
// MIN_SUPPORTED_VERSION bumped to 6 too. The migration path
// (isMigratableCache + migrateDays) only fills in missing default fields;
// it does NOT recompute the providers / categories / models rollups from
// session data, because those raw sessions are not stored in the cache.
// So a migrated v2/v3/v4 cache would carry forward stale provider totals
// (single 'cursor' bucket instead of per-workspace) for the full cache
// retention window. Setting the floor to 5 forces those older caches to
// be discarded and recomputed cleanly. Confirmed by live test:
// menubar-json --period all reported cursor=$3.78 against a migrated
// v4 cache but $4.08 (correct) after the cache was discarded.
const MIN_SUPPORTED_VERSION = 5
// So a migrated v5 cache would carry forward stale pricing totals for
// the full cache retention window. Setting the floor to 6 forces older
// caches to be discarded and recomputed cleanly.
const MIN_SUPPORTED_VERSION = 6
const DAILY_CACHE_FILENAME = 'daily-cache.json'
export type DailyEntry = {

View file

@@ -25,6 +25,7 @@ type SnapshotEntry = [number, number, number | null, number | null]
const LITELLM_URL = 'https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json'
const CACHE_TTL_MS = 24 * 60 * 60 * 1000
const WEB_SEARCH_COST = 0.01
const ONE_HOUR_CACHE_WRITE_MULTIPLIER_FROM_FIVE_MINUTE_RATE = 1.6
const FAST_MULTIPLIERS: Record<string, number> = {
'claude-opus-4-7': 6,
@@ -310,6 +311,7 @@ export function calculateCost(
cacheReadTokens: number,
webSearchRequests: number,
speed: 'standard' | 'fast' = 'standard',
oneHourCacheCreationTokens = 0,
): number {
const costs = getModelCosts(model)
if (!costs) {
@@ -335,11 +337,15 @@
// from real spend in aggregate totals. NaN is also handled here; the
// arithmetic below short-circuits to 0 when any operand is non-finite.
const safe = (n: number) => (Number.isFinite(n) && n > 0 ? n : 0)
const safeOneHourCacheCreation = safe(oneHourCacheCreationTokens)
const safeCacheCreation = Math.max(safe(cacheCreationTokens), safeOneHourCacheCreation)
const safeFiveMinuteCacheCreation = Math.max(0, safeCacheCreation - safeOneHourCacheCreation)
return multiplier * (
safe(inputTokens) * costs.inputCostPerToken +
safe(outputTokens) * costs.outputCostPerToken +
safe(cacheCreationTokens) * costs.cacheWriteCostPerToken +
safeFiveMinuteCacheCreation * costs.cacheWriteCostPerToken +
safeOneHourCacheCreation * costs.cacheWriteCostPerToken * ONE_HOUR_CACHE_WRITE_MULTIPLIER_FROM_FIVE_MINUTE_RATE +
safe(cacheReadTokens) * costs.cacheReadCostPerToken +
safe(webSearchRequests) * costs.webSearchCostPerRequest
)

View file

@@ -92,16 +92,39 @@ function getMessageId(entry: JournalEntry): string | null {
return msg?.id ?? null
}
// Returns n unchanged when it is a finite number greater than zero;
// every other value (undefined, NaN, Infinity, zero, negatives) maps to 0.
function positiveNumber(n: number | undefined): number {
  if (n === undefined) return 0
  if (!Number.isFinite(n)) return 0
  return n > 0 ? n : 0
}
// Derives the aggregate cache-write token count and its 1-hour portion
// from a Claude usage payload. Prefers the per-duration split reported in
// `usage.cache_creation`; when that split is absent (or all zero) it falls
// back to the legacy `cache_creation_input_tokens` total with no 1-hour share.
function extractClaudeCacheCreation(usage: AssistantMessageContent['usage']): { totalTokens: number; oneHourTokens: number } {
  const legacyTotal = positiveNumber(usage.cache_creation_input_tokens)
  const split = usage.cache_creation
  const fiveMinute = positiveNumber(split?.ephemeral_5m_input_tokens)
  const oneHour = positiveNumber(split?.ephemeral_1h_input_tokens)
  if (fiveMinute + oneHour === 0) {
    return { totalTokens: legacyTotal, oneHourTokens: 0 }
  }
  // Valid Claude usage reports the legacy total and split total as equal.
  // Keep the larger value so malformed partial splits do not drop tokens.
  const totalTokens = Math.max(legacyTotal, fiveMinute + oneHour)
  return { totalTokens, oneHourTokens: Math.min(oneHour, totalTokens) }
}
function parseApiCall(entry: JournalEntry): ParsedApiCall | null {
if (entry.type !== 'assistant') return null
const msg = entry.message as AssistantMessageContent | undefined
if (!msg?.usage || !msg?.model) return null
const usage = msg.usage
const cacheCreation = extractClaudeCacheCreation(usage)
const tokens: TokenUsage = {
inputTokens: usage.input_tokens ?? 0,
outputTokens: usage.output_tokens ?? 0,
cacheCreationInputTokens: usage.cache_creation_input_tokens ?? 0,
cacheCreationInputTokens: cacheCreation.totalTokens,
cacheReadInputTokens: usage.cache_read_input_tokens ?? 0,
cachedInputTokens: 0,
reasoningTokens: 0,
@@ -118,6 +141,7 @@ function parseApiCall(entry: JournalEntry): ParsedApiCall | null {
tokens.cacheReadInputTokens,
tokens.webSearchRequests,
usage.speed ?? 'standard',
cacheCreation.oneHourTokens,
)
const bashCmds = extractBashCommandsFromContent(msg.content ?? [])

View file

@@ -25,6 +25,10 @@ export type ApiUsage = {
input_tokens: number
output_tokens: number
cache_creation_input_tokens?: number
cache_creation?: {
ephemeral_5m_input_tokens?: number
ephemeral_1h_input_tokens?: number
}
cache_read_input_tokens?: number
server_tool_use?: {
web_search_requests?: number

View file

@@ -104,6 +104,36 @@ describe('loadDailyCache', () => {
expect(existsSync(join(TMP_CACHE_ROOT, 'daily-cache.json.v2.bak'))).toBe(true)
})
// A v5 cache predates the 1-hour cache-write pricing fix, so its stored cost
// totals may be under-reported. With MIN_SUPPORTED_VERSION at 6, loadDailyCache
// must discard it entirely (empty days, null lastComputedDate) rather than
// migrate it, while keeping a versioned backup of the old file on disk.
it('discards a v5 cache because cached Claude costs predate 1-hour cache pricing', async () => {
const saved = {
version: 5,
lastComputedDate: '2026-05-01',
days: [{
date: '2026-05-01',
// 0.37575 = 60_120 cache-write tokens priced entirely at the old
// 5-minute rate — the under-reported figure the v6 bump invalidates.
cost: 0.37575,
calls: 1,
sessions: 1,
inputTokens: 0,
outputTokens: 0,
cacheReadTokens: 0,
cacheWriteTokens: 60_120,
editTurns: 0,
oneShotTurns: 0,
models: { 'Opus 4.7': { calls: 1, cost: 0.37575, inputTokens: 0, outputTokens: 0, cacheReadTokens: 0, cacheWriteTokens: 60_120 } },
categories: {},
providers: { claude: { calls: 1, cost: 0.37575 } },
}],
}
const { writeFile, mkdir } = await import('fs/promises')
await mkdir(TMP_CACHE_ROOT, { recursive: true })
await writeFile(join(TMP_CACHE_ROOT, 'daily-cache.json'), JSON.stringify(saved), 'utf-8')
// Version 5 is below the supported floor: expect a clean reset, not a migration.
const cache = await loadDailyCache()
expect(cache.version).toBe(DAILY_CACHE_VERSION)
expect(cache.days).toEqual([])
expect(cache.lastComputedDate).toBeNull()
// The stale cache is preserved as a versioned backup rather than deleted.
expect(existsSync(join(TMP_CACHE_ROOT, 'daily-cache.json.v5.bak'))).toBe(true)
})
it('round-trips a valid cache through save and load', async () => {
const saved: DailyCache = {
version: DAILY_CACHE_VERSION,

View file

@@ -158,6 +158,18 @@ describe('calculateCost - OMP names produce non-zero cost', () => {
})
})
// Pins the duration-split cache-write pricing in calculateCost: the trailing
// oneHourCacheCreationTokens argument carves the 1-hour portion out of the
// aggregate cacheCreationTokens total and prices it at 1.6x the 5-minute rate.
describe('calculateCost - Claude cache write durations', () => {
it('prices 1-hour cache writes at 1.6x the 5-minute cache write rate', () => {
// Baseline: 1M cache-write tokens, no 1-hour portion (legacy behavior).
const fiveMinute = calculateCost('claude-opus-4-7', 0, 0, 1_000_000, 0, 0)
// The entire 1M write total attributed to the 1-hour duration.
const oneHour = calculateCost('claude-opus-4-7', 0, 0, 1_000_000, 0, 0, 'standard', 1_000_000)
// Mixed: 100k total writes, 60k of them 1-hour (the remaining 40k stay 5-minute).
const mixed = calculateCost('claude-opus-4-7', 0, 0, 100_000, 0, 0, 'standard', 60_000)
// $6.25/MTok at the 5-minute rate; 10 = 6.25 * 1.6 at the 1-hour rate.
expect(fiveMinute).toBeCloseTo(6.25, 6)
expect(oneHour).toBeCloseTo(10, 6)
// 40_000 * 6.25e-6 + 60_000 * 10e-6 = 0.25 + 0.60 = 0.85
expect(mixed).toBeCloseTo(0.85, 6)
})
})
describe('existing model names still resolve', () => {
it('canonical claude-opus-4-6', () => {
expect(getModelCosts('claude-opus-4-6')).not.toBeNull()

View file

@@ -31,7 +31,14 @@ function dayRange(day: string): DateRange {
}
}
async function writeClaudeSession(projectSlug: string, sessionId: string, cwd: string, timestamp: string): Promise<void> {
async function writeClaudeSession(
projectSlug: string,
sessionId: string,
cwd: string,
timestamp: string,
usage: Record<string, unknown> = { input_tokens: 100, output_tokens: 50 },
model = 'claude-sonnet-4-5',
): Promise<void> {
const projectDir = join(tmpDir, 'projects', projectSlug)
await mkdir(projectDir, { recursive: true })
const filePath = join(projectDir, `${sessionId}.jsonl`)
@@ -44,12 +51,9 @@ async function writeClaudeSession(projectSlug: string, sessionId: string, cwd: s
id: `msg-${sessionId}`,
type: 'message',
role: 'assistant',
model: 'claude-sonnet-4-5',
model,
content: [],
usage: {
input_tokens: 100,
output_tokens: 50,
},
usage,
},
}) + '\n')
@@ -158,3 +162,51 @@ describe('Claude cwd project paths', () => {
expect(projects[0]!.projectPath).toBe('fallback/slug')
})
})
// End-to-end pricing through the parser: writes a Claude session JSONL file
// with the given usage payload and checks the cost parseAllSessions computes,
// covering both the new duration split and the legacy fallback path.
describe('Claude cache creation pricing', () => {
it('prices 1-hour cache writes from usage.cache_creation at the 2x input rate', async () => {
await writeClaudeSession(
'cache-pricing',
'one-hour-cache',
'/tmp/cache-pricing',
'2099-05-05T10:00:00.000Z',
{
input_tokens: 0,
output_tokens: 0,
// Aggregate total and split agree; the whole write is 1-hour duration.
cache_creation_input_tokens: 60_120,
cache_creation: {
ephemeral_5m_input_tokens: 0,
ephemeral_1h_input_tokens: 60_120,
},
},
'claude-opus-4-7',
)
const projects = await parseAllSessions(dayRange('2099-05-05'), 'claude')
expect(projects).toHaveLength(1)
// Reports still show the aggregate cache-write token total.
expect(projects[0]!.sessions[0]!.totalCacheWriteTokens).toBe(60_120)
// 0.6012 = 60_120 * 10e-6 ($10/MTok, the 1-hour = 2x-input rate).
expect(projects[0]!.totalCostUSD).toBeCloseTo(0.6012, 6)
})
it('falls back to the legacy 5-minute cache write rate when split fields are absent', async () => {
await writeClaudeSession(
'legacy-cache-pricing',
'legacy-cache',
'/tmp/legacy-cache-pricing',
'2099-05-06T10:00:00.000Z',
{
input_tokens: 0,
output_tokens: 0,
// No usage.cache_creation split: legacy pricing must apply unchanged.
cache_creation_input_tokens: 60_120,
},
'claude-opus-4-7',
)
const projects = await parseAllSessions(dayRange('2099-05-06'), 'claude')
expect(projects).toHaveLength(1)
expect(projects[0]!.sessions[0]!.totalCacheWriteTokens).toBe(60_120)
// 0.37575 = 60_120 * 6.25e-6 ($6.25/MTok, the 5-minute rate).
expect(projects[0]!.totalCostUSD).toBeCloseTo(0.37575, 6)
})
})