codeburn/src/cursor-cache.ts
Resham Joshi 810b214476
Cursor: per-project breakdown by workspace (closes per-project half of #196) (#296)
Cursor's chat history showed as a single row labeled 'cursor' in
the dashboard because the global state.vscdb has no workspace
field on individual bubbles. The fix joins through Cursor's
per-workspace storage:

1. Walk ~/Library/Application Support/Cursor/User/workspaceStorage/*
2. For each hash dir, read workspace.json -> folder URI
3. Open that dir's state.vscdb, read
   ItemTable['composer.composerData'] -> allComposers list
4. Build Map<composerId, folder URI>
5. emit one SessionSource per workspace plus a catch-all 'cursor'
   source for composers that did not register against any
   workspace (multi-root workspaces, no-folder-open windows,
   deleted workspaces with surviving global rows)

The parser decodes source.path's #cursor-ws= tag, filters the
parsed bubbles to the composerIds that belong to this workspace,
and yields only those. The orphan-tag source negates the filter so
it captures every composer not in any workspace.

In passing, fix a real bug in the old code: parseBubbles set
`sessionId: row.conversation_id ?? 'unknown'`, but the JSON
`conversationId` field is empty in current Cursor builds, so every
call shipped with `sessionId: 'unknown'`. We now derive the
composer id from the row key (`bubbleId:<composerId>:<bubbleUuid>`)
which is what the workspace map joins on. The old behavior masked
the bug because every call went into a single 'cursor' project
anyway; with per-workspace bucketing the bug becomes load-bearing.
Cache version bumped 2 -> 3 to invalidate caches that still record
'unknown' as the session id.

Live-tested against my real 1.9 GB Cursor DB: the single 'cursor'
row with 1904 calls / $4.08 now breaks into 5 workspaces plus an
orphan bucket, totals reconcile exactly. 8 fixture-based tests
cover multi-workspace routing, orphan filtering, legacy bare DB
path backwards compat, multi-root workspace skip, vscode-remote
URI slugification, and total reconciliation across all sources.

Full suite: 46 files, 653 tests passing.
2026-05-10 15:35:57 -07:00

86 lines
2.7 KiB
TypeScript

import { readFile, writeFile, mkdir, rename, stat, unlink } from 'fs/promises'
import { join } from 'path'
import { homedir } from 'os'
import { randomBytes } from 'crypto'
import type { ParsedProviderCall } from './providers/types.js'
// Cache schema version. Bumped 2 -> 3 for the workspace-aware breakdown:
// the cursor parser now derives `sessionId` from the bubble row key (the
// real composer id) rather than the always-empty `conversationId` JSON
// field, and the workspace router buckets calls per project by those
// composer ids. Version 2 caches record `sessionId: 'unknown'` for every
// call and would route everything into the orphan project, so any cache
// that does not match this version is discarded on read.
const CURSOR_CACHE_VERSION = 3

// On-disk shape of the cursor results cache. The db mtime/size pair is a
// cheap staleness fingerprint: if either differs from the live state.vscdb,
// the cached calls are considered stale and re-parsed.
type ResultCache = {
  version?: number // absent in pre-versioning caches; always treated as stale
  dbMtimeMs: number
  dbSizeBytes: number
  calls: ParsedProviderCall[]
}

// File name of the cache inside the codeburn cache directory.
const CACHE_FILE = 'cursor-results.json'
/** Directory where codeburn keeps its on-disk caches (~/.cache/codeburn). */
function getCacheDir(): string {
  const home = homedir()
  return join(home, '.cache', 'codeburn')
}
/** Absolute path of the cursor results cache file. */
function getCachePath(): string {
  const dir = getCacheDir()
  return join(dir, CACHE_FILE)
}
/**
 * Stat the Cursor database and return its (mtimeMs, size) pair, which the
 * cache uses as a cheap staleness fingerprint. Returns null when the file
 * cannot be stat'ed (missing db, permission error); callers treat that as
 * "no caching possible".
 */
async function getDbFingerprint(dbPath: string): Promise<{ mtimeMs: number; size: number } | null> {
  try {
    const { mtimeMs, size } = await stat(dbPath)
    return { mtimeMs, size }
  } catch {
    return null
  }
}
export async function readCachedResults(dbPath: string): Promise<ParsedProviderCall[] | null> {
try {
const fp = await getDbFingerprint(dbPath)
if (!fp) return null
const raw = await readFile(getCachePath(), 'utf-8')
const cache = JSON.parse(raw) as ResultCache
if (cache.version === CURSOR_CACHE_VERSION && cache.dbMtimeMs === fp.mtimeMs && cache.dbSizeBytes === fp.size) {
return cache.calls
}
return null
} catch {
return null
}
}
export async function writeCachedResults(dbPath: string, calls: ParsedProviderCall[]): Promise<void> {
const fp = await getDbFingerprint(dbPath)
if (!fp) return
const dir = getCacheDir()
await mkdir(dir, { recursive: true }).catch(() => {})
const cache: ResultCache = {
version: CURSOR_CACHE_VERSION,
dbMtimeMs: fp.mtimeMs,
dbSizeBytes: fp.size,
calls,
}
// Atomic write: stage to a randomized temp file in the same directory,
// then rename onto the final path. rename() is atomic on POSIX, so a
// crash mid-write never leaves a half-written cache, and concurrent
// CLI invocations using their own random temp names cannot interleave
// bytes in the destination file (they only race on the final rename,
// last-writer-wins, both with valid content).
const target = getCachePath()
const tempPath = `${target}.${randomBytes(8).toString('hex')}.tmp`
try {
await writeFile(tempPath, JSON.stringify(cache), 'utf-8')
await rename(tempPath, target)
} catch {
await unlink(tempPath).catch(() => {})
}
}