codeburn/src/providers/cursor.ts
iamtoruk 7a21b123a8 Cursor: per-project breakdown by workspace (closes per-project half of #196)
Cursor's chat history showed as a single row labeled 'cursor' in
the dashboard because the global state.vscdb has no workspace
field on individual bubbles. The fix joins through Cursor's
per-workspace storage:

1. Walk ~/Library/Application Support/Cursor/User/workspaceStorage/*
2. For each hash dir, read workspace.json -> folder URI
3. Open that dir's state.vscdb, read
   ItemTable['composer.composerData'] -> allComposers list
4. Build Map<composerId, folder URI>
5. Emit one SessionSource per workspace plus a catch-all 'cursor'
   source for composers that did not register against any
   workspace (multi-root workspaces, no-folder-open windows,
   deleted workspaces with surviving global rows)

The parser decodes source.path's #cursor-ws= tag, filters the
parsed bubbles to the composerIds that belong to this workspace,
and yields only those. The orphan-tag source negates the filter so
it captures every composer not in any workspace.

In passing, fix a real bug in the old code: parseBubbles set
`sessionId: row.conversation_id ?? 'unknown'`, but the JSON
`conversationId` field is empty in current Cursor builds, so every
call shipped with `sessionId: 'unknown'`. We now derive the
composer id from the row key (`bubbleId:<composerId>:<bubbleUuid>`)
which is what the workspace map joins on. The old behavior masked
the bug because every call went into a single 'cursor' project
anyway; with per-workspace bucketing the bug becomes load-bearing.
Cache version bumped 2 -> 3 to invalidate caches that still record
'unknown' as the session id.

Live-tested against my real 1.9 GB Cursor DB: the single 'cursor'
row with 1904 calls / $4.08 now breaks into 5 workspaces plus an
orphan bucket, totals reconcile exactly. 8 fixture-based tests
cover multi-workspace routing, orphan filtering, legacy bare DB
path backwards compat, multi-root workspace skip, vscode-remote
URI slugification, and total reconciliation across all sources.

Full suite: 46 files, 653 tests passing.
2026-05-10 15:34:30 -07:00

789 lines
27 KiB
TypeScript

import { existsSync, statSync, readdirSync, readFileSync } from 'fs'
import { join } from 'path'
import { homedir } from 'os'
import { calculateCost } from '../models.js'
import { readCachedResults, writeCachedResults } from '../cursor-cache.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, type SqliteDatabase } from '../sqlite.js'
import type { Provider, SessionSource, SessionParser, ParsedProviderCall } from './types.js'
// Pricing model substituted by resolveModel() when a row carries no model
// name or the literal 'default'.
const CURSOR_COST_MODEL = 'claude-sonnet-4-5'
// Human-readable labels keyed by raw model id. The provider's
// modelDisplayName() falls back to the raw id for anything not listed here.
// 'cursor-auto' is the synthetic id modelForDisplay() returns for rows with
// no explicit model.
const modelDisplayNames: Record<string, string> = {
'claude-4.5-opus-high-thinking': 'Opus 4.5 (Thinking)',
'claude-4-opus': 'Opus 4',
'claude-4-sonnet-thinking': 'Sonnet 4 (Thinking)',
'claude-4.5-sonnet-thinking': 'Sonnet 4.5 (Thinking)',
'claude-4.6-sonnet': 'Sonnet 4.6',
'composer-1': 'Composer 1',
'grok-code-fast-1': 'Grok Code Fast',
'gemini-3-pro': 'Gemini 3 Pro',
'gpt-5.2-low': 'GPT-5.2 Low',
'gpt-5.2': 'GPT-5.2',
'gpt-5.1-codex-high': 'GPT-5.1 Codex',
'gpt-5': 'GPT-5',
'gpt-4.1': 'GPT-4.1',
'cursor-auto': 'Cursor (auto)',
}
// One result row of the bubble queries: columns projected from a
// `bubbleId:%` entry of the cursorDiskKV table.
type BubbleRow = {
// Full row key (`key as bubble_key`); the composer id and the dedup key are derived from it.
bubble_key: string
// `$.tokenCount.inputTokens` / `$.tokenCount.outputTokens` of the bubble JSON.
input_tokens: number | null
output_tokens: number | null
// `$.modelInfo.modelName`.
model: string | null
// `$.createdAt`; compared against an ISO-8601 string in the time-floor predicate.
created_at: string | null
// `$.conversationId` of the bubble JSON.
conversation_id: string | null
// First 500 characters of `$.text` (the column name notwithstanding, it is
// selected for every bubble type, not only user bubbles).
user_text: string | null
// Length of the full `$.text`, used to estimate tokens when counts are zero.
text_length: number | null
// `$.type`; the value 1 is treated as a user-authored bubble.
bubble_type: number | null
// `$.codeBlocks` as JSON text, parsed by extractLanguages().
code_blocks: string | null
}
// One result row of AGENTKV_QUERY: columns projected from an
// `agentKv:blob:%` entry of the cursorDiskKV table.
type AgentKvRow = {
// Full row key.
key: string
// `$.role`; parseAgentKv handles 'user', 'assistant', 'tool' and 'system'.
role: string | null
// `$.content` as returned by json_extract (JSON text for arrays, plain text for strings).
content: string | null
// `$.providerOptions.cursor.requestId`; rows without one inherit the previous row's id.
request_id: string | null
// `length(value)`: length of the whole stored value, not only of `content`.
content_length: number
}
// Shape of one element of the JSON array found in an agentKv row's `content`
// column. Every field is optional because the array is parsed from untyped JSON.
type AgentKvContent = {
type?: string
// Text payload; its length feeds the character-based token estimate.
text?: string
providerOptions?: {
cursor?: {
// Model name read by extractModelFromContent().
modelName?: string
requestId?: string
}
}
}
// Divisor used to estimate a token count from a character count when no
// real token counts are available (bubble text length, agentKv content).
const CHARS_PER_TOKEN = 4
/// Resolves the location of Cursor's global `state.vscdb` for the current
/// platform.
///
/// - macOS: `~/Library/Application Support/Cursor/...`
/// - Windows: `%APPDATA%/Cursor/...` when that file exists, otherwise
///   `~/AppData/Roaming/Cursor/...`
/// - everything else: `$XDG_CONFIG_HOME/Cursor/...` when that file exists,
///   otherwise `~/.config/Cursor/...`
///
/// The environment-derived location is only preferred when the database is
/// actually present there, so installs using the default layout resolve to
/// exactly the same path as before. The returned path is not guaranteed to
/// exist; callers check that themselves.
function getCursorDbPath(): string {
  const tail = ['Cursor', 'User', 'globalStorage', 'state.vscdb']
  if (process.platform === 'darwin') {
    return join(homedir(), 'Library', 'Application Support', ...tail)
  }
  const isWindows = process.platform === 'win32'
  const fallback = isWindows
    ? join(homedir(), 'AppData', 'Roaming', ...tail)
    : join(homedir(), '.config', ...tail)
  // Electron-based apps place user data under %APPDATA% (Windows) or
  // $XDG_CONFIG_HOME (Linux) when those are set; honour a relocated profile.
  const configRoot = isWindows ? process.env['APPDATA'] : process.env['XDG_CONFIG_HOME']
  if (configRoot) {
    const candidate = join(configRoot, ...tail)
    if (existsSync(candidate)) return candidate
  }
  return fallback
}
/// Derives the `workspaceStorage` directory from the global DB path.
///
/// Cursor keeps `globalStorage/` and `workspaceStorage/` side by side under
/// `User/`, so climbing two levels from `.../User/globalStorage/state.vscdb`
/// and descending into `workspaceStorage` lands on the sibling. Deriving it
/// from the DB path lets a test or override relocate both with one root.
function getCursorWorkspaceStorageDir(globalDbPath: string): string {
  return join(globalDbPath, '..', '..', 'workspaceStorage')
}
/// Per-conversation workspace lookup table. Cursor stores each chat as
/// `bubbleId:<composerId>:<bubbleUuid>` rows in the GLOBAL state.vscdb but
/// does NOT carry a workspace path on the bubble itself. The mapping lives
/// in per-workspace dirs at `workspaceStorage/<hash>/`:
/// - `workspace.json` carries the folder URI (`file:///Users/me/proj`)
/// - `state.vscdb`'s `ItemTable['composer.composerData']` lists every
/// composerId opened in that workspace
/// We walk every workspace dir, pull both, and build composerId -> folder.
/// `workspaceProjectName` only gains an entry for a folder once at least one
/// composer id has been recorded for it, so workspaces without composers do
/// not produce a project.
type WorkspaceMapping = {
composerToWorkspace: Map<string, string> // composerId -> folder URI
workspaceProjectName: Map<string, string> // folder URI -> sanitized project name
}
// Workspace tag encoded into the source path of the catch-all source. The
// parser treats it as "exclude every composer that maps to some workspace".
const ORPHAN_TAG = '__orphan__'
// Catch-all project label for composers that did not register against any
// workspace. When the user has no workspaces at all this is the only label
// shown, matching the pre-PR `cursor` project so legacy installs are not
// renamed by the breakdown change.
const ORPHAN_PROJECT = 'cursor'
/// Turns a workspace folder URI into a dash-separated project slug.
///
///   file:///Users/me/myproject               -> -Users-me-myproject
///   vscode-remote://wsl+Ubuntu/home/me/proj  -> -wsl-Ubuntu-home-me-proj
///
/// `file://` URIs simply lose their scheme. Any other scheme has `scheme://`
/// collapsed into a single leading `/` and every `+` turned into `-`. The
/// result is percent-decoded when possible (malformed escapes are left
/// untouched) and every run of `/` becomes one `-`.
function sanitizeWorkspaceUri(uri: string): string {
  const fileScheme = 'file://'
  const stripped = uri.startsWith(fileScheme)
    ? uri.slice(fileScheme.length)
    : uri.replace(/^[^:]+:\/\//, '/').replace(/\+/g, '-')

  let decoded = stripped
  try {
    decoded = decodeURIComponent(stripped)
  } catch {
    // Invalid percent-encoding: fall through with the undecoded text.
  }
  return decoded.replace(/\/+/g, '-')
}
// Process-wide memo of the last workspace map built, together with the
// workspaceStorage directory it was built from.
let workspaceMapCache: WorkspaceMapping | null = null
let workspaceMapCacheRoot: string | null = null
/// Drops the memoized workspace map. Exposed so tests can write new fixture
/// workspace directories and have the next lookup rebuild from disk.
export function clearCursorWorkspaceMapCache(): void {
  workspaceMapCache = workspaceMapCacheRoot = null
}
/// Builds (or returns the memoized) composerId -> workspace folder mapping for
/// one `workspaceStorage` directory.
///
/// For every sub-directory this reads `workspace.json` for the folder URI and
/// the directory's `state.vscdb` for `ItemTable['composer.composerData']`,
/// then records each string `composerId` against that folder. Any directory
/// that is missing a piece, fails to parse, or cannot be opened is skipped
/// silently. The result is memoized per directory path, including the empty
/// result produced when the directory cannot be listed.
function loadWorkspaceMap(workspaceStorageDir: string): WorkspaceMapping {
  if (workspaceMapCache !== null && workspaceMapCacheRoot === workspaceStorageDir) {
    return workspaceMapCache
  }

  const mapping: WorkspaceMapping = {
    composerToWorkspace: new Map(),
    workspaceProjectName: new Map(),
  }
  const memoize = (): WorkspaceMapping => {
    workspaceMapCache = mapping
    workspaceMapCacheRoot = workspaceStorageDir
    return mapping
  }

  let hashDirs: string[]
  try {
    hashDirs = readdirSync(workspaceStorageDir)
  } catch {
    // No workspaceStorage directory at all: remember the empty mapping.
    return memoize()
  }

  for (const hashDir of hashDirs) {
    // Step 1: folder URI from workspace.json (unreadable or invalid -> skip).
    let folder: string | undefined
    try {
      const manifestText = readFileSync(join(workspaceStorageDir, hashDir, 'workspace.json'), 'utf-8')
      folder = (JSON.parse(manifestText) as { folder?: string }).folder
    } catch {
      continue
    }
    if (!folder) continue

    // Step 2: composer list from the workspace-local state.vscdb.
    const wsDbPath = join(workspaceStorageDir, hashDir, 'state.vscdb')
    if (!existsSync(wsDbPath)) continue
    let wsDb: SqliteDatabase
    try {
      wsDb = openDatabase(wsDbPath)
    } catch {
      continue
    }

    try {
      const [composerRow] = wsDb.query<{ value: string }>(
        "SELECT value FROM ItemTable WHERE key='composer.composerData'",
      )
      if (composerRow === undefined) continue

      let composerData: { allComposers?: Array<{ composerId?: string }> }
      try {
        composerData = JSON.parse(composerRow.value)
      } catch {
        continue
      }

      // Computed before touching the maps so a folder that cannot be
      // slugified contributes nothing.
      const project = sanitizeWorkspaceUri(folder)
      let registeredAny = false
      for (const { composerId } of composerData.allComposers ?? []) {
        if (typeof composerId !== 'string') continue
        mapping.composerToWorkspace.set(composerId, folder)
        registeredAny = true
      }
      if (registeredAny) {
        mapping.workspaceProjectName.set(folder, project)
      }
    } catch {
      // best-effort
    } finally {
      wsDb.close()
    }
  }

  return memoize()
}
/// Extracts `<composerId>` from a `bubbleId:<composerId>:<bubbleUuid>` key.
///
/// Returns null when the key is empty, has fewer than two colons, has an
/// empty composer segment, or when that segment contains CR, LF or NUL.
/// Multi-line segments are how Cursor keys tool-call sub-composer rows
/// (e.g. `bubbleId:task-call_xxxx\nfc_yyyy:<bubbleUuid>`); those are not
/// standalone composers and must not be counted as sessions.
function parseComposerIdFromKey(key: string | undefined): string | null {
  if (!key) return null
  const segments = key.split(':')
  // Need a prefix, a composer segment, and at least one colon after it.
  if (segments.length < 3) return null
  const composerId = segments[1]
  if (!composerId) return null
  return /[\r\n\x00]/.test(composerId) ? null : composerId
}
// A SessionSource only carries a path, so the workspace a source is scoped
// to travels inside that path as `<dbPath>#cursor-ws=<workspaceTag>`. The
// separator token is private to this provider.
const WORKSPACE_SEP = '#cursor-ws='

/// Appends the workspace tag to the DB path using the private separator.
function encodeSourcePath(dbPath: string, workspaceTag: string): string {
  return dbPath + WORKSPACE_SEP + workspaceTag
}

/// Splits a source path at the FIRST separator occurrence. A path with no
/// separator (older cached sources, hand-built test sources) is treated as a
/// bare DB path with the tag `__all__`, meaning "every call from this DB".
function decodeSourcePath(sourcePath: string): { dbPath: string; workspaceTag: string } {
  const sepAt = sourcePath.indexOf(WORKSPACE_SEP)
  return sepAt === -1
    ? { dbPath: sourcePath, workspaceTag: '__all__' }
    : {
        dbPath: sourcePath.substring(0, sepAt),
        workspaceTag: sourcePath.substring(sepAt + WORKSPACE_SEP.length),
      }
}
// Minimal view of one entry of a bubble's `codeBlocks` array.
type CodeBlock = { languageId?: string }

/// Returns the distinct `languageId`s found in a bubble's `codeBlocks` JSON,
/// in first-seen order, ignoring missing ids and `plaintext`. Any problem
/// (null/empty input, invalid JSON, non-array payload, malformed entry)
/// yields an empty list.
function extractLanguages(codeBlocksJson: string | null): string[] {
  if (!codeBlocksJson) return []
  try {
    const parsed: unknown = JSON.parse(codeBlocksJson)
    if (!Array.isArray(parsed)) return []
    const ids = (parsed as CodeBlock[])
      .map(block => block.languageId)
      .filter((id): id is string => Boolean(id) && id !== 'plaintext')
    return Array.from(new Set(ids))
  } catch {
    return []
  }
}
/// Model id used for pricing: the raw id when one is present, otherwise
/// (null, empty, or the literal 'default') the CURSOR_COST_MODEL fallback.
function resolveModel(raw: string | null): string {
  return raw && raw !== 'default' ? raw : CURSOR_COST_MODEL
}
/// Model id used for display: the raw id when one is present, otherwise
/// (null, empty, or the literal 'default') the synthetic id 'cursor-auto'.
function modelForDisplay(raw: string | null): string {
  return raw && raw !== 'default' ? raw : 'cursor-auto'
}
// Projection shared by the bubble queries: one row per `bubbleId:%` key with
// the JSON fields described by BubbleRow. `$.text` is truncated to 500
// characters for the preview column while its full length is returned
// separately. The statement deliberately ends after the WHERE clause so
// callers can append further `AND ...` predicates and an ORDER BY.
const BUBBLE_QUERY_BASE = `
SELECT
key as bubble_key,
json_extract(value, '$.tokenCount.inputTokens') as input_tokens,
json_extract(value, '$.tokenCount.outputTokens') as output_tokens,
json_extract(value, '$.modelInfo.modelName') as model,
json_extract(value, '$.createdAt') as created_at,
json_extract(value, '$.conversationId') as conversation_id,
substr(json_extract(value, '$.text'), 1, 500) as user_text,
length(json_extract(value, '$.text')) as text_length,
json_extract(value, '$.type') as bubble_type,
json_extract(value, '$.codeBlocks') as code_blocks
FROM cursorDiskKV
WHERE key LIKE 'bubbleId:%'
`
// Every `agentKv:blob:%` row whose stored value starts with `{` (hex 7B), in
// insertion (ROWID) order. The first-byte test filters out values that do
// not begin like a JSON object before json_extract is applied to them.
const AGENTKV_QUERY = `
SELECT
key,
json_extract(value, '$.role') as role,
json_extract(value, '$.content') as content,
json_extract(value, '$.providerOptions.cursor.requestId') as request_id,
length(value) as content_length
FROM cursorDiskKV
WHERE key LIKE 'agentKv:blob:%'
AND hex(substr(value, 1, 1)) = '7B'
ORDER BY ROWID ASC
`
// User-authored bubbles (`$.type` = 1) newer than the bound time floor, or
// with no `createdAt` at all, in insertion (ROWID) order. The row key is
// selected because the composer id has to be taken from the key
// (`bubbleId:<composerId>:<bubbleUuid>`): the JSON `conversationId` field is
// not reliably populated.
const USER_MESSAGES_QUERY = `
SELECT
key as bubble_key,
json_extract(value, '$.conversationId') as conversation_id,
json_extract(value, '$.createdAt') as created_at,
substr(json_extract(value, '$.text'), 1, 500) as text
FROM cursorDiskKV
WHERE key LIKE 'bubbleId:%'
AND json_extract(value, '$.type') = 1
AND (json_extract(value, '$.createdAt') > ? OR json_extract(value, '$.createdAt') IS NULL)
ORDER BY ROWID ASC
`
// Split into HEAD (predicates we always emit) and TAIL (ORDER BY) so the
// caller can splice in an optional `ROWID >= ?` cutoff without rewriting
// the whole template. The original combined string is preserved as
// BUBBLE_QUERY_SINCE for any caller that doesn't want the cap.
const BUBBLE_QUERY_SINCE_HEAD = BUBBLE_QUERY_BASE + `
AND (json_extract(value, '$.createdAt') > ? OR json_extract(value, '$.createdAt') IS NULL)`
const BUBBLE_QUERY_SINCE_TAIL = `
ORDER BY ROWID ASC
`
const BUBBLE_QUERY_SINCE = BUBBLE_QUERY_SINCE_HEAD + BUBBLE_QUERY_SINCE_TAIL

/// Reports whether the database looks like a Cursor store this provider can
/// read. COUNT(*) always produces one row, so in practice this returns true
/// whenever the query runs (the `cursorDiskKV` table and its `key` column
/// exist) and false when the query throws.
function validateSchema(db: SqliteDatabase): boolean {
  try {
    const rows = db.query<{ cnt: number }>(
      "SELECT COUNT(*) as cnt FROM cursorDiskKV WHERE key LIKE 'bubbleId:%' LIMIT 1"
    )
    return rows.length > 0
  } catch {
    return false
  }
}

// One row of USER_MESSAGES_QUERY. The JSON-derived columns can be NULL.
type UserMsgRow = {
  bubble_key: string
  conversation_id: string | null
  created_at: string | null
  text: string | null
}

/// Per-conversation user-message buffer. We pop messages in arrival order via
/// the `pos` cursor — a previous implementation called Array.shift() which is
/// O(n) per call on large conversations and pinned multi-GB Cursor DBs at
/// minutes-of-parse for power users. The cursor walk is O(1).
type UserMessageQueue = {
  messages: string[]
  pos: number
}

/// Collects user-authored bubble texts into one arrival-ordered queue per
/// composer.
///
/// Queues are keyed by the composer id parsed from the row key, which is the
/// same id parseBubbles passes to takeUserMessage. Keying by the JSON
/// `conversationId` instead would leave every lookup empty whenever that
/// field is blank. Rows with no text, or whose key carries no usable composer
/// id, are ignored.
///
/// @param db        Open handle on the global Cursor database.
/// @param timeFloor ISO-8601 lower bound for `createdAt`.
/// @returns Map of composer id to its message queue. The map is empty (or
///          partial) if the query fails, because user text is only
///          enrichment and must not fail the parse.
function buildUserMessageMap(db: SqliteDatabase, timeFloor: string): Map<string, UserMessageQueue> {
  const map = new Map<string, UserMessageQueue>()
  try {
    const rows = db.query<UserMsgRow>(USER_MESSAGES_QUERY, [timeFloor])
    for (const row of rows) {
      if (!row.text) continue
      const composerId = parseComposerIdFromKey(row.bubble_key)
      if (!composerId) continue
      const existing = map.get(composerId)
      if (existing) {
        existing.messages.push(row.text)
      } else {
        map.set(composerId, { messages: [row.text], pos: 0 })
      }
    }
  } catch {
    // Best-effort: calls are still reported, just without the user question.
  }
  return map
}
/// Pops the next unread user message for a conversation, advancing that
/// queue's cursor. Returns '' when the conversation has no queue or the
/// queue is exhausted (in which case the cursor is left untouched).
function takeUserMessage(queues: Map<string, UserMessageQueue>, conversationId: string): string {
  const queue = queues.get(conversationId)
  if (!queue) return ''
  const { messages, pos } = queue
  if (pos >= messages.length) return ''
  queue.pos = pos + 1
  return messages[pos]
}
/// Converts bubble rows of the global Cursor database into provider calls.
///
/// Only rows whose `createdAt` is newer than a 180-day floor (or missing) are
/// read, and on databases with more than MAX_BUBBLES bubble rows the scan is
/// further limited to the most recent MAX_BUBBLES by ROWID. Rows are skipped
/// when they have neither token counts nor text, when their key carries no
/// usable composer id, or when their dedup key is already in `seenKeys`.
///
/// @param db       Open handle on the global Cursor database.
/// @param seenKeys Dedup set; the key of every emitted call is ADDED to it.
/// @returns The parsed calls. The list is empty when the main query fails.
///
/// Side effects: writes a notice to stderr when the row cap is applied and
/// when at least one row was skipped via the `skipped` counter.
function parseBubbles(db: SqliteDatabase, seenKeys: Set<string>): { calls: ParsedProviderCall[] } {
const results: ParsedProviderCall[] = []
// NOTE(review): `skipped` counts both rows that threw while being processed
// and rows whose key has no usable composer id, yet the final stderr message
// describes all of them as "unreadable".
let skipped = 0
const LOOKBACK_DAYS = 180
const timeFloor = new Date(Date.now() - LOOKBACK_DAYS * 24 * 60 * 60 * 1000).toISOString()
// Hard cap on rows to scan. The BUBBLE_QUERY_SINCE filter relies on
// json_extract over the value BLOB, which SQLite cannot serve from an
// index — every row is JSON-decoded. Multi-GB Cursor DBs (power users,
// years of usage) regularly exceed 500k bubble rows and were producing
// 30s+ parse stalls. Compute a ROWID cutoff that limits the scan to the
// MAX_BUBBLES most-recent bubbles when the user is over the cap, and
// warn so they know older sessions may be missing.
const MAX_BUBBLES = 250_000
// 0 means "no cap"; any positive value becomes a `ROWID >= ?` predicate below.
let rowIdCutoff = 0
try {
const countRows = db.query<{ cnt: number }>(
"SELECT COUNT(*) as cnt FROM cursorDiskKV WHERE key LIKE 'bubbleId:%'"
)
const total = countRows[0]?.cnt ?? 0
if (total > MAX_BUBBLES) {
// Find the ROWID of the (MAX_BUBBLES)th most-recent bubble. Anything
// below this rowid is older and gets skipped. Bubbles are written
// chronologically so ROWID order ≈ insertion order.
const cutoffRows = db.query<{ rid: number }>(
`SELECT MIN(rid) as rid FROM (
SELECT ROWID as rid FROM cursorDiskKV
WHERE key LIKE 'bubbleId:%'
ORDER BY ROWID DESC
LIMIT ?
)`,
[MAX_BUBBLES]
)
rowIdCutoff = cutoffRows[0]?.rid ?? 0
process.stderr.write(
`codeburn: Cursor database has ${total.toLocaleString()} bubbles, ` +
`scanning the most recent ${MAX_BUBBLES.toLocaleString()}. ` +
`Older sessions may be missing from this report.\n`
)
}
} catch { /* best-effort diagnostic */ }
// Queues of user-authored texts, consumed one entry per emitted call below.
const userMessages = buildUserMessageMap(db, timeFloor)
// Append the rowid cutoff when active. Empty string when not capped so the
// query string compares identically to the un-capped version on small DBs.
const rowIdFilter = rowIdCutoff > 0 ? ' AND ROWID >= ?' : ''
const params: unknown[] = rowIdCutoff > 0 ? [timeFloor, rowIdCutoff] : [timeFloor]
const cappedQuery = BUBBLE_QUERY_SINCE_HEAD + rowIdFilter + BUBBLE_QUERY_SINCE_TAIL
let rows: BubbleRow[]
try {
rows = db.query<BubbleRow>(cappedQuery, params)
} catch {
// Main query failed: report nothing rather than throw.
return { calls: results }
}
for (const row of rows) {
try {
let inputTokens = row.input_tokens ?? 0
let outputTokens = row.output_tokens ?? 0
// Cursor v3 stores zero token counts — estimate from text length
if (inputTokens === 0 && outputTokens === 0) {
const textLen = row.text_length ?? 0
// No tokens and no text: nothing to account for (not counted as skipped).
if (textLen === 0) continue
// Type 1 bubbles are user-authored, so their text counts as input;
// every other type counts as output.
if (row.bubble_type === 1) {
inputTokens = Math.ceil(textLen / CHARS_PER_TOKEN)
} else {
outputTokens = Math.ceil(textLen / CHARS_PER_TOKEN)
}
}
const createdAt = row.created_at ?? ''
// The JSON `conversationId` field on bubbles is empty in current
// Cursor builds. The real composerId lives in the row key
// `bubbleId:<composerId>:<bubbleUuid>`. Extract from the key so the
// workspace map join works. parseComposerIdFromKey returns null for
// non-UUID composer segments (Cursor stores tool-call output under
// `bubbleId:task-call_xxx\nfc_yyy:<bubbleUuid>` and similar shapes —
// those bubbles are NOT standalone sessions; their tokens are
// already accounted for inside the parent composer's stream).
const parsedComposerId = parseComposerIdFromKey(row.bubble_key)
if (!parsedComposerId) {
skipped++
continue
}
const conversationId = parsedComposerId
// Use the SQLite row key (bubbleId:<unique>) as the dedup key.
// Cursor mutates token counts on the row in place when streaming
// completes — including tokens in the dedup key (the previous
// implementation) caused the same bubble to be counted twice once
// its tokens stabilized.
const dedupKey = `cursor:bubble:${row.bubble_key}`
if (seenKeys.has(dedupKey)) continue
seenKeys.add(dedupKey)
// Pricing and display use different fallbacks when the row has no model.
const pricingModel = resolveModel(row.model)
const displayModel = modelForDisplay(row.model)
const costUSD = calculateCost(pricingModel, inputTokens, outputTokens, 0, 0, 0)
// Rows without `createdAt` are stamped with the time of this parse.
const timestamp = createdAt || new Date().toISOString()
// Pop the next queued user text for this composer id (may be '') and
// prepend it to this row's own truncated text.
const userQuestion = takeUserMessage(userMessages, conversationId)
const assistantText = row.user_text ?? ''
const userText = (userQuestion + ' ' + assistantText).trim()
const languages = extractLanguages(row.code_blocks)
const hasCode = languages.length > 0
// A bubble with code blocks is reported as an edit plus one `lang:` tag per language.
const cursorTools: string[] = hasCode ? ['cursor:edit', ...languages.map(l => `lang:${l}`)] : []
results.push({
provider: 'cursor',
model: displayModel,
inputTokens,
outputTokens,
cacheCreationInputTokens: 0,
cacheReadInputTokens: 0,
cachedInputTokens: 0,
reasoningTokens: 0,
webSearchRequests: 0,
costUSD,
tools: cursorTools,
bashCommands: [],
timestamp,
speed: 'standard',
deduplicationKey: dedupKey,
userMessage: userText,
sessionId: conversationId,
})
} catch {
// Any unexpected failure on a single row drops that row only.
skipped++
}
}
if (skipped > 0) {
process.stderr.write(`codeburn: skipped ${skipped} unreadable Cursor entries\n`)
}
return { calls: results }
}
/// Returns the first non-empty `providerOptions.cursor.modelName` found in
/// the content parts, or null when no part carries one.
function extractModelFromContent(content: AgentKvContent[]): string | null {
  const tagged = content.find(part => Boolean(part.providerOptions?.cursor?.modelName))
  return tagged?.providerOptions?.cursor?.modelName ?? null
}
/// Sums the lengths of every non-empty `text` field across the content parts.
function extractTextLength(content: AgentKvContent[]): number {
  return content.reduce((sum, part) => (part.text ? sum + part.text.length : sum), 0)
}
/// Converts `agentKv:blob:*` rows into one provider call per request id.
///
/// Rows are walked in insertion order and grouped by request id; a row with
/// no request id of its own is attributed to the most recent one seen
/// ('unknown' before any). Character counts are accumulated per group (user,
/// tool and system rows count as input, assistant rows as output) and turned
/// into token estimates with CHARS_PER_TOKEN.
///
/// @param db       Open handle on the global Cursor database.
/// @param seenKeys Dedup set; the key of every emitted call is ADDED to it.
/// @param dbPath   Path of the database file, used only for its mtime.
/// @returns The parsed calls. The list is empty when the query fails.
function parseAgentKv(db: SqliteDatabase, seenKeys: Set<string>, dbPath: string): { calls: ParsedProviderCall[] } {
  const results: ParsedProviderCall[] = []

  // Cursor's agentKv schema does not record per-message timestamps. Use the
  // SQLite file's mtime as a bounded "last write" timestamp for all calls;
  // fall back to "now" only when the file cannot be stat'ed.
  let agentKvTimestamp: string
  try {
    agentKvTimestamp = new Date(statSync(dbPath).mtimeMs).toISOString()
  } catch {
    agentKvTimestamp = new Date().toISOString()
  }

  let rows: AgentKvRow[]
  try {
    rows = db.query<AgentKvRow>(AGENTKV_QUERY)
  } catch {
    return { calls: results }
  }

  type AgentKvSession = { inputChars: number; outputChars: number; model: string | null; userText: string }
  const sessions = new Map<string, AgentKvSession>()
  // Get-or-create. Insertion order (first row seen per request id) decides
  // the order in which calls are emitted.
  const sessionFor = (requestId: string): AgentKvSession => {
    let session = sessions.get(requestId)
    if (!session) {
      session = { inputChars: 0, outputChars: 0, model: null, userText: '' }
      sessions.set(requestId, session)
    }
    return session
  }

  // Hoisted: the pattern is stateless (no `g` flag) and identical for every row.
  const userQueryPattern = /<user_query>([\s\S]*?)<\/user_query>/

  let currentRequestId = 'unknown'
  for (const row of rows) {
    if (!row.role || !row.content) continue

    // `content` is either a JSON array of parts or plain text. Anything that
    // is not an array is measured by its raw length instead.
    let content: AgentKvContent[] = []
    let plainTextLength = 0
    try {
      const parsed: unknown = JSON.parse(row.content)
      if (Array.isArray(parsed)) {
        content = parsed as AgentKvContent[]
      } else {
        plainTextLength = row.content.length
      }
    } catch {
      plainTextLength = row.content.length
    }

    const requestId = row.request_id ?? currentRequestId
    currentRequestId = requestId

    const textLength = plainTextLength || extractTextLength(content)
    const model = extractModelFromContent(content)

    switch (row.role) {
      case 'user': {
        const session = sessionFor(requestId)
        session.inputChars += textLength
        if (!session.userText) {
          // The part's `text` comes from untyped JSON; only trust it when it
          // really is a string, otherwise fall back to the raw column so a
          // single odd row cannot throw out of the whole parse.
          const firstText = content[0]?.text
          const text = typeof firstText === 'string' ? firstText : row.content
          const inner = userQueryPattern.exec(text)?.[1]
          session.userText = (inner !== undefined ? inner.trim() : text).slice(0, 500)
        }
        break
      }
      case 'assistant': {
        const session = sessionFor(requestId)
        session.outputChars += textLength
        if (model) session.model = model
        break
      }
      case 'tool':
      case 'system': {
        sessionFor(requestId).inputChars += textLength
        break
      }
      default:
        // Other roles are ignored and do not create a session.
        break
    }
  }

  for (const [requestId, session] of sessions) {
    if (session.inputChars === 0 && session.outputChars === 0) continue
    const inputTokens = Math.ceil(session.inputChars / CHARS_PER_TOKEN)
    const outputTokens = Math.ceil(session.outputChars / CHARS_PER_TOKEN)
    const dedupKey = `cursor:agentKv:${requestId}`
    if (seenKeys.has(dedupKey)) continue
    seenKeys.add(dedupKey)
    const pricingModel = resolveModel(session.model)
    const displayModel = modelForDisplay(session.model)
    const costUSD = calculateCost(pricingModel, inputTokens, outputTokens, 0, 0, 0)
    results.push({
      provider: 'cursor',
      model: displayModel,
      inputTokens,
      outputTokens,
      cacheCreationInputTokens: 0,
      cacheReadInputTokens: 0,
      cachedInputTokens: 0,
      reasoningTokens: 0,
      webSearchRequests: 0,
      costUSD,
      tools: [],
      bashCommands: [],
      timestamp: agentKvTimestamp,
      speed: 'standard',
      deduplicationKey: dedupKey,
      userMessage: session.userText,
      sessionId: requestId,
    })
  }
  return { calls: results }
}
/// Builds the parser for one Cursor session source.
///
/// The source path encodes which slice of the global database the source
/// owns: a workspace folder (only composers registered to that folder), the
/// orphan tag (only composers registered to NO workspace), or no tag at all
/// (everything). Parsed calls are cached per bare DB path, so every
/// workspace-scoped source shares one parse; the slice filter and the
/// cross-source dedup against `seenKeys` are applied when yielding.
function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
  async function* parse(): AsyncGenerator<ParsedProviderCall> {
    if (!isSqliteAvailable()) {
      process.stderr.write(getSqliteLoadError() + '\n')
      return
    }

    const { dbPath, workspaceTag } = decodeSourcePath(source.path)

    // Membership test for this source's slice. Untagged sources accept all.
    let belongsHere: (sessionId: string) => boolean = () => true
    if (workspaceTag !== '__all__') {
      // The workspace map is memoized, so the directory walk is paid once
      // per process no matter how many workspace sources exist.
      const { composerToWorkspace } = loadWorkspaceMap(getCursorWorkspaceStorageDir(dbPath))
      if (workspaceTag === ORPHAN_TAG) {
        // Orphan slice: anything whose session id is not a mapped composer.
        const mapped = new Set(composerToWorkspace.keys())
        belongsHere = sessionId => !mapped.has(sessionId)
      } else {
        const owned = new Set<string>()
        composerToWorkspace.forEach((folder, composerId) => {
          if (folder === workspaceTag) owned.add(composerId)
        })
        belongsHere = sessionId => owned.has(sessionId)
      }
    }

    // Cached result if present, otherwise a fresh parse that is then cached.
    // Resolves to null when the database cannot be opened or recognized (the
    // reason has already been written to stderr).
    const loadAllCalls = async (): Promise<ParsedProviderCall[] | null> => {
      const cached = await readCachedResults(dbPath)
      if (cached) return cached

      let db: SqliteDatabase
      try {
        db = openDatabase(dbPath)
      } catch (err) {
        process.stderr.write(`codeburn: cannot open Cursor database: ${err instanceof Error ? err.message : err}\n`)
        return null
      }
      try {
        if (!validateSchema(db)) {
          process.stderr.write('codeburn: Cursor storage format not recognized. You may need to update CodeBurn.\n')
          return null
        }
        // Intra-parse dedup uses a private set so the shared `seenKeys` is
        // not polluted by calls the slice filter is about to drop.
        const localSeen = new Set<string>()
        const fresh = [
          ...parseBubbles(db, localSeen).calls,
          ...parseAgentKv(db, localSeen, dbPath).calls,
        ]
        await writeCachedResults(dbPath, fresh)
        return fresh
      } finally {
        db.close()
      }
    }

    const allCalls = await loadAllCalls()
    if (allCalls === null) return

    for (const call of allCalls) {
      if (!belongsHere(call.sessionId)) continue
      if (seenKeys.has(call.deduplicationKey)) continue
      seenKeys.add(call.deduplicationKey)
      yield call
    }
  }

  return { parse }
}
/// Creates the Cursor provider.
///
/// @param dbPathOverride Optional path to a global `state.vscdb`; when
///        omitted the platform default location is used. The matching
///        `workspaceStorage` directory is derived from whichever path is
///        in effect.
export function createCursorProvider(dbPathOverride?: string): Provider {
  // One source per workspace that has at least one composer, followed by a
  // catch-all source for composers with no workspace mapping. The catch-all
  // is always emitted, so a user with no workspaces still gets a single
  // `cursor` project.
  const discoverSessions = async (): Promise<SessionSource[]> => {
    if (!isSqliteAvailable()) return []
    const dbPath = dbPathOverride ?? getCursorDbPath()
    if (!existsSync(dbPath)) return []

    const { workspaceProjectName } = loadWorkspaceMap(getCursorWorkspaceStorageDir(dbPath))
    const workspaceSources = Array.from(
      workspaceProjectName,
      ([folder, project]): SessionSource => ({
        path: encodeSourcePath(dbPath, folder),
        project,
        provider: 'cursor',
      }),
    )
    const orphanSource: SessionSource = {
      path: encodeSourcePath(dbPath, ORPHAN_TAG),
      project: ORPHAN_PROJECT,
      provider: 'cursor',
    }
    return [...workspaceSources, orphanSource]
  }

  return {
    name: 'cursor',
    displayName: 'Cursor',
    // Known ids get a friendly label; unknown ids are shown verbatim.
    modelDisplayName: (model: string): string => modelDisplayNames[model] ?? model,
    toolDisplayName: (rawTool: string): string => rawTool,
    discoverSessions,
    createSessionParser: (source: SessionSource, seenKeys: Set<string>): SessionParser =>
      createParser(source, seenKeys),
  }
}
// Default provider instance bound to the platform's standard Cursor location.
export const cursor = createCursorProvider()