From 7a21b123a893767954ccb976ebe0743235cccdea Mon Sep 17 00:00:00 2001 From: iamtoruk Date: Sun, 10 May 2026 13:46:47 -0700 Subject: [PATCH] Cursor: per-project breakdown by workspace (closes per-project half of #196) Cursor's chat history showed as a single row labeled 'cursor' in the dashboard because the global state.vscdb has no workspace field on individual bubbles. The fix joins through Cursor's per-workspace storage: 1. Walk ~/Library/Application Support/Cursor/User/workspaceStorage/* 2. For each hash dir, read workspace.json -> folder URI 3. Open that dir's state.vscdb, read ItemTable['composer.composerData'] -> allComposers list 4. Build Map<composerId, folder URI> 5. Emit one SessionSource per workspace plus a catch-all 'cursor' source for composers that did not register against any workspace (multi-root workspaces, no-folder-open windows, deleted workspaces with surviving global rows) The parser decodes source.path's #cursor-ws= tag, filters the parsed bubbles to the composerIds that belong to this workspace, and yields only those. The orphan-tag source negates the filter so it captures every composer not in any workspace. In passing, fix a real bug in the old code: parseBubbles set `sessionId: row.conversation_id ?? 'unknown'`, but the JSON `conversationId` field is empty in current Cursor builds, so every call shipped with `sessionId: 'unknown'`. We now derive the composer id from the row key (`bubbleId:<composerId>:<bubbleId>`) which is what the workspace map joins on. The old behavior masked the bug because every call went into a single 'cursor' project anyway; with per-workspace bucketing the bug becomes load-bearing. Cache version bumped 2 -> 3 to invalidate caches that still record 'unknown' as the session id. Live-tested against my real 1.9 GB Cursor DB: the single 'cursor' row with 1904 calls / $4.08 now breaks into 5 workspaces plus an orphan bucket, totals reconcile exactly. 
8 fixture-based tests cover multi-workspace routing, orphan filtering, legacy bare DB path backwards compat, multi-root workspace skip, vscode-remote URI slugification, and total reconciliation across all sources. Full suite: 46 files, 653 tests passing. --- CHANGELOG.md | 17 + src/cursor-cache.ts | 8 +- src/daily-cache.ts | 9 +- src/providers/cursor.ts | 315 +++++++++++++++-- .../cursor-workspace-breakdown.test.ts | 330 ++++++++++++++++++ 5 files changed, 645 insertions(+), 34 deletions(-) create mode 100644 tests/providers/cursor-workspace-breakdown.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 26a76a7..3d569e2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,23 @@ reconcile. Closes #279. ### Fixed (CLI) +- **Cursor sessions break down by project, not one row called "cursor".** + Cursor's chat history sat under a single dashboard row labeled `cursor` + because the provider had no way to attribute bubbles to a workspace. + The fix walks `~/Library/Application Support/Cursor/User/workspaceStorage/*` + for each workspace's `workspace.json` (folder URI) and + `composer.composerData` (the composer ids opened in that workspace), + then joins those composer ids against the global bubbles. Each + workspace becomes its own project row, sanitized into the same slug + shape Claude uses (e.g. `-Users-you-myproject`); composers that have + no workspace mapping (multi-root workspaces, "no folder open" + sessions, deleted workspaces) remain under a catch-all `cursor` row. + As part of this the cursor parser now derives `sessionId` from the + bubble row key (`bubbleId:<composerId>:<bubbleId>`) instead of the + empty `conversationId` JSON field, which was always falling back to + `'unknown'`. Cursor result cache version bumped to 3 to invalidate + prior caches that recorded the old session id. Closes the per-project + half of #196. 
- **Cursor cost shown for every model, not just Auto.** Cursor emits model names in a `claude--` shape (`claude-4.6-sonnet`, `claude-4.5-opus`, `claude-4.5-opus-high-thinking`, etc.) plus its own diff --git a/src/cursor-cache.ts b/src/cursor-cache.ts index cbdf9c5..390dcfa 100644 --- a/src/cursor-cache.ts +++ b/src/cursor-cache.ts @@ -5,7 +5,13 @@ import { randomBytes } from 'crypto' import type { ParsedProviderCall } from './providers/types.js' -const CURSOR_CACHE_VERSION = 2 +// Bumped to 3 for the workspace-aware breakdown change: the cursor parser +// now derives `sessionId` from the bubble row key (the real composer id) +// rather than the empty `conversationId` JSON field, and the workspace +// router relies on those composer ids to bucket calls per project. +// Version 2 caches contain `sessionId: 'unknown'` for every call and would +// route everything to the orphan project, so we invalidate them. +const CURSOR_CACHE_VERSION = 3 type ResultCache = { version?: number diff --git a/src/daily-cache.ts b/src/daily-cache.ts index 3455662..c5641bf 100644 --- a/src/daily-cache.ts +++ b/src/daily-cache.ts @@ -5,7 +5,14 @@ import { homedir } from 'os' import { join } from 'path' import type { DateRange, ProjectSummary } from './types.js' -export const DAILY_CACHE_VERSION = 4 +// Bumped to 5 alongside the Cursor per-project breakdown: prior daily +// entries recorded every Cursor session under a single 'cursor' project +// label. After the upgrade, the breakdown produces per-workspace project +// labels for new days; without invalidation the dashboard would show +// 'cursor' for historical days and `-Users-you-myproject` for new ones +// in the same window, producing a confusing mixed projection. v5 forces a +// full recompute. 
+export const DAILY_CACHE_VERSION = 5 const MIN_SUPPORTED_VERSION = 2 const DAILY_CACHE_FILENAME = 'daily-cache.json' diff --git a/src/providers/cursor.ts b/src/providers/cursor.ts index 9ba1230..370566a 100644 --- a/src/providers/cursor.ts +++ b/src/providers/cursor.ts @@ -1,4 +1,4 @@ -import { existsSync, statSync } from 'fs' +import { existsSync, statSync, readdirSync, readFileSync } from 'fs' import { join } from 'path' import { homedir } from 'os' @@ -70,6 +70,190 @@ function getCursorDbPath(): string { return join(homedir(), '.config', 'Cursor', 'User', 'globalStorage', 'state.vscdb') } +function getCursorWorkspaceStorageDir(globalDbPath: string): string { + // Sibling of globalStorage. Cursor lays out User/{globalStorage,workspaceStorage}/. + // We derive the workspaceStorage path from the global DB path so a test or + // override can supply both consistently from one root. + // globalDbPath = .../User/globalStorage/state.vscdb + // workspaceStorage = .../User/workspaceStorage + const userDir = join(globalDbPath, '..', '..') + return join(userDir, 'workspaceStorage') +} + +/// Per-conversation workspace lookup table. Cursor stores each chat as +/// `bubbleId::` rows in the GLOBAL state.vscdb but +/// does NOT carry a workspace path on the bubble itself. The mapping lives +/// in per-workspace dirs at `workspaceStorage//`: +/// - `workspace.json` carries the folder URI (`file:///Users/me/proj`) +/// - `state.vscdb`'s `ItemTable['composer.composerData']` lists every +/// composerId opened in that workspace +/// We walk every workspace dir, pull both, and build composerId -> folder. +type WorkspaceMapping = { + composerToWorkspace: Map // composerId -> folder URI + workspaceProjectName: Map // folder URI -> sanitized project name +} + +const ORPHAN_TAG = '__orphan__' +// Catch-all project label for composers that did not register against any +// workspace. 
When the user has no workspaces at all this is the only label +// shown, matching the pre-PR `cursor` project so legacy installs are not +// renamed by the breakdown change. +const ORPHAN_PROJECT = 'cursor' + +function sanitizeWorkspaceUri(uri: string): string { + // Mirrors Claude's slug convention so two providers reporting the same + // project path produce identical project keys for cross-provider rollup. + // file:///Users/me/myproject → -Users-me-myproject + // vscode-remote://wsl+Ubuntu/home/me/proj → -wsl-Ubuntu-home-me-proj + let path: string + if (uri.startsWith('file://')) { + path = uri.slice('file://'.length) + } else { + // Other URI schemes (vscode-remote://, ssh+remote://, etc.): swap "://" + // for a leading "/" so the slugifier produces a predictable shape. + path = uri.replace(/^[^:]+:\/\//, '/').replace(/\+/g, '-') + } + try { + path = decodeURIComponent(path) + } catch { + // Malformed percent encoding — keep as-is rather than throw. + } + return path.replace(/\/+/g, '-') +} + +let workspaceMapCache: WorkspaceMapping | null = null +let workspaceMapCacheRoot: string | null = null + +/// Visible for tests so a fixture can rebuild the map after writing fresh +/// workspace directories. 
+export function clearCursorWorkspaceMapCache(): void { + workspaceMapCache = null + workspaceMapCacheRoot = null +} + +function loadWorkspaceMap(workspaceStorageDir: string): WorkspaceMapping { + if (workspaceMapCache && workspaceMapCacheRoot === workspaceStorageDir) { + return workspaceMapCache + } + const result: WorkspaceMapping = { + composerToWorkspace: new Map(), + workspaceProjectName: new Map(), + } + + let entries: string[] + try { + entries = readdirSync(workspaceStorageDir) + } catch { + workspaceMapCache = result + workspaceMapCacheRoot = workspaceStorageDir + return result + } + + for (const hashDir of entries) { + const wsJsonPath = join(workspaceStorageDir, hashDir, 'workspace.json') + const wsDbPath = join(workspaceStorageDir, hashDir, 'state.vscdb') + + let wsJsonRaw: string + try { + wsJsonRaw = readFileSync(wsJsonPath, 'utf-8') + } catch { + continue + } + + let folder: string | undefined + try { + const parsed = JSON.parse(wsJsonRaw) as { folder?: string } + folder = parsed.folder + } catch { + continue + } + if (!folder) continue + if (!existsSync(wsDbPath)) continue + + let db: SqliteDatabase + try { + db = openDatabase(wsDbPath) + } catch { + continue + } + try { + const rows = db.query<{ value: string }>( + "SELECT value FROM ItemTable WHERE key='composer.composerData'", + ) + if (rows.length === 0) continue + let parsed: { allComposers?: Array<{ composerId?: string }> } + try { + parsed = JSON.parse(rows[0]!.value) + } catch { + continue + } + const project = sanitizeWorkspaceUri(folder) + let added = 0 + for (const c of parsed.allComposers ?? 
[]) { + if (typeof c.composerId === 'string') { + result.composerToWorkspace.set(c.composerId, folder) + added += 1 + } + } + if (added > 0) { + result.workspaceProjectName.set(folder, project) + } + } catch { + // best-effort + } finally { + db.close() + } + } + + workspaceMapCache = result + workspaceMapCacheRoot = workspaceStorageDir + return result +} + +/// Pulls the composer id out of a `bubbleId::` key. +/// Returns null when the composer segment contains a CR/LF, which is the +/// signature Cursor uses for tool-call sub-composer rows in real data — +/// e.g. `bubbleId:task-call_xxxx\nfc_yyyy:` is one key with a +/// literal newline between the `task-call_` and `fc_` halves. Those rows +/// are not standalone composers and would otherwise inflate the orphan +/// project's session count. +function parseComposerIdFromKey(key: string | undefined): string | null { + if (!key) return null + const firstColon = key.indexOf(':') + if (firstColon < 0) return null + const secondColon = key.indexOf(':', firstColon + 1) + if (secondColon < 0) return null + const candidate = key.slice(firstColon + 1, secondColon) + if (!candidate) return null + // Reject any multi-line / control-char composer id. Real composer ids + // (UUIDs) and synthetic fixture ids are both single-line. + if (/[\r\n\x00]/.test(candidate)) return null + return candidate +} + +// Encodes the active workspace into source.path so the parser knows which +// composers to filter for. `#cursor-ws=` is a private separator: `state.vscdb` +// does not contain `#` (we construct the path ourselves), and the literal +// token only appears in source paths emitted from this provider, so there +// is no realistic collision. 
+const WORKSPACE_SEP = '#cursor-ws=' + +function encodeSourcePath(dbPath: string, workspaceTag: string): string { + return `${dbPath}${WORKSPACE_SEP}${workspaceTag}` +} + +function decodeSourcePath(sourcePath: string): { dbPath: string; workspaceTag: string } { + const idx = sourcePath.indexOf(WORKSPACE_SEP) + // Backwards-compat: a bare DB path with no workspace tag means "give me + // every call from this DB". Older cached SessionSource entries and any + // hand-constructed source from a test land here. + if (idx < 0) return { dbPath: sourcePath, workspaceTag: '__all__' } + return { + dbPath: sourcePath.slice(0, idx), + workspaceTag: sourcePath.slice(idx + WORKSPACE_SEP.length), + } +} + type CodeBlock = { languageId?: string } function extractLanguages(codeBlocksJson: string | null): string[] { @@ -273,7 +457,20 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set): { calls: Parse } const createdAt = row.created_at ?? '' - const conversationId = row.conversation_id ?? 'unknown' + // The JSON `conversationId` field on bubbles is empty in current + // Cursor builds. The real composerId lives in the row key + // `bubbleId::`. Extract from the key so the + // workspace map join works. parseComposerIdFromKey returns null for + // non-UUID composer segments (Cursor stores tool-call output under + // `bubbleId:task-call_xxx\nfc_yyy:` and similar shapes — + // those bubbles are NOT standalone sessions; their tokens are + // already accounted for inside the parent composer's stream). + const parsedComposerId = parseComposerIdFromKey(row.bubble_key) + if (!parsedComposerId) { + skipped++ + continue + } + const conversationId = parsedComposerId // Use the SQLite row key (bubbleId:) as the dedup key. 
// Cursor mutates token counts on the row in place when streaming // completes — including tokens in the dedup key (the previous @@ -467,41 +664,75 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars return } - const cached = await readCachedResults(source.path) - if (cached) { - for (const call of cached) { - if (seenKeys.has(call.deduplicationKey)) continue - seenKeys.add(call.deduplicationKey) - yield call + const { dbPath, workspaceTag } = decodeSourcePath(source.path) + + // Decide which composers belong to this source. The workspace map is + // built once per process from `workspaceStorage/*` and reused across + // every workspace-scoped source, so we pay the directory walk cost + // only once per CLI run regardless of how many projects the user has. + // `composerFilter` holds the set of composers EITHER allowed (workspace + // source) or denied (orphan source); `filterMode` says which. + let composerFilter: Set | null = null + let filterMode: 'include' | 'exclude' = 'include' + if (workspaceTag !== '__all__') { + const wsMap = loadWorkspaceMap(getCursorWorkspaceStorageDir(dbPath)) + if (workspaceTag === ORPHAN_TAG) { + // Orphan source: every composer that is mapped to SOME workspace + // is excluded here, so unmapped composers (and any non-UUID + // sub-composer ids that slip through) land in this bucket. + composerFilter = new Set(wsMap.composerToWorkspace.keys()) + filterMode = 'exclude' + } else { + composerFilter = new Set() + for (const [composerId, folder] of wsMap.composerToWorkspace) { + if (folder === workspaceTag) composerFilter.add(composerId) + } + filterMode = 'include' } - return } - let db: SqliteDatabase - try { - db = openDatabase(source.path) - } catch (err) { - process.stderr.write(`codeburn: cannot open Cursor database: ${err instanceof Error ? err.message : err}\n`) - return - } - - try { - if (!validateSchema(db)) { - process.stderr.write('codeburn: Cursor storage format not recognized. 
You may need to update CodeBurn.\n') + // Cache is keyed on the bare DB path so multiple workspace-scoped + // sources reuse one parsed bubble set per CLI run. Filtering happens + // post-cache so each source emits only its own composers. + let allCalls: ParsedProviderCall[] | null = null + const cached = await readCachedResults(dbPath) + if (cached) { + allCalls = cached + } else { + let db: SqliteDatabase + try { + db = openDatabase(dbPath) + } catch (err) { + process.stderr.write(`codeburn: cannot open Cursor database: ${err instanceof Error ? err.message : err}\n`) return } - - const { calls: bubbleCalls } = parseBubbles(db, seenKeys) - const { calls: agentKvCalls } = parseAgentKv(db, seenKeys, source.path) - const calls = [...bubbleCalls, ...agentKvCalls] - - await writeCachedResults(source.path, calls) - - for (const call of calls) { - yield call + try { + if (!validateSchema(db)) { + process.stderr.write('codeburn: Cursor storage format not recognized. You may need to update CodeBurn.\n') + return + } + // Use a fresh local Set for intra-parse dedup so the global + // seenKeys is not mutated by calls that the workspace filter is + // about to drop. Cross-source dedup happens at yield time. + const localSeen = new Set() + const { calls: bubbleCalls } = parseBubbles(db, localSeen) + const { calls: agentKvCalls } = parseAgentKv(db, localSeen, dbPath) + allCalls = [...bubbleCalls, ...agentKvCalls] + await writeCachedResults(dbPath, allCalls) + } finally { + db.close() } - } finally { - db.close() + } + + for (const call of allCalls) { + if (composerFilter !== null) { + const inSet = composerFilter.has(call.sessionId) + if (filterMode === 'include' && !inSet) continue + if (filterMode === 'exclude' && inSet) continue + } + if (seenKeys.has(call.deduplicationKey)) continue + seenKeys.add(call.deduplicationKey) + yield call } }, } @@ -526,7 +757,27 @@ export function createCursorProvider(dbPathOverride?: string): Provider { const dbPath = dbPathOverride ?? 
getCursorDbPath() if (!existsSync(dbPath)) return [] - return [{ path: dbPath, project: 'cursor', provider: 'cursor' }] + const wsMap = loadWorkspaceMap(getCursorWorkspaceStorageDir(dbPath)) + const sources: SessionSource[] = [] + for (const [folder, project] of wsMap.workspaceProjectName) { + sources.push({ + path: encodeSourcePath(dbPath, folder), + project, + provider: 'cursor', + }) + } + // Always emit a catch-all source for composers with no workspace + // mapping. About a third of composers in real-world Cursor installs + // are unmapped (multi-root workspaces, "no folder open" sessions, + // deleted workspaces with surviving global rows). When the user has + // no workspaces at all this source captures everything and the + // dashboard looks identical to the pre-PR `cursor` project. + sources.push({ + path: encodeSourcePath(dbPath, ORPHAN_TAG), + project: ORPHAN_PROJECT, + provider: 'cursor', + }) + return sources }, createSessionParser(source: SessionSource, seenKeys: Set): SessionParser { diff --git a/tests/providers/cursor-workspace-breakdown.test.ts b/tests/providers/cursor-workspace-breakdown.test.ts new file mode 100644 index 0000000..8e666b4 --- /dev/null +++ b/tests/providers/cursor-workspace-breakdown.test.ts @@ -0,0 +1,330 @@ +import { mkdtemp, mkdir, rm, writeFile } from 'fs/promises' +import { mkdirSync } from 'fs' +import { join } from 'path' +import { tmpdir } from 'os' +import { createRequire } from 'node:module' + +import { describe, it, expect, beforeEach, afterEach } from 'vitest' + +import { + createCursorProvider, + clearCursorWorkspaceMapCache, +} from '../../src/providers/cursor.js' +import { isSqliteAvailable } from '../../src/sqlite.js' +import type { ParsedProviderCall } from '../../src/providers/types.js' + +const requireForTest = createRequire(import.meta.url) + +let userDir: string + +beforeEach(async () => { + userDir = await mkdtemp(join(tmpdir(), 'cursor-ws-test-')) + // Layout matches Cursor's: 
/{globalStorage,workspaceStorage}/. + await mkdir(join(userDir, 'globalStorage'), { recursive: true }) + await mkdir(join(userDir, 'workspaceStorage'), { recursive: true }) + clearCursorWorkspaceMapCache() +}) + +afterEach(async () => { + clearCursorWorkspaceMapCache() + await rm(userDir, { recursive: true, force: true }) +}) + +function globalDbPath(): string { + return join(userDir, 'globalStorage', 'state.vscdb') +} + +/// Builds a global state.vscdb with the cursorDiskKV table and a small set of +/// bubbles for the requested composer ids. Each bubble carries enough fields +/// to satisfy parseBubbles() — created_at, tokenCount, conversationId, type. +function createGlobalDb(composerIds: string[]): string { + const dbPath = globalDbPath() + const { DatabaseSync: Database } = requireForTest('node:sqlite') + const db = new Database(dbPath) + db.exec(`CREATE TABLE cursorDiskKV (key TEXT PRIMARY KEY, value BLOB)`) + // ItemTable is unused by the global parser but creating it mirrors the + // real schema so a stray query against it does not error. + db.exec(`CREATE TABLE ItemTable (key TEXT UNIQUE, value BLOB)`) + + const insert = db.prepare(`INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)`) + const baseTime = Date.now() - 24 * 3600 * 1000 + + for (const composerId of composerIds) { + // Exactly one assistant bubble per composer so the test math is + // "one composer == one call". User bubbles also produce calls in the + // real parser (text-length token estimation), but they are not + // necessary to exercise the workspace routing logic. 
+ const bubbleId = `bubbleId:${composerId}:bubble-${composerId.slice(0, 6)}` + const bubble = { + type: 2, // assistant + conversationId: composerId, + createdAt: new Date(baseTime).toISOString(), + tokenCount: { inputTokens: 100, outputTokens: 50 }, + modelInfo: { modelName: 'claude-4.6-sonnet' }, + text: 'assistant reply for ' + composerId, + codeBlocks: '[]', + } + insert.run(bubbleId, JSON.stringify(bubble)) + } + + db.close() + return dbPath +} + +/// Creates one workspaceStorage// subdir with workspace.json (folder URI) +/// and state.vscdb (composer.composerData listing the supplied composerIds). +function createWorkspaceDir(hash: string, folderUri: string, composerIds: string[]): void { + const dir = join(userDir, 'workspaceStorage', hash) + mkdirSync(dir, { recursive: true }) + + const wsJsonPath = join(dir, 'workspace.json') + // We cannot do a top-level await in a sync helper; the caller writes via + // mkdirSync above and the JSON via Node's sync writeFile shim through the + // require'd 'fs'. Using readFileSync-friendly imports to keep this test + // helper sync. 
+ const fs = requireForTest('fs') as typeof import('fs') + fs.writeFileSync(wsJsonPath, JSON.stringify({ folder: folderUri })) + + const wsDbPath = join(dir, 'state.vscdb') + const { DatabaseSync: Database } = requireForTest('node:sqlite') + const db = new Database(wsDbPath) + db.exec(`CREATE TABLE ItemTable (key TEXT UNIQUE, value BLOB)`) + const composerData = { + allComposers: composerIds.map(id => ({ + composerId: id, + name: 'session-' + id.slice(0, 6), + unifiedMode: 'agent', + })), + } + db.prepare(`INSERT INTO ItemTable (key, value) VALUES (?, ?)`).run( + 'composer.composerData', + JSON.stringify(composerData), + ) + db.close() +} + +async function collect(parser: { parse(): AsyncGenerator }): Promise { + const out: ParsedProviderCall[] = [] + for await (const call of parser.parse()) out.push(call) + return out +} + +describe('cursor provider — per-project breakdown (#196)', () => { + it('emits one source per workspace plus an orphan source', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb([ + 'composer-work-1', + 'composer-work-2', + 'composer-personal-1', + 'composer-orphan-1', + ]) + createWorkspaceDir('hash-work', 'file:///Users/me/work-app', ['composer-work-1', 'composer-work-2']) + createWorkspaceDir('hash-personal', 'file:///Users/me/personal-app', ['composer-personal-1']) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + + const projects = sources.map(s => s.project).sort() + expect(projects).toContain('-Users-me-work-app') + expect(projects).toContain('-Users-me-personal-app') + // Orphan source is labeled 'cursor' so a user with no workspaces + // sees the same project name as before the breakdown change. 
+ expect(projects).toContain('cursor') + }) + + it('routes calls to the right workspace and excludes others', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb([ + 'composer-work-1', + 'composer-work-2', + 'composer-personal-1', + ]) + createWorkspaceDir('hash-work', 'file:///Users/me/work-app', ['composer-work-1', 'composer-work-2']) + createWorkspaceDir('hash-personal', 'file:///Users/me/personal-app', ['composer-personal-1']) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + const workSource = sources.find(s => s.project === '-Users-me-work-app')! + const personalSource = sources.find(s => s.project === '-Users-me-personal-app')! + + const workCalls = await collect(provider.createSessionParser(workSource, new Set())) + const personalCalls = await collect(provider.createSessionParser(personalSource, new Set())) + + const workComposerIds = new Set(workCalls.map(c => c.sessionId)) + expect(workComposerIds).toEqual(new Set(['composer-work-1', 'composer-work-2'])) + const personalComposerIds = new Set(personalCalls.map(c => c.sessionId)) + expect(personalComposerIds).toEqual(new Set(['composer-personal-1'])) + }) + + it('orphan source captures composers not registered in any workspace', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb([ + 'composer-mapped', + 'composer-orphan-a', + 'composer-orphan-b', + ]) + createWorkspaceDir('hash-only', 'file:///Users/me/only-app', ['composer-mapped']) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + const orphanSource = sources.find(s => s.project === 'cursor')! 
+ + const orphanCalls = await collect(provider.createSessionParser(orphanSource, new Set())) + const ids = new Set(orphanCalls.map(c => c.sessionId)) + expect(ids).toEqual(new Set(['composer-orphan-a', 'composer-orphan-b'])) + }) + + it('totals across all sources equal totals from the legacy single-source behavior', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb([ + 'composer-work-1', + 'composer-personal-1', + 'composer-orphan-1', + ]) + createWorkspaceDir('hash-work', 'file:///Users/me/work-app', ['composer-work-1']) + createWorkspaceDir('hash-personal', 'file:///Users/me/personal-app', ['composer-personal-1']) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + + const seen = new Set() + let totalCalls = 0 + let totalCost = 0 + for (const source of sources) { + const calls = await collect(provider.createSessionParser(source, seen)) + totalCalls += calls.length + for (const call of calls) totalCost += call.costUSD + } + // Three composers, one assistant call each => three calls overall. + expect(totalCalls).toBe(3) + expect(totalCost).toBeGreaterThan(0) + }) + + it('emits a single `cursor` source (legacy-equivalent) when no workspace mapping exists', async () => { + if (!isSqliteAvailable()) return + + // No createWorkspaceDir calls -> workspaceStorage exists but is empty. + const dbPath = createGlobalDb(['composer-1', 'composer-2']) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + expect(sources).toHaveLength(1) + expect(sources[0]!.project).toBe('cursor') + + const calls = await collect(provider.createSessionParser(sources[0]!, new Set())) + // All composers fall through to the orphan/catch-all source, matching + // the pre-PR behavior where every Cursor session showed under one row. 
+ const ids = new Set(calls.map(c => c.sessionId)) + expect(ids).toEqual(new Set(['composer-1', 'composer-2'])) + }) + + it('handles multi-root workspaces (workspace.json without folder) by skipping them', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb(['composer-multi']) + // Multi-root workspace: workspace.json carries `configuration` not `folder`. + const dir = join(userDir, 'workspaceStorage', 'hash-multi') + mkdirSync(dir, { recursive: true }) + await writeFile( + join(dir, 'workspace.json'), + JSON.stringify({ configuration: 'file:///path/to/.code-workspace' }), + ) + // No state.vscdb either — multi-root composer never registers. + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + // Multi-root produces no workspace mapping; only the orphan source + // (labeled 'cursor') remains, and it captures the multi-root composer. + const projects = sources.map(s => s.project) + expect(projects).toEqual(['cursor']) + const calls = await collect(provider.createSessionParser(sources[0]!, new Set())) + expect(calls.map(c => c.sessionId)).toEqual(['composer-multi']) + }) + + it('sanitizes vscode-remote URIs into a slug', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb(['composer-remote']) + createWorkspaceDir( + 'hash-remote', + 'vscode-remote://wsl+Ubuntu/home/me/proj', + ['composer-remote'], + ) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + const project = sources.find(s => s.project !== 'cursor')!.project + // file:// would yield "-Users-me-proj"; remote URIs get the scheme rewritten. 
+ expect(project).toMatch(/wsl-Ubuntu/) + expect(project).toContain('home') + expect(project).toContain('proj') + }) + + it('drops sub-composer rows whose composer id is not a UUID', async () => { + if (!isSqliteAvailable()) return + + const dbPath = globalDbPath() + const { DatabaseSync: Database } = requireForTest('node:sqlite') + const db = new Database(dbPath) + db.exec(`CREATE TABLE cursorDiskKV (key TEXT PRIMARY KEY, value BLOB)`) + db.exec(`CREATE TABLE ItemTable (key TEXT UNIQUE, value BLOB)`) + const insert = db.prepare(`INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)`) + + // One real composer with one bubble. Real composer ids are UUIDs. + const realComposerId = 'cccc1111-2222-3333-4444-555566667777' + insert.run(`bubbleId:${realComposerId}:bubble-real`, JSON.stringify({ + type: 2, + conversationId: realComposerId, + createdAt: new Date().toISOString(), + tokenCount: { inputTokens: 100, outputTokens: 50 }, + modelInfo: { modelName: 'claude-4.6-sonnet' }, + text: 'real', + codeBlocks: '[]', + })) + // A sub-composer row mirroring the real Cursor shape: the composer + // segment has an embedded newline and is not UUID-shaped. Must be + // dropped, not surfaced as its own session. + insert.run(`bubbleId:task-call_xxx\nfc_yyy:bubble-sub`, JSON.stringify({ + type: 2, + conversationId: '', + createdAt: new Date().toISOString(), + tokenCount: { inputTokens: 10, outputTokens: 5 }, + modelInfo: { modelName: 'claude-4.6-sonnet' }, + text: 'sub', + codeBlocks: '[]', + })) + db.close() + + createWorkspaceDir('hash-only', 'file:///Users/me/only', [realComposerId]) + + const provider = createCursorProvider(dbPath) + const sources = await provider.discoverSessions() + const seen = new Set() + let allCalls = 0 + for (const source of sources) { + const calls = await collect(provider.createSessionParser(source, seen)) + allCalls += calls.length + } + // One real composer -> one call. Sub-composer dropped. Total: 1. 
+ expect(allCalls).toBe(1) + }) + + it('remains backwards-compatible when given a legacy bare DB path', async () => { + if (!isSqliteAvailable()) return + + const dbPath = createGlobalDb(['composer-legacy-1', 'composer-legacy-2']) + createWorkspaceDir('hash-legacy', 'file:///Users/me/legacy', ['composer-legacy-1']) + + const provider = createCursorProvider(dbPath) + // Hand-construct a legacy SessionSource (no workspace tag) and verify + // it still yields every call regardless of workspace mapping. + const legacySource = { path: dbPath, project: 'cursor', provider: 'cursor' } + const calls = await collect(provider.createSessionParser(legacySource, new Set())) + const ids = new Set(calls.map(c => c.sessionId)) + expect(ids).toEqual(new Set(['composer-legacy-1', 'composer-legacy-2'])) + }) +})