From 410fad9495ec371d84e956082996d3d067bc778d Mon Sep 17 00:00:00 2001 From: AgentSeal Date: Tue, 28 Apr 2026 00:35:51 +0200 Subject: [PATCH] Fix Cursor provider reporting $0 for v3 bubble format and NULL createdAt rows Cursor v3 stores zero token counts in bubbles, causing parseBubbles to return empty results. The query also dropped rows with NULL createdAt via the SQL comparison, hiding data from older Cursor versions too. Changes: - Remove inputTokens > 0 SQL filter, estimate tokens from text length when token counts are zero (same 4 chars/token ratio as agentKv) - Include NULL createdAt rows with OR IS NULL, fall back to current timestamp when createdAt is missing - Parse agentKv entries with plain string content instead of skipping them (not all content is a JSON array) - Always parse both bubbles and agentKv instead of agentKv-only fallback - Discover subagent transcripts in subagents/ subdirectories - Fix timezone-dependent test in day-aggregator Fixes #159, #163 --- src/providers/cursor-agent.ts | 28 ++++++++++++----- src/providers/cursor.ts | 59 ++++++++++++++++++++++------------- tests/day-aggregator.test.ts | 5 +-- 3 files changed, 61 insertions(+), 31 deletions(-) diff --git a/src/providers/cursor-agent.ts b/src/providers/cursor-agent.ts index 2cadb0a..5827a60 100644 --- a/src/providers/cursor-agent.ts +++ b/src/providers/cursor-agent.ts @@ -464,14 +464,26 @@ export function createCursorAgentProvider(baseDirOverride?: string): Provider { const subdir = join(transcriptDir, transcript.name) const subEntries = await readdir(subdir, { withFileTypes: true }).catch(() => []) for (const sub of subEntries) { - if (!sub.isFile()) continue - if (!sub.name.endsWith('.jsonl') && !sub.name.endsWith('.txt')) continue - const filePath = join(subdir, sub.name) - sources.push({ - path: filePath, - project: projectId, - provider: 'cursor-agent', - }) + if (sub.isFile() && (sub.name.endsWith('.jsonl') || sub.name.endsWith('.txt'))) { + sources.push({ + path: join(subdir, sub.name), + project: projectId, + provider: 'cursor-agent', + }) + } + // Subagent transcripts inside a subagents/ directory + if (sub.isDirectory() && sub.name === 'subagents') { + const subagentEntries = await readdir(join(subdir, sub.name), { withFileTypes: true }).catch(() => []) + for (const sa of subagentEntries) { + if (!sa.isFile()) continue + if (!sa.name.endsWith('.jsonl') && !sa.name.endsWith('.txt')) continue + sources.push({ + path: join(subdir, sub.name, sa.name), + project: projectId, + provider: 'cursor-agent', + }) + } + } } } } diff --git a/src/providers/cursor.ts b/src/providers/cursor.ts index 0c4c61e..bbe6b65 100644 --- a/src/providers/cursor.ts +++ b/src/providers/cursor.ts @@ -33,6 +33,8 @@ type BubbleRow = { created_at: string | null conversation_id: string | null user_text: string | null + text_length: number | null + bubble_type: number | null code_blocks: string | null } @@ -104,10 +106,11 @@ const BUBBLE_QUERY_BASE = ` json_extract(value, '$.createdAt') as created_at, json_extract(value, '$.conversationId') as conversation_id, substr(json_extract(value, '$.text'), 1, 500) as user_text, + length(json_extract(value, '$.text')) as text_length, + json_extract(value, '$.type') as bubble_type, json_extract(value, '$.codeBlocks') as code_blocks FROM cursorDiskKV WHERE key LIKE 'bubbleId:%' - AND json_extract(value, '$.tokenCount.inputTokens') > 0 ` const AGENTKV_QUERY = ` @@ -131,13 +134,13 @@ const USER_MESSAGES_QUERY = ` FROM cursorDiskKV WHERE key LIKE 'bubbleId:%' AND json_extract(value, '$.type') = 1 - AND json_extract(value, '$.createdAt') > ? - ORDER BY json_extract(value, '$.createdAt') ASC + AND (json_extract(value, '$.createdAt') > ? OR json_extract(value, '$.createdAt') IS NULL) + ORDER BY ROWID ASC ` const BUBBLE_QUERY_SINCE = BUBBLE_QUERY_BASE + ` - AND json_extract(value, '$.createdAt') > ? - ORDER BY json_extract(value, '$.createdAt') ASC + AND (json_extract(value, '$.createdAt') > ? OR json_extract(value, '$.createdAt') IS NULL) + ORDER BY ROWID ASC ` function validateSchema(db: SqliteDatabase): boolean { @@ -185,9 +188,19 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set): { calls: Parse for (const row of rows) { try { - const inputTokens = row.input_tokens ?? 0 - const outputTokens = row.output_tokens ?? 0 - if (inputTokens === 0 && outputTokens === 0) continue + let inputTokens = row.input_tokens ?? 0 + let outputTokens = row.output_tokens ?? 0 + + // Cursor v3 stores zero token counts — estimate from text length + if (inputTokens === 0 && outputTokens === 0) { + const textLen = row.text_length ?? 0 + if (textLen === 0) continue + if (row.bubble_type === 1) { + inputTokens = Math.ceil(textLen / CHARS_PER_TOKEN) + } else { + outputTokens = Math.ceil(textLen / CHARS_PER_TOKEN) + } + } const createdAt = row.created_at ?? '' const conversationId = row.conversation_id ?? 'unknown' @@ -201,7 +214,7 @@ function parseBubbles(db: SqliteDatabase, seenKeys: Set): { calls: Parse const costUSD = calculateCost(pricingModel, inputTokens, outputTokens, 0, 0, 0) - const timestamp = createdAt || '' + const timestamp = createdAt || new Date().toISOString() const convMessages = userMessages.get(conversationId) ?? [] const userQuestion = convMessages.length > 0 ? convMessages.shift()! : '' const assistantText = row.user_text ?? '' @@ -278,11 +291,18 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set): { calls: Parse if (!row.role || !row.content) continue let content: AgentKvContent[] + let plainTextLength = 0 try { - content = JSON.parse(row.content) - if (!Array.isArray(content)) continue + const parsed = JSON.parse(row.content) + if (Array.isArray(parsed)) { + content = parsed + } else { + content = [] + plainTextLength = row.content.length + } } catch { - continue + content = [] + plainTextLength = row.content.length } const requestId = row.request_id ?? currentRequestId @@ -291,14 +311,14 @@ function parseAgentKv(db: SqliteDatabase, seenKeys: Set): { calls: Parse turnIndex = 0 } - const textLength = extractTextLength(content) + const textLength = plainTextLength || extractTextLength(content) const model = extractModelFromContent(content) if (row.role === 'user') { const existing = sessions.get(requestId) ?? { inputChars: 0, outputChars: 0, model: null, userText: '' } existing.inputChars += textLength - if (!existing.userText && content[0]?.text) { - const text = content[0].text + if (!existing.userText) { + const text = content[0]?.text ?? row.content const queryMatch = text.match(/([\s\S]*?)<\/user_query>/) existing.userText = queryMatch ? queryMatch[1].trim().slice(0, 500) : text.slice(0, 500) } @@ -385,12 +405,9 @@ function createParser(source: SessionSource, seenKeys: Set): SessionPars return } - let { calls } = parseBubbles(db, seenKeys) - - if (calls.length === 0) { - const agentKvResult = parseAgentKv(db, seenKeys) - calls = agentKvResult.calls - } + const { calls: bubbleCalls } = parseBubbles(db, seenKeys) + const { calls: agentKvCalls } = parseAgentKv(db, seenKeys) + const calls = [...bubbleCalls, ...agentKvCalls] await writeCachedResults(source.path, calls) diff --git a/tests/day-aggregator.test.ts b/tests/day-aggregator.test.ts index fcfe8e3..1c3baed 100644 --- a/tests/day-aggregator.test.ts +++ b/tests/day-aggregator.test.ts @@ -1,6 +1,6 @@ import { describe, expect, it } from 'vitest' -import { aggregateProjectsIntoDays, buildPeriodDataFromDays } from '../src/day-aggregator.js' +import { aggregateProjectsIntoDays, buildPeriodDataFromDays, dateKey } from '../src/day-aggregator.js' import type { ProjectSummary } from '../src/types.js' function makeProject(overrides: Partial & { sessions: ProjectSummary['sessions'] }): ProjectSummary { @@ -147,7 +147,8 @@ describe('aggregateProjectsIntoDays', () => { }), ] const days = aggregateProjectsIntoDays(projects) - expect(days[0]!.date).toBe('2026-04-09') + const expectedDate = dateKey('2026-04-09T23:59:00Z') + expect(days[0]!.date).toBe(expectedDate) expect(days[0]!.sessions).toBe(1) })