mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-19 16:13:56 +00:00
Fix Copilot provider to read VS Code workspace transcripts (#165)
The Copilot provider only looked in ~/.copilot/session-state/, which is written by an older CLI tool. The VS Code Copilot agent stores its transcripts under the VS Code user-data directory instead, e.g. ~/Library/Application Support/Code/User/workspaceStorage/*/GitHub.copilot-chat/transcripts/ on macOS. The new transcript format has no outputTokens field and no model_change events, so tokens are estimated from content length and the model is inferred from tool call ID prefixes. Both the legacy and the VS Code locations are now scanned in parallel. Fixes #161
This commit is contained in:
parent
314ef7a505
commit
5d1b335c0a
2 changed files with 300 additions and 100 deletions
|
|
@ -1,4 +1,5 @@
|
|||
import { readdir, stat } from 'fs/promises'
|
||||
import { existsSync } from 'fs'
|
||||
import { readdir, readFile, stat } from 'fs/promises'
|
||||
import { basename, dirname, join } from 'path'
|
||||
import { homedir } from 'os'
|
||||
|
||||
|
|
@ -24,63 +25,223 @@ const modelDisplayNames: Record<string, string> = {
|
|||
|
||||
// Lookup table from the raw tool names found in `toolRequests[].name` to the
// normalized names reported in each call's `tools` list. The parsers use it as
// `toolNameMap[n] ?? n`, so names without an entry pass through unchanged.
const toolNameMap: Record<string, string> = {
|
||||
bash: 'Bash',
|
||||
run_in_terminal: 'Bash',
|
||||
read_file: 'Read',
|
||||
write_file: 'Edit',
|
||||
edit_file: 'Edit',
|
||||
replace_string_in_file: 'Edit',
|
||||
create_file: 'Write',
|
||||
delete_file: 'Delete',
|
||||
search_files: 'Grep',
|
||||
file_search: 'Grep',
|
||||
find_files: 'Glob',
|
||||
list_directory: 'LS',
|
||||
list_dir: 'LS',
|
||||
web_search: 'WebSearch',
|
||||
fetch_webpage: 'WebFetch',
|
||||
github_repo: 'GitHub',
|
||||
memory: 'Memory',
|
||||
kill_terminal: 'Bash',
|
||||
}
|
||||
|
||||
// Pre-sorted by key length descending so longer/more-specific keys match first
|
||||
// Divisor used to turn a text length (in characters) into an estimated token
// count when a transcript carries no usable token figure.
const CHARS_PER_TOKEN = 4
|
||||
|
||||
const modelDisplayEntries = Object.entries(modelDisplayNames).sort((a, b) => b[0].length - a[0].length)
|
||||
|
||||
// Fields marked optional document the on-disk schema; they are not read by the parser
|
||||
type ToolRequest = {
|
||||
// --- Legacy format (session-state/events.jsonl with outputTokens) ---
|
||||
|
||||
// One entry of `toolRequests` on a legacy `assistant.message` event.
// The legacy parser only reads `name`; the other fields describe the log schema.
type LegacyToolRequest = {
|
||||
name?: string
|
||||
toolCallId?: string
|
||||
type?: string
|
||||
}
|
||||
|
||||
type ModelChangeData = {
|
||||
newModel: string
|
||||
previousModel?: string
|
||||
// The three event kinds the legacy parser acts on, discriminated by `type`:
// a model switch, a user prompt, and an assistant reply carrying `outputTokens`.
type LegacyCopilotEvent =
|
||||
| { type: 'session.model_change'; timestamp?: string; data: { newModel: string } }
|
||||
| { type: 'user.message'; timestamp?: string; data: { content: string; interactionId?: string } }
|
||||
| { type: 'assistant.message'; timestamp?: string; data: { messageId: string; outputTokens: number; interactionId?: string; toolRequests?: LegacyToolRequest[] } }
|
||||
|
||||
// Parses a legacy `events.jsonl` log (one JSON event per line) into provider calls.
//
// - `session.model_change` sets the model attributed to the replies that follow.
// - `user.message` is remembered as the prompt of the next reply.
// - `assistant.message` with a positive `outputTokens` becomes one call; replies
//   seen before any model is known are skipped rather than misattributed.
//
// content:   raw text of the events file
// sessionId: identifier embedded in each deduplication key
// seenKeys:  keys of calls already emitted; new keys are added to it (mutated)
// returns:   calls in file order, without duplicates or unusable lines
function parseLegacyEvents(content: string, sessionId: string, seenKeys: Set<string>): ParsedProviderCall[] {
  const results: ParsedProviderCall[] = []
  let currentModel = ''
  let pendingUserMessage = ''

  for (const line of content.split('\n')) {
    if (!line.trim()) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      continue
    }

    // A line can be valid JSON without being an event object (`null`, `42`, ...)
    // or can lack `data`; reading `.type` / `.data.x` on those would throw and
    // abort the whole file, so skip them like any other malformed line.
    if (typeof parsed !== 'object' || parsed === null) continue
    const { data: rawData } = parsed as { data?: unknown }
    if (typeof rawData !== 'object' || rawData === null) continue
    const event = parsed as LegacyCopilotEvent

    if (event.type === 'session.model_change') {
      const { newModel } = event.data
      if (typeof newModel === 'string') currentModel = newModel
      continue
    }

    if (event.type === 'user.message') {
      const { content: text } = event.data
      pendingUserMessage = typeof text === 'string' ? text : ''
      continue
    }

    if (event.type === 'assistant.message') {
      const { messageId, outputTokens, toolRequests } = event.data

      // Only a real, positive token count is billable; anything else (0, missing,
      // non-numeric) would produce a zero or NaN cost.
      if (typeof outputTokens !== 'number' || !Number.isFinite(outputTokens) || outputTokens <= 0) continue
      if (!currentModel) continue

      const dedupKey = `copilot:${sessionId}:${messageId}`
      if (seenKeys.has(dedupKey)) continue
      seenKeys.add(dedupKey)

      const tools = (Array.isArray(toolRequests) ? toolRequests : [])
        .map(t => t?.name ?? '')
        .filter(Boolean)
        .map(n => toolNameMap[n] ?? n)

      // The legacy log records output tokens only, so input is costed as 0.
      const costUSD = calculateCost(currentModel, 0, outputTokens, 0, 0, 0)

      results.push({
        provider: 'copilot',
        model: currentModel,
        inputTokens: 0,
        outputTokens,
        cacheCreationInputTokens: 0,
        cacheReadInputTokens: 0,
        cachedInputTokens: 0,
        reasoningTokens: 0,
        webSearchRequests: 0,
        costUSD,
        tools,
        bashCommands: [],
        timestamp: event.timestamp ?? '',
        speed: 'standard',
        deduplicationKey: dedupKey,
        userMessage: pendingUserMessage,
        sessionId,
      })

      // A prompt is attached to the first reply that follows it only.
      pendingUserMessage = ''
    }
  }

  return results
}
|
||||
|
||||
type UserMessageData = {
|
||||
content: string
|
||||
interactionId?: string
|
||||
// --- VS Code transcript format (workspaceStorage transcripts) ---
|
||||
|
||||
// One entry of `toolRequests` on a VS Code transcript `assistant.message`.
// `toolCallId` is inspected for its prefix when inferring the model and `name`
// is mapped to a tool label; the remaining fields are not read by the parser.
type TranscriptToolRequest = {
|
||||
toolCallId?: string
|
||||
name?: string
|
||||
arguments?: string
|
||||
type?: string
|
||||
}
|
||||
|
||||
type AssistantMessageData = {
|
||||
messageId: string
|
||||
outputTokens: number
|
||||
interactionId?: string
|
||||
toolRequests?: ToolRequest[]
|
||||
// Events found in a VS Code transcript file. The final catch-all member admits
// any other `type` string, so `type` alone does not narrow the union and the
// parser casts `data` after checking `type`.
type TranscriptEvent =
|
||||
| { type: 'session.start'; timestamp?: string; data: { sessionId: string; producer?: string } }
|
||||
| { type: 'user.message'; timestamp?: string; data: { content: string; attachments?: unknown[] } }
|
||||
| { type: 'assistant.message'; timestamp?: string; data: { messageId: string; content?: string; reasoningText?: string; toolRequests?: TranscriptToolRequest[]; outputTokens?: number } }
|
||||
| { type: string; timestamp?: string; data: Record<string, unknown> }
|
||||
|
||||
// Guesses which model produced a transcript from the prefix of the first
// recognisable tool call ID, because the transcript carries no model name.
//
// - `toolu_...` (Anthropic-style IDs, including the `toolu_bdrk_` variant)
//   is attributed to 'claude-sonnet-4-5'.
// - `call_...` is attributed to 'gpt-4.1'.
// - With no recognisable ID the result defaults to 'gpt-4.1'.
//
// The returned names are heuristic defaults for a model family, not a model
// reported by the transcript. The events come straight from JSON.parse, so
// every level is checked at runtime and malformed entries are skipped.
//
// events:  parsed transcript events, in file order
// returns: the inferred model identifier
function inferModelFromToolCallIds(events: readonly unknown[]): string {
  for (const event of events) {
    if (typeof event !== 'object' || event === null) continue
    const { type, data } = event as { type?: unknown; data?: unknown }
    if (type !== 'assistant.message' || typeof data !== 'object' || data === null) continue

    const { toolRequests } = data as { toolRequests?: unknown }
    if (!Array.isArray(toolRequests)) continue

    for (const request of toolRequests) {
      const id = (request as { toolCallId?: unknown } | null | undefined)?.toolCallId
      if (typeof id !== 'string') continue
      if (id.startsWith('toolu_')) return 'claude-sonnet-4-5'
      if (id.startsWith('call_')) return 'gpt-4.1'
    }
  }
  return 'gpt-4.1'
}
|
||||
|
||||
type CopilotEvent =
|
||||
| { type: 'session.model_change'; timestamp?: string; data: ModelChangeData }
|
||||
| { type: 'user.message'; timestamp?: string; data: UserMessageData }
|
||||
| { type: 'assistant.message'; timestamp?: string; data: AssistantMessageData }
|
||||
// Parses a VS Code Copilot transcript (one JSON event per line) into provider calls.
//
// The transcript has no model-change events, so a single model is inferred for
// the whole file from its tool call IDs. Each non-empty `assistant.message`
// becomes one call. When the event carries no positive `outputTokens`, output
// and reasoning tokens are estimated from text length (CHARS_PER_TOKEN chars per
// token); input tokens are always estimated from the preceding user prompt.
//
// content:   raw text of the transcript file
// sessionId: identifier embedded in each deduplication key
// seenKeys:  keys of calls already emitted; new keys are added to it (mutated)
// returns:   calls in file order, without duplicates or unusable lines
function parseTranscriptEvents(content: string, sessionId: string, seenKeys: Set<string>): ParsedProviderCall[] {
  const results: ParsedProviderCall[] = []
  const events: TranscriptEvent[] = []

  for (const line of content.split('\n')) {
    if (!line.trim()) continue

    let parsed: unknown
    try {
      parsed = JSON.parse(line)
    } catch {
      continue
    }

    // Keep only event objects that carry a `data` object. Valid-but-unexpected
    // JSON (`null`, numbers, data-less objects) would otherwise throw later when
    // `.type` or `.data.x` is read and abort the whole file.
    if (typeof parsed !== 'object' || parsed === null) continue
    const { data: rawData } = parsed as { data?: unknown }
    if (typeof rawData !== 'object' || rawData === null) continue
    events.push(parsed as TranscriptEvent)
  }

  const model = inferModelFromToolCallIds(events)
  let pendingUserMessage = ''
  // Length of the full prompt: the stored message is cut to 500 chars, but the
  // token estimate must not be capped by that truncation.
  let pendingUserChars = 0

  for (const event of events) {
    if (event.type === 'user.message') {
      const data = event.data as { content?: unknown }
      const text = typeof data.content === 'string' ? data.content : ''
      pendingUserChars = text.length
      pendingUserMessage = text.slice(0, 500)
      continue
    }

    if (event.type === 'assistant.message') {
      const data = event.data as { messageId: string; content?: unknown; reasoningText?: unknown; toolRequests?: unknown; outputTokens?: unknown }
      const contentText = typeof data.content === 'string' ? data.content : ''
      const reasoningText = typeof data.reasoningText === 'string' ? data.reasoningText : ''
      const toolRequests: TranscriptToolRequest[] = Array.isArray(data.toolRequests) ? data.toolRequests : []

      // Nothing was produced: no text, no reasoning, no tool calls.
      if (contentText.length === 0 && reasoningText.length === 0 && toolRequests.length === 0) continue

      const dedupKey = `copilot:${sessionId}:${data.messageId}`
      if (seenKeys.has(dedupKey)) continue
      seenKeys.add(dedupKey)

      const logged = data.outputTokens
      let outputTokens = typeof logged === 'number' && Number.isFinite(logged) && logged > 0 ? logged : 0
      let reasoningTokens = 0
      if (outputTokens === 0) {
        outputTokens = Math.ceil(contentText.length / CHARS_PER_TOKEN)
        reasoningTokens = Math.ceil(reasoningText.length / CHARS_PER_TOKEN)
      }

      const inputTokens = Math.ceil(pendingUserChars / CHARS_PER_TOKEN)

      const tools = toolRequests
        .map(t => t?.name ?? '')
        .filter(Boolean)
        .map(n => toolNameMap[n] ?? n)

      // Reasoning tokens are costed together with the output tokens.
      const costUSD = calculateCost(model, inputTokens, outputTokens + reasoningTokens, 0, 0, 0)

      results.push({
        provider: 'copilot',
        model,
        inputTokens,
        outputTokens,
        cacheCreationInputTokens: 0,
        cacheReadInputTokens: 0,
        cachedInputTokens: 0,
        reasoningTokens,
        webSearchRequests: 0,
        costUSD,
        tools,
        bashCommands: [],
        timestamp: event.timestamp ?? '',
        speed: 'standard',
        deduplicationKey: dedupKey,
        userMessage: pendingUserMessage,
        sessionId,
      })

      // A prompt is attached to the first reply that follows it only.
      pendingUserMessage = ''
      pendingUserChars = 0
    }
  }

  return results
}
|
||||
|
||||
function parseCwd(yaml: string): string | null {
|
||||
const match = yaml.match(/^cwd:\s*(.+)$/m)
|
||||
if (!match?.[1]) return null
|
||||
const raw = match[1]
|
||||
.replace(/\s*#.*$/, '') // strip trailing comment
|
||||
.replace(/^['"]|['"]$/g, '') // strip surrounding quotes
|
||||
.trim()
|
||||
return raw || null
|
||||
// --- Parser ---
|
||||
|
||||
// Tells whether `content` is a VS Code Copilot transcript rather than a legacy
// events log. A transcript is recognised by its first non-blank line being a
// `session.start` event whose `data.producer` is 'copilot-agent'.
//
// Only that one line is examined (the file is not split in full), and leading
// blank lines are skipped, consistent with the parsers, which ignore blank lines.
//
// content: raw text of the session file
// returns: true for the transcript format, false otherwise (including
//          unparseable or non-object first lines)
function isTranscriptFormat(content: string): boolean {
  // First run of text starting at a non-whitespace character, up to end of line.
  const firstLine = /\S[^\n]*/.exec(content)?.[0] ?? ''
  if (!firstLine) return false

  let event: unknown
  try {
    event = JSON.parse(firstLine)
  } catch {
    return false
  }
  if (typeof event !== 'object' || event === null) return false

  const { type, data } = event as { type?: unknown; data?: unknown }
  if (type !== 'session.start' || typeof data !== 'object' || data === null) return false
  return (data as { producer?: unknown }).producer === 'copilot-agent'
}
|
||||
|
||||
function createParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
|
||||
|
|
@ -88,76 +249,60 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
|
|||
async *parse(): AsyncGenerator<ParsedProviderCall> {
|
||||
const content = await readSessionFile(source.path)
|
||||
if (content === null) return
|
||||
const sessionId = basename(dirname(source.path))
|
||||
const lines = content.split('\n').filter(l => l.trim())
|
||||
let currentModel = ''
|
||||
let pendingUserMessage = ''
|
||||
const sessionId = basename(source.path, '.jsonl').length === 36
|
||||
? basename(source.path, '.jsonl')
|
||||
: basename(dirname(source.path))
|
||||
|
||||
for (const line of lines) {
|
||||
let event: CopilotEvent
|
||||
try {
|
||||
event = JSON.parse(line) as CopilotEvent
|
||||
} catch {
|
||||
continue
|
||||
}
|
||||
const calls = isTranscriptFormat(content)
|
||||
? parseTranscriptEvents(content, sessionId, seenKeys)
|
||||
: parseLegacyEvents(content, sessionId, seenKeys)
|
||||
|
||||
if (event.type === 'session.model_change') {
|
||||
currentModel = event.data.newModel ?? currentModel
|
||||
continue
|
||||
}
|
||||
|
||||
if (event.type === 'user.message') {
|
||||
pendingUserMessage = event.data.content ?? ''
|
||||
continue
|
||||
}
|
||||
|
||||
if (event.type === 'assistant.message') {
|
||||
const { messageId, outputTokens, toolRequests = [] } = event.data
|
||||
if (outputTokens === 0) continue
|
||||
// Skip if no model has been identified yet - avoids silent misattribution
|
||||
if (!currentModel) continue
|
||||
|
||||
const dedupKey = `copilot:${sessionId}:${messageId}`
|
||||
if (seenKeys.has(dedupKey)) continue
|
||||
seenKeys.add(dedupKey)
|
||||
|
||||
const tools = toolRequests
|
||||
.map(t => t.name ?? '')
|
||||
.filter(Boolean)
|
||||
.map(n => toolNameMap[n] ?? n)
|
||||
|
||||
// Copilot only logs outputTokens; inputTokens are not available in session logs.
|
||||
// Cost will be lower than actual API cost.
|
||||
const costUSD = calculateCost(currentModel, 0, outputTokens, 0, 0, 0)
|
||||
|
||||
yield {
|
||||
provider: 'copilot',
|
||||
model: currentModel,
|
||||
inputTokens: 0,
|
||||
outputTokens,
|
||||
cacheCreationInputTokens: 0,
|
||||
cacheReadInputTokens: 0,
|
||||
cachedInputTokens: 0,
|
||||
reasoningTokens: 0,
|
||||
webSearchRequests: 0,
|
||||
costUSD,
|
||||
tools,
|
||||
bashCommands: [],
|
||||
timestamp: event.timestamp ?? '',
|
||||
speed: 'standard',
|
||||
deduplicationKey: dedupKey,
|
||||
userMessage: pendingUserMessage,
|
||||
sessionId,
|
||||
}
|
||||
|
||||
pendingUserMessage = ''
|
||||
}
|
||||
for (const call of calls) {
|
||||
yield call
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async function discoverSessionsInDir(sessionStateDir: string): Promise<SessionSource[]> {
|
||||
// --- Discovery ---
|
||||
|
||||
// Resolves the directory holding legacy Copilot session logs: the caller's
// override when one is given, otherwise `.copilot/session-state` under the
// current user's home directory.
function getCopilotSessionStateDir(override?: string): string {
  if (override == null) {
    return join(homedir(), '.copilot', 'session-state')
  }
  return override
}
|
||||
|
||||
// Returns the default `workspaceStorage` directory of a stable VS Code install
// (`<config root>/Code/User/workspaceStorage`) for the current platform.
//
// Config root per platform:
//   macOS   - ~/Library/Application Support
//   Windows - %APPDATA%, falling back to ~/AppData/Roaming
//   other   - $XDG_CONFIG_HOME, falling back to ~/.config
//
// The environment variables are honoured so that a relocated roaming profile
// or XDG config directory is still found.
// NOTE(review): portable installs and custom --user-data-dir are not covered.
function getVSCodeWorkspaceStorageDir(): string {
  let configRoot: string
  if (process.platform === 'darwin') {
    configRoot = join(homedir(), 'Library', 'Application Support')
  } else if (process.platform === 'win32') {
    // `||` on purpose: an empty variable is treated the same as an unset one.
    configRoot = process.env['APPDATA'] || join(homedir(), 'AppData', 'Roaming')
  } else {
    configRoot = process.env['XDG_CONFIG_HOME'] || join(homedir(), '.config')
  }
  return join(configRoot, 'Code', 'User', 'workspaceStorage')
}
|
||||
|
||||
// Extracts the value of the top-level `cwd:` key from workspace YAML text
// without a full YAML parser.
//
// - The value must be on the same line as `cwd:`; an empty value gives null
//   instead of swallowing the following line.
// - A single- or double-quoted value is returned verbatim, so a `#` inside the
//   quotes is kept. Escape sequences are not interpreted.
// - In an unquoted value `#` starts a comment only at the start of the value or
//   after whitespace, so paths such as `/src/c#-projects` stay intact.
//
// yaml:    raw YAML text
// returns: the cwd path, or null when the key is missing or empty
function parseCwd(yaml: string): string | null {
  // `[ \t]*` rather than `\s*`, which could cross the newline under the `m` flag.
  const value = yaml.match(/^cwd:[ \t]*(.*)$/m)?.[1]?.trim()
  if (!value) return null

  // Fully quoted scalar, optionally followed by a trailing comment.
  const quoted = value.match(/^(["'])(.*?)\1(?:\s+#.*)?$/)
  if (quoted) return quoted[2] || null

  const unquoted = value
    .replace(/(?:^|\s+)#.*$/, '') // strip trailing comment
    .replace(/^['"]|['"]$/g, '') // tolerate a stray, unbalanced quote
    .trim()
  return unquoted || null
}
|
||||
|
||||
// Derives a project name for a VS Code workspace-storage directory from its
// `workspace.json`.
//
// - `folder` (single-folder window): last path segment of the folder URI.
// - `workspace` (presumably multi-root windows — confirm against VS Code's
//   storage layout): file name of the referenced workspace file, without its
//   `.code-workspace` suffix.
// - Otherwise, or when the file is missing or unreadable, or the derived name
//   is empty (e.g. the folder is the filesystem root): the storage directory's
//   own name.
//
// workspaceDir: path of one entry under `workspaceStorage`
// returns:      a project name; never rejects
async function readWorkspaceProject(workspaceDir: string): Promise<string> {
  try {
    const raw = await readFile(join(workspaceDir, 'workspace.json'), 'utf-8')
    const data: unknown = JSON.parse(raw)
    if (typeof data === 'object' && data !== null) {
      const { folder, workspace } = data as { folder?: unknown; workspace?: unknown }

      if (typeof folder === 'string') {
        const name = basename(decodeURIComponent(folder.replace(/^file:\/\//, '')))
        if (name) return name
      }

      if (typeof workspace === 'string') {
        const name = basename(decodeURIComponent(workspace.replace(/^file:\/\//, '')), '.code-workspace')
        if (name) return name
      }
    }
  } catch {
    // Best effort: a missing file, invalid JSON or a malformed URI all fall
    // through to the directory-name fallback below.
  }
  return basename(workspaceDir)
}
|
||||
|
||||
async function discoverLegacySessions(sessionStateDir: string): Promise<SessionSource[]> {
|
||||
const sources: SessionSource[] = []
|
||||
|
||||
let sessionDirs: string[]
|
||||
|
|
@ -185,8 +330,44 @@ async function discoverSessionsInDir(sessionStateDir: string): Promise<SessionSo
|
|||
return sources
|
||||
}
|
||||
|
||||
export function createCopilotProvider(sessionStateDir?: string): Provider {
|
||||
const dir = getCopilotSessionStateDir(sessionStateDir)
|
||||
// Collects every Copilot transcript file stored under a VS Code
// `workspaceStorage` directory.
//
// For each workspace entry that has a `GitHub.copilot-chat/transcripts` folder,
// the project name is resolved once and every regular `*.jsonl` file in that
// folder becomes a session source. Unreadable directories and files that
// cannot be stat'ed are skipped silently.
//
// workspaceStorageDir: the `workspaceStorage` directory to scan
// returns:             session sources in directory-listing order; empty when
//                      the directory cannot be read
async function discoverVSCodeTranscripts(workspaceStorageDir: string): Promise<SessionSource[]> {
  const workspaceIds = await readdir(workspaceStorageDir).catch(() => null)
  if (workspaceIds === null) return []

  const found: SessionSource[] = []
  for (const id of workspaceIds) {
    const workspacePath = join(workspaceStorageDir, id)
    const transcriptsDir = join(workspacePath, 'GitHub.copilot-chat', 'transcripts')
    if (!existsSync(transcriptsDir)) continue

    const project = await readWorkspaceProject(workspacePath)

    const entries = await readdir(transcriptsDir).catch(() => null)
    if (entries === null) continue

    for (const entry of entries.filter(name => name.endsWith('.jsonl'))) {
      const filePath = join(transcriptsDir, entry)
      const info = await stat(filePath).catch(() => null)
      if (info?.isFile()) {
        found.push({ path: filePath, project, provider: 'copilot' })
      }
    }
  }

  return found
}
|
||||
|
||||
export function createCopilotProvider(sessionStateDir?: string, workspaceStorageDirOverride?: string): Provider {
|
||||
const legacyDir = getCopilotSessionStateDir(sessionStateDir)
|
||||
const vscodeDir = workspaceStorageDirOverride ?? getVSCodeWorkspaceStorageDir()
|
||||
|
||||
return {
|
||||
name: 'copilot',
|
||||
|
|
@ -204,7 +385,11 @@ export function createCopilotProvider(sessionStateDir?: string): Provider {
|
|||
},
|
||||
|
||||
async discoverSessions(): Promise<SessionSource[]> {
|
||||
return discoverSessionsInDir(dir)
|
||||
const [legacy, vscode] = await Promise.all([
|
||||
discoverLegacySessions(legacyDir),
|
||||
discoverVSCodeTranscripts(vscodeDir),
|
||||
])
|
||||
return [...legacy, ...vscode]
|
||||
},
|
||||
|
||||
createSessionParser(source: SessionSource, seenKeys: Set<string>): SessionParser {
|
||||
|
|
|
|||
|
|
@ -174,7 +174,7 @@ describe('copilot provider - discoverSessions', () => {
|
|||
await createSessionDir('sess-disc-001', [modelChange('gpt-4.1')])
|
||||
await createSessionDir('sess-disc-002', [modelChange('gpt-4.1')])
|
||||
|
||||
const provider = createCopilotProvider(tmpDir)
|
||||
const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode')
|
||||
const sessions = await provider.discoverSessions()
|
||||
|
||||
expect(sessions).toHaveLength(2)
|
||||
|
|
@ -185,7 +185,7 @@ describe('copilot provider - discoverSessions', () => {
|
|||
it('reads project name from workspace.yaml cwd', async () => {
|
||||
await createSessionDir('sess-disc-003', [modelChange('gpt-4.1')], '/home/user/myapp')
|
||||
|
||||
const provider = createCopilotProvider(tmpDir)
|
||||
const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode')
|
||||
const sessions = await provider.discoverSessions()
|
||||
|
||||
expect(sessions).toHaveLength(1)
|
||||
|
|
@ -198,7 +198,7 @@ describe('copilot provider - discoverSessions', () => {
|
|||
await writeFile(join(sessionDir, 'workspace.yaml'), 'cwd: "/home/user/myapp" # project root\n')
|
||||
await writeFile(join(sessionDir, 'events.jsonl'), '\n')
|
||||
|
||||
const provider = createCopilotProvider(tmpDir)
|
||||
const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode')
|
||||
const sessions = await provider.discoverSessions()
|
||||
|
||||
expect(sessions).toHaveLength(1)
|
||||
|
|
@ -206,7 +206,7 @@ describe('copilot provider - discoverSessions', () => {
|
|||
})
|
||||
|
||||
it('returns empty when directory does not exist', async () => {
|
||||
const provider = createCopilotProvider('/nonexistent/path')
|
||||
const provider = createCopilotProvider('/nonexistent/path', '/nonexistent/vscode')
|
||||
const sessions = await provider.discoverSessions()
|
||||
expect(sessions).toHaveLength(0)
|
||||
})
|
||||
|
|
@ -215,10 +215,25 @@ describe('copilot provider - discoverSessions', () => {
|
|||
const emptyDir = join(tmpDir, 'empty-session')
|
||||
await mkdir(emptyDir, { recursive: true })
|
||||
|
||||
const provider = createCopilotProvider(tmpDir)
|
||||
const provider = createCopilotProvider(tmpDir, '/nonexistent/vscode')
|
||||
const sessions = await provider.discoverSessions()
|
||||
expect(sessions).toHaveLength(0)
|
||||
})
|
||||
|
||||
it('discovers VS Code workspace transcripts', async () => {
|
||||
const wsDir = join(tmpDir, 'vscode-ws')
|
||||
const transcriptsDir = join(wsDir, 'abc123', 'GitHub.copilot-chat', 'transcripts')
|
||||
await mkdir(transcriptsDir, { recursive: true })
|
||||
await writeFile(join(wsDir, 'abc123', 'workspace.json'), JSON.stringify({ folder: 'file:///home/user/myapp' }))
|
||||
await writeFile(join(transcriptsDir, 'session-1.jsonl'), JSON.stringify({ type: 'session.start', data: { sessionId: 's1', producer: 'copilot-agent' } }) + '\n')
|
||||
|
||||
const provider = createCopilotProvider('/nonexistent/legacy', wsDir)
|
||||
const sessions = await provider.discoverSessions()
|
||||
|
||||
expect(sessions).toHaveLength(1)
|
||||
expect(sessions[0]!.project).toBe('myapp')
|
||||
expect(sessions[0]!.path).toContain('session-1.jsonl')
|
||||
})
|
||||
})
|
||||
|
||||
describe('copilot provider - metadata', () => {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue