feat(export): add metadata and statistics to export data

- Add ExportMetadata type with session info, token stats, file operation stats
- Track response_id from LLM API for telemetry correlation
- Collect usageMetadata from assistant messages
- Calculate file stats (files read/written, lines added/removed)
- Calculate token stats (total tokens, context usage percentage)
- Add metadata sidebar to HTML export template
- Support metadata in JSONL and Markdown formatters
- Update chatRecordingService to record response_id

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
mingholy.lmh 2026-03-12 21:37:05 +08:00
parent 27356c1bac
commit d59e668729
11 changed files with 776 additions and 31 deletions

View file

@ -6,10 +6,211 @@
import { randomUUID } from 'node:crypto';
import type { Config, ChatRecord } from '@qwen-code/qwen-code-core';
import type { GenerateContentResponseUsageMetadata } from '@google/genai';
import type { SessionContext } from '../../../acp-integration/session/types.js';
import type { SessionUpdate, ToolCall } from '@agentclientprotocol/sdk';
import { HistoryReplayer } from '../../../acp-integration/session/HistoryReplayer.js';
import type { ExportMessage, ExportSessionData } from './types.js';
import type {
ExportMessage,
ExportSessionData,
ExportMetadata,
} from './types.js';
/**
 * File operation statistics extracted from tool calls.
 */
interface FileOperationStats {
  /** Number of tool results classified as read-only file operations (no content fields). */
  filesRead: number;
  /** Number of tool results classified as write/edit operations (originalContent/newContent present). */
  filesWritten: number;
  /** Total lines added across write operations (from diffStat, or non-empty-line fallback). */
  linesAdded: number;
  /** Total lines removed across write operations (from diffStat, or non-empty-line fallback). */
  linesRemoved: number;
  /** Distinct fileName values seen across all file-related tool results. */
  uniqueFiles: Set<string>;
}
/**
 * Calculate file operation statistics from ChatRecords.
 *
 * Only `tool_result` records that carry a `toolCallResult` whose
 * `resultDisplay` is an object with a `fileName` are inspected. A display
 * containing `originalContent` and/or `newContent` is treated as a
 * write/edit; anything else with a `fileName` is counted as a read.
 * Line deltas prefer the tool-provided `diffStat`; otherwise they fall
 * back to counting non-empty lines on each side.
 */
function calculateFileStats(records: ChatRecord[]): FileOperationStats {
  const stats: FileOperationStats = {
    filesRead: 0,
    filesWritten: 0,
    linesAdded: 0,
    linesRemoved: 0,
    uniqueFiles: new Set(),
  };

  // Fallback line counter: empty lines are deliberately ignored.
  const countNonEmptyLines = (text: string): number =>
    text.split('\n').filter((line) => line.length > 0).length;

  for (const record of records) {
    if (record.type !== 'tool_result' || !record.toolCallResult) continue;

    const { resultDisplay } = record.toolCallResult;

    // Skip anything that is not a file-shaped display object.
    if (
      !resultDisplay ||
      typeof resultDisplay !== 'object' ||
      !('fileName' in resultDisplay)
    ) {
      continue;
    }

    const display = resultDisplay as {
      fileName: string;
      originalContent?: string | null;
      newContent?: string;
      diffStat?: { model_added_lines?: number; model_removed_lines?: number };
    };

    if (typeof display.fileName === 'string') {
      stats.uniqueFiles.add(display.fileName);
    }

    // Presence of either content field marks this as a write/edit.
    const isWrite = 'originalContent' in display || 'newContent' in display;
    if (!isWrite) {
      // No content changes: treat as a read operation.
      stats.filesRead++;
      continue;
    }

    stats.filesWritten++;
    if (display.diffStat) {
      // diffStat carries exact counts — prefer it when available.
      stats.linesAdded += display.diffStat.model_added_lines ?? 0;
      stats.linesRemoved += display.diffStat.model_removed_lines ?? 0;
    } else {
      // Fallback: approximate by counting non-empty lines of each side.
      stats.linesAdded += countNonEmptyLines(String(display.newContent ?? ''));
      stats.linesRemoved += countNonEmptyLines(
        String(display.originalContent ?? ''),
      );
    }
  }

  return stats;
}
/**
 * Calculate token statistics from ChatRecords.
 *
 * Sums `totalTokenCount` across every assistant record's usageMetadata,
 * and remembers the most recent `promptTokenCount` seen — that last value
 * represents the current context occupancy. When a context window size is
 * supplied (and prompt tokens were observed), the usage percentage is
 * included, rounded to one decimal place.
 */
function calculateTokenStats(
  records: ChatRecord[],
  contextWindowSize?: number,
): { totalTokens: number; promptTokens: number; contextUsagePercent?: number } {
  let totalTokens = 0;
  let promptTokens = 0;

  for (const record of records) {
    if (record.type !== 'assistant' || !record.usageMetadata) continue;

    totalTokens += record.usageMetadata.totalTokenCount ?? 0;

    // Keep only the latest observed prompt token count.
    const prompt = record.usageMetadata.promptTokenCount;
    if (prompt !== undefined) {
      promptTokens = prompt;
    }
  }

  const result: {
    totalTokens: number;
    promptTokens: number;
    contextUsagePercent?: number;
  } = { totalTokens, promptTokens };

  // Prompt (input) tokens indicate how full the context window is.
  if (contextWindowSize && promptTokens > 0) {
    const percent = (promptTokens / contextWindowSize) * 100;
    result.contextUsagePercent = Math.round(percent * 10) / 10;
  }

  return result;
}
/**
 * Extract session metadata from ChatRecords.
 *
 * Combines per-session context (cwd, git branch, model, channel) with
 * aggregated statistics (token usage, file operations) into a single
 * ExportMetadata object stamped with the export time.
 */
function extractMetadata(
  conversation: {
    sessionId: string;
    startTime: string;
    messages: ChatRecord[];
  },
  config: Config,
): ExportMetadata {
  const { sessionId, startTime, messages } = conversation;

  // Session-level context (cwd, branch) comes from the very first record.
  const head = messages[0];

  // Model: the earliest assistant record that names one.
  let model: string | undefined;
  for (const rec of messages) {
    if (rec.type === 'assistant' && rec.model) {
      model = rec.model;
      break;
    }
  }

  // Request id: the *latest* response_id reported by an assistant record.
  let requestId: string | undefined;
  for (let i = messages.length - 1; i >= 0 && requestId === undefined; i--) {
    const rec = messages[i];
    if (rec.type === 'assistant' && rec.response_id) {
      requestId = rec.response_id;
    }
  }

  // Context window size (if the config exposes it) feeds the usage percent.
  const contextWindowSize =
    config.getContentGeneratorConfig?.()?.contextWindowSize;
  const tokenStats = calculateTokenStats(messages, contextWindowSize);
  const fileStats = calculateFileStats(messages);

  return {
    sessionId,
    startTime,
    exportTime: new Date().toISOString(),
    cwd: head?.cwd ?? '',
    gitBranch: head?.gitBranch,
    model,
    channel: config.getChannel?.(),
    promptCount: messages.filter((rec) => rec.type === 'user').length,
    contextUsagePercent: tokenStats.contextUsagePercent,
    totalTokens: tokenStats.totalTokens,
    filesRead: fileStats.filesRead,
    filesWritten: fileStats.filesWritten,
    linesAdded: fileStats.linesAdded,
    linesRemoved: fileStats.linesRemoved,
    uniqueFiles: [...fileStats.uniqueFiles],
    requestId,
  };
}
/**
* Export session context that captures session updates into export messages.
@ -24,6 +225,7 @@ class ExportSessionContext implements SessionContext {
role: 'user' | 'assistant' | 'thinking';
parts: Array<{ text: string }>;
timestamp: number;
usageMetadata?: GenerateContentResponseUsageMetadata;
} | null = null;
private activeRecordId: string | null = null;
private activeRecordTimestamp: string | null = null;
@ -39,9 +241,37 @@ class ExportSessionContext implements SessionContext {
case 'user_message_chunk':
this.handleMessageChunk('user', update.content);
break;
case 'agent_message_chunk':
this.handleMessageChunk('assistant', update.content);
case 'agent_message_chunk': {
// Extract usageMetadata from _meta if available
const usageMeta = update._meta as
| {
usage?: {
inputTokens?: number;
outputTokens?: number;
totalTokens?: number;
thoughtTokens?: number;
cachedReadTokens?: number;
};
}
| undefined;
const usageMetadata: GenerateContentResponseUsageMetadata | undefined =
usageMeta?.usage
? {
promptTokenCount: usageMeta.usage.inputTokens,
candidatesTokenCount: usageMeta.usage.outputTokens,
totalTokenCount: usageMeta.usage.totalTokens,
thoughtsTokenCount: usageMeta.usage.thoughtTokens,
cachedContentTokenCount: usageMeta.usage.cachedReadTokens,
}
: undefined;
this.handleMessageChunk(
'assistant',
update.content,
'assistant',
usageMetadata,
);
break;
}
case 'agent_thought_chunk':
this.handleMessageChunk('assistant', update.content, 'thinking');
break;
@ -79,6 +309,7 @@ class ExportSessionContext implements SessionContext {
role: 'user' | 'assistant',
content: { type: string; text?: string },
messageRole: 'user' | 'assistant' | 'thinking' = role,
usageMetadata?: GenerateContentResponseUsageMetadata,
): void {
if (content.type !== 'text' || !content.text) return;
@ -98,12 +329,17 @@ class ExportSessionContext implements SessionContext {
this.currentMessage.role === messageRole
) {
this.currentMessage.parts.push({ text: content.text });
// Merge usageMetadata if provided (for assistant messages)
if (usageMetadata && role === 'assistant') {
this.currentMessage.usageMetadata = usageMetadata;
}
} else {
this.currentMessage = {
type: role,
role: messageRole,
parts: [{ text: content.text }],
timestamp: Date.now(),
...(usageMetadata && role === 'assistant' ? { usageMetadata } : {}),
};
}
}
@ -205,7 +441,7 @@ class ExportSessionContext implements SessionContext {
if (!this.currentMessage) return;
const uuid = this.getMessageUuid();
this.messages.push({
const exportMessage: ExportMessage = {
uuid,
sessionId: this.sessionId,
timestamp: this.getMessageTimestamp(),
@ -214,7 +450,17 @@ class ExportSessionContext implements SessionContext {
role: this.currentMessage.role,
parts: this.currentMessage.parts,
},
});
};
// Add usageMetadata for assistant messages
if (
this.currentMessage.type === 'assistant' &&
this.currentMessage.usageMetadata
) {
exportMessage.usageMetadata = this.currentMessage.usageMetadata;
}
this.messages.push(exportMessage);
this.currentMessage = null;
}
@ -258,9 +504,13 @@ export async function collectSessionData(
// Get the export messages
const messages = exportContext.getMessages();
// Extract metadata from conversation
const metadata = extractMetadata(conversation, config);
return {
sessionId: conversation.sessionId,
startTime: conversation.startTime,
messages,
metadata,
};
}

View file

@ -36,6 +36,7 @@ export function injectDataIntoHtmlTemplate(
sessionId: string;
startTime: string;
messages: unknown[];
metadata?: unknown;
},
): string {
const jsonData = JSON.stringify(data, null, 2);

View file

@ -14,13 +14,18 @@ export function toJsonl(sessionData: ExportSessionData): string {
const lines: string[] = [];
// Add session metadata as the first line
lines.push(
JSON.stringify({
type: 'session_metadata',
sessionId: sessionData.sessionId,
startTime: sessionData.startTime,
}),
);
const metadata: Record<string, unknown> = {
type: 'session_metadata',
sessionId: sessionData.sessionId,
startTime: sessionData.startTime,
};
// Add requestId if available
if (sessionData.metadata?.requestId) {
metadata['requestId'] = sessionData.metadata.requestId;
}
lines.push(JSON.stringify(metadata));
// Add each message as a separate line
for (const message of sessionData.messages) {

View file

@ -16,6 +16,14 @@ export function toMarkdown(sessionData: ExportSessionData): string {
lines.push('# Chat Session Export\n');
lines.push(`- **Session ID**: \`${sanitizeText(sessionData.sessionId)}\``);
lines.push(`- **Start Time**: ${sanitizeText(sessionData.startTime)}`);
// Add requestId if available
if (sessionData.metadata?.requestId) {
lines.push(
`- **Request ID**: \`${sanitizeText(sessionData.metadata.requestId)}\``,
);
}
lines.push(`- **Exported**: ${new Date().toISOString()}`);
lines.push('\n---\n');
@ -26,6 +34,9 @@ export function toMarkdown(sessionData: ExportSessionData): string {
lines.push(formatMessageContent(message));
} else if (message.type === 'assistant') {
lines.push('## Assistant\n');
if (message.response_id) {
lines.push(`*Response ID: \`${sanitizeText(message.response_id)}\`*\n`);
}
lines.push(formatMessageContent(message));
} else if (message.type === 'tool_call') {
lines.push(formatToolCall(message));

View file

@ -28,6 +28,14 @@ export function normalizeSessionData(
}
});
// Build index of assistant messages by uuid for response_id mapping
const assistantMessageIndexByUuid = new Map<string, number>();
normalized.forEach((message, index) => {
if (message.type === 'assistant') {
assistantMessageIndexByUuid.set(message.uuid, index);
}
});
// Merge tool result information into tool call messages
for (const record of originalRecords) {
if (record.type !== 'tool_result') continue;
@ -58,6 +66,31 @@ export function normalizeSessionData(
mergeToolCallData(existingMessage.toolCall, toolCallMessage.toolCall);
}
// Merge response_id from assistant records
for (const record of originalRecords) {
if (record.type !== 'assistant') continue;
if (!record.response_id) continue;
const existingIndex = assistantMessageIndexByUuid.get(record.uuid);
if (existingIndex !== undefined) {
normalized[existingIndex].response_id = record.response_id;
}
}
// Merge usageMetadata from assistant records
for (const record of originalRecords) {
if (record.type !== 'assistant') continue;
if (!record.usageMetadata) continue;
const existingIndex = assistantMessageIndexByUuid.get(record.uuid);
if (existingIndex !== undefined) {
// Only set if not already present from collect phase
if (!normalized[existingIndex].usageMetadata) {
normalized[existingIndex].usageMetadata = record.usageMetadata;
}
}
}
return {
...sessionData,
messages: normalized,

View file

@ -4,6 +4,8 @@
* SPDX-License-Identifier: Apache-2.0
*/
import type { GenerateContentResponseUsageMetadata } from '@google/genai';
/**
* Universal export message format - SSOT for all export formats.
* This is format-agnostic and contains all information needed for any export type.
@ -25,6 +27,12 @@ export interface ExportMessage {
/** Model used for assistant messages */
model?: string;
/** Response ID from the LLM API for telemetry/tracing correlation */
response_id?: string;
/** Token usage for this message (mainly for assistant messages) */
usageMetadata?: GenerateContentResponseUsageMetadata;
/** For tool_call messages */
toolCall?: {
toolCallId: string;
@ -44,6 +52,44 @@ export interface ExportMessage {
};
}
/**
 * Metadata for export session - contains aggregated statistics and session context.
 */
export interface ExportMetadata {
  /** Session ID */
  sessionId: string;
  /** ISO timestamp when session started */
  startTime: string;
  /** ISO timestamp when the export itself was produced */
  exportTime: string;
  /** Current working directory (taken from the first chat record) */
  cwd: string;
  /** Git branch name, if available */
  gitBranch?: string;
  /** Model used in the session (first model reported by an assistant record) */
  model?: string;
  /** Channel/source identifier */
  channel?: string;
  /** Number of user prompts in the session */
  promptCount: number;
  /** Context window utilization percentage (0-100), from the last prompt token count */
  contextUsagePercent?: number;
  /** Total tokens used, summed from assistant usage metadata (prompt + completion) */
  totalTokens?: number;
  /** Number of file-read tool operations */
  filesRead?: number;
  /** Number of file write/edit tool operations */
  filesWritten?: number;
  /** Lines of code added across write operations */
  linesAdded?: number;
  /** Lines of code removed across write operations */
  linesRemoved?: number;
  /** Unique files referenced in the session */
  uniqueFiles: string[];
  /** Last response ID from the LLM API (request ID) */
  requestId?: string;
}
/**
* Complete export session data - the single source of truth.
*/
@ -51,4 +97,6 @@ export interface ExportSessionData {
sessionId: string;
startTime: string;
messages: ExportMessage[];
/** Session metadata and statistics */
metadata?: ExportMetadata;
}