mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-05 15:31:27 +00:00
feat(export): add metadata and statistics to export data
- Add ExportMetadata type with session info, token stats, and file operation stats
- Track response_id from the LLM API for telemetry correlation
- Collect usageMetadata from assistant messages
- Calculate file stats (files read/written, lines added/removed)
- Calculate token stats (total tokens, context usage percentage)
- Add metadata sidebar to the HTML export template
- Support metadata in the JSONL and Markdown formatters
- Update chatRecordingService to record response_id

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
27356c1bac
commit
d59e668729
11 changed files with 776 additions and 31 deletions
|
|
@ -6,10 +6,211 @@
|
|||
|
||||
import { randomUUID } from 'node:crypto';
|
||||
import type { Config, ChatRecord } from '@qwen-code/qwen-code-core';
|
||||
import type { GenerateContentResponseUsageMetadata } from '@google/genai';
|
||||
import type { SessionContext } from '../../../acp-integration/session/types.js';
|
||||
import type { SessionUpdate, ToolCall } from '@agentclientprotocol/sdk';
|
||||
import { HistoryReplayer } from '../../../acp-integration/session/HistoryReplayer.js';
|
||||
import type { ExportMessage, ExportSessionData } from './types.js';
|
||||
import type {
|
||||
ExportMessage,
|
||||
ExportSessionData,
|
||||
ExportMetadata,
|
||||
} from './types.js';
|
||||
|
||||
/**
 * Accumulator for file activity observed in tool results.
 *
 * Filled in by calculateFileStats while it walks the session's records.
 */
interface FileOperationStats {
  /** Count of tool results that name a file but carry no content fields. */
  filesRead: number;
  /** Count of tool results that name a file and carry original/new content. */
  filesWritten: number;
  /** Running total of lines counted as added by write/edit results. */
  linesAdded: number;
  /** Running total of lines counted as removed by write/edit results. */
  linesRemoved: number;
  /** Distinct file names seen across all file-related tool results. */
  uniqueFiles: Set<string>;
}
|
||||
|
||||
/**
 * Estimate how many lines differ between two versions of a file's text.
 *
 * Lines shared at the start and at the end of both versions are treated as
 * unchanged; only non-empty lines in the differing middle section are counted.
 * This is a cheap approximation (not a full diff), but it avoids reporting the
 * entire file as both added and removed for a small edit.
 *
 * @param originalText - File text before the change ('' for a new file).
 * @param updatedText - File text after the change.
 * @returns Counts of non-empty lines considered added and removed.
 */
function countChangedLines(
  originalText: string,
  updatedText: string,
): { added: number; removed: number } {
  // Accept both LF and CRLF so blank CRLF lines are not counted as content.
  const before = originalText.split(/\r?\n/);
  const after = updatedText.split(/\r?\n/);
  const maxShared = Math.min(before.length, after.length);

  let head = 0;
  while (head < maxShared && before[head] === after[head]) {
    head++;
  }

  // The shared tail must not overlap the shared head.
  let tail = 0;
  while (
    tail < maxShared - head &&
    before[before.length - 1 - tail] === after[after.length - 1 - tail]
  ) {
    tail++;
  }

  const isNonEmpty = (line: string): boolean => line.length > 0;
  return {
    added: after.slice(head, after.length - tail).filter(isNonEmpty).length,
    removed: before.slice(head, before.length - tail).filter(isNonEmpty).length,
  };
}

/**
 * Calculate file operation statistics from ChatRecords.
 *
 * Only `tool_result` records that carry a `toolCallResult` whose
 * `resultDisplay` is an object with a `fileName` key are considered.
 * A result that has an `originalContent` or `newContent` key is counted as a
 * write/edit; any other file-bearing result is counted as a read.
 *
 * Line counts come from `diffStat` (`model_added_lines` /
 * `model_removed_lines`) when present. Otherwise they are estimated from the
 * content fields by ignoring lines that are unchanged at the start and end of
 * the file, so a small edit no longer counts the whole file as rewritten.
 *
 * @param records - Session records to scan.
 * @returns Aggregated counters plus the set of distinct file names seen.
 */
function calculateFileStats(records: ChatRecord[]): FileOperationStats {
  const stats: FileOperationStats = {
    filesRead: 0,
    filesWritten: 0,
    linesAdded: 0,
    linesRemoved: 0,
    uniqueFiles: new Set(),
  };

  for (const record of records) {
    if (record.type !== 'tool_result' || !record.toolCallResult) continue;

    const { resultDisplay } = record.toolCallResult;

    // Only object displays that name a file describe a file operation.
    if (
      !resultDisplay ||
      typeof resultDisplay !== 'object' ||
      !('fileName' in resultDisplay)
    ) {
      continue;
    }

    const display = resultDisplay as {
      fileName: string;
      originalContent?: string | null;
      newContent?: string;
      diffStat?: { model_added_lines?: number; model_removed_lines?: number };
    };

    // Track unique files
    if (typeof display.fileName === 'string') {
      stats.uniqueFiles.add(display.fileName);
    }

    // The presence of either content key marks a write/edit operation.
    const isWrite = 'originalContent' in display || 'newContent' in display;
    if (!isWrite) {
      // No content changes attached: treated as a read.
      stats.filesRead++;
      continue;
    }

    stats.filesWritten++;

    if (display.diffStat) {
      // Prefer the precomputed diff statistics when available.
      stats.linesAdded += display.diffStat.model_added_lines ?? 0;
      stats.linesRemoved += display.diffStat.model_removed_lines ?? 0;
    } else {
      // Fallback: estimate from the before/after content.
      const { added, removed } = countChangedLines(
        String(display.originalContent ?? ''),
        String(display.newContent ?? ''),
      );
      stats.linesAdded += added;
      stats.linesRemoved += removed;
    }
  }

  return stats;
}
|
||||
|
||||
/**
 * Calculate token statistics from ChatRecords.
 *
 * Sums `usageMetadata.totalTokenCount` over all assistant records and keeps
 * the most recent numeric `usageMetadata.promptTokenCount`, which is used as
 * the measure of how much of the context window is currently occupied.
 *
 * @param records - Session records to scan.
 * @param contextWindowSize - Size of the context window in tokens. When it is
 *   missing, non-finite, or not positive, no usage percentage is reported.
 * @returns `totalTokens` (sum over assistant records), `promptTokens` (last
 *   reported prompt token count, 0 if none), and `contextUsagePercent`
 *   (rounded to one decimal place) only when it can be computed. The
 *   percentage is not clamped and can exceed 100.
 */
function calculateTokenStats(
  records: ChatRecord[],
  contextWindowSize?: number,
): { totalTokens: number; promptTokens: number; contextUsagePercent?: number } {
  let totalTokens = 0;
  let lastPromptTokens = 0;

  for (const record of records) {
    if (record.type !== 'assistant' || !record.usageMetadata) continue;

    const { totalTokenCount, promptTokenCount } = record.usageMetadata;

    // Records may come from deserialized JSON, so a count can be null or
    // otherwise non-numeric; only finite numbers contribute.
    if (typeof totalTokenCount === 'number' && Number.isFinite(totalTokenCount)) {
      totalTokens += totalTokenCount;
    }
    // The latest prompt count wins: it reflects the current context usage.
    if (
      typeof promptTokenCount === 'number' &&
      Number.isFinite(promptTokenCount)
    ) {
      lastPromptTokens = promptTokenCount;
    }
  }

  // A percentage only makes sense against a positive, finite window size and
  // a known prompt size; otherwise omit it instead of emitting NaN/negatives.
  if (
    contextWindowSize === undefined ||
    !Number.isFinite(contextWindowSize) ||
    contextWindowSize <= 0 ||
    lastPromptTokens <= 0
  ) {
    return { totalTokens, promptTokens: lastPromptTokens };
  }

  const percent = (lastPromptTokens / contextWindowSize) * 100;
  return {
    totalTokens,
    promptTokens: lastPromptTokens,
    // Round to one decimal place.
    contextUsagePercent: Math.round(percent * 10) / 10,
  };
}
|
||||
|
||||
/**
 * Build the export metadata block for a recorded conversation.
 *
 * Working directory and git branch are read from the first record, the model
 * from the first assistant record that names one, and the request ID from the
 * last assistant record that carries a `response_id`. Channel and context
 * window size are read from `config`; file and token statistics are computed
 * from the raw records.
 *
 * @param conversation - Session identity, start time, and its raw records.
 * @param config - Runtime configuration queried for channel and window size.
 * @returns The assembled metadata, stamped with the current time as export time.
 */
function extractMetadata(
  conversation: {
    sessionId: string;
    startTime: string;
    messages: ChatRecord[];
  },
  config: Config,
): ExportMetadata {
  const records = conversation.messages;

  // Session-level context lives on the very first record, if there is one.
  const head = records[0];

  // First assistant record that reports a model decides the session model.
  const modelSource = records.find(
    (entry) => entry.type === 'assistant' && entry.model,
  );

  const channel = config.getChannel?.();

  // Every 'user' record counts as one prompt.
  let promptCount = 0;
  for (const entry of records) {
    if (entry.type === 'user') promptCount += 1;
  }

  const contextWindowSize =
    config.getContentGeneratorConfig?.()?.contextWindowSize;

  const fileStats = calculateFileStats(records);
  const tokenStats = calculateTokenStats(records, contextWindowSize);

  // Keep overwriting so the final value is the last assistant response_id.
  let requestId: string | undefined;
  for (const entry of records) {
    if (entry.type === 'assistant' && entry.response_id) {
      requestId = entry.response_id;
    }
  }

  return {
    sessionId: conversation.sessionId,
    startTime: conversation.startTime,
    exportTime: new Date().toISOString(),
    cwd: head?.cwd ?? '',
    gitBranch: head?.gitBranch,
    model: modelSource?.model,
    channel,
    promptCount,
    contextUsagePercent: tokenStats.contextUsagePercent,
    totalTokens: tokenStats.totalTokens,
    filesRead: fileStats.filesRead,
    filesWritten: fileStats.filesWritten,
    linesAdded: fileStats.linesAdded,
    linesRemoved: fileStats.linesRemoved,
    uniqueFiles: Array.from(fileStats.uniqueFiles),
    requestId,
  };
}
|
||||
|
||||
/**
|
||||
* Export session context that captures session updates into export messages.
|
||||
|
|
@ -24,6 +225,7 @@ class ExportSessionContext implements SessionContext {
|
|||
role: 'user' | 'assistant' | 'thinking';
|
||||
parts: Array<{ text: string }>;
|
||||
timestamp: number;
|
||||
usageMetadata?: GenerateContentResponseUsageMetadata;
|
||||
} | null = null;
|
||||
private activeRecordId: string | null = null;
|
||||
private activeRecordTimestamp: string | null = null;
|
||||
|
|
@ -39,9 +241,37 @@ class ExportSessionContext implements SessionContext {
|
|||
case 'user_message_chunk':
|
||||
this.handleMessageChunk('user', update.content);
|
||||
break;
|
||||
case 'agent_message_chunk':
|
||||
this.handleMessageChunk('assistant', update.content);
|
||||
case 'agent_message_chunk': {
|
||||
// Extract usageMetadata from _meta if available
|
||||
const usageMeta = update._meta as
|
||||
| {
|
||||
usage?: {
|
||||
inputTokens?: number;
|
||||
outputTokens?: number;
|
||||
totalTokens?: number;
|
||||
thoughtTokens?: number;
|
||||
cachedReadTokens?: number;
|
||||
};
|
||||
}
|
||||
| undefined;
|
||||
const usageMetadata: GenerateContentResponseUsageMetadata | undefined =
|
||||
usageMeta?.usage
|
||||
? {
|
||||
promptTokenCount: usageMeta.usage.inputTokens,
|
||||
candidatesTokenCount: usageMeta.usage.outputTokens,
|
||||
totalTokenCount: usageMeta.usage.totalTokens,
|
||||
thoughtsTokenCount: usageMeta.usage.thoughtTokens,
|
||||
cachedContentTokenCount: usageMeta.usage.cachedReadTokens,
|
||||
}
|
||||
: undefined;
|
||||
this.handleMessageChunk(
|
||||
'assistant',
|
||||
update.content,
|
||||
'assistant',
|
||||
usageMetadata,
|
||||
);
|
||||
break;
|
||||
}
|
||||
case 'agent_thought_chunk':
|
||||
this.handleMessageChunk('assistant', update.content, 'thinking');
|
||||
break;
|
||||
|
|
@ -79,6 +309,7 @@ class ExportSessionContext implements SessionContext {
|
|||
role: 'user' | 'assistant',
|
||||
content: { type: string; text?: string },
|
||||
messageRole: 'user' | 'assistant' | 'thinking' = role,
|
||||
usageMetadata?: GenerateContentResponseUsageMetadata,
|
||||
): void {
|
||||
if (content.type !== 'text' || !content.text) return;
|
||||
|
||||
|
|
@ -98,12 +329,17 @@ class ExportSessionContext implements SessionContext {
|
|||
this.currentMessage.role === messageRole
|
||||
) {
|
||||
this.currentMessage.parts.push({ text: content.text });
|
||||
// Merge usageMetadata if provided (for assistant messages)
|
||||
if (usageMetadata && role === 'assistant') {
|
||||
this.currentMessage.usageMetadata = usageMetadata;
|
||||
}
|
||||
} else {
|
||||
this.currentMessage = {
|
||||
type: role,
|
||||
role: messageRole,
|
||||
parts: [{ text: content.text }],
|
||||
timestamp: Date.now(),
|
||||
...(usageMetadata && role === 'assistant' ? { usageMetadata } : {}),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
@ -205,7 +441,7 @@ class ExportSessionContext implements SessionContext {
|
|||
if (!this.currentMessage) return;
|
||||
|
||||
const uuid = this.getMessageUuid();
|
||||
this.messages.push({
|
||||
const exportMessage: ExportMessage = {
|
||||
uuid,
|
||||
sessionId: this.sessionId,
|
||||
timestamp: this.getMessageTimestamp(),
|
||||
|
|
@ -214,7 +450,17 @@ class ExportSessionContext implements SessionContext {
|
|||
role: this.currentMessage.role,
|
||||
parts: this.currentMessage.parts,
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
// Add usageMetadata for assistant messages
|
||||
if (
|
||||
this.currentMessage.type === 'assistant' &&
|
||||
this.currentMessage.usageMetadata
|
||||
) {
|
||||
exportMessage.usageMetadata = this.currentMessage.usageMetadata;
|
||||
}
|
||||
|
||||
this.messages.push(exportMessage);
|
||||
|
||||
this.currentMessage = null;
|
||||
}
|
||||
|
|
@ -258,9 +504,13 @@ export async function collectSessionData(
|
|||
// Get the export messages
|
||||
const messages = exportContext.getMessages();
|
||||
|
||||
// Extract metadata from conversation
|
||||
const metadata = extractMetadata(conversation, config);
|
||||
|
||||
return {
|
||||
sessionId: conversation.sessionId,
|
||||
startTime: conversation.startTime,
|
||||
messages,
|
||||
metadata,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ export function injectDataIntoHtmlTemplate(
|
|||
sessionId: string;
|
||||
startTime: string;
|
||||
messages: unknown[];
|
||||
metadata?: unknown;
|
||||
},
|
||||
): string {
|
||||
const jsonData = JSON.stringify(data, null, 2);
|
||||
|
|
|
|||
|
|
@ -14,13 +14,18 @@ export function toJsonl(sessionData: ExportSessionData): string {
|
|||
const lines: string[] = [];
|
||||
|
||||
// Add session metadata as the first line
|
||||
lines.push(
|
||||
JSON.stringify({
|
||||
type: 'session_metadata',
|
||||
sessionId: sessionData.sessionId,
|
||||
startTime: sessionData.startTime,
|
||||
}),
|
||||
);
|
||||
const metadata: Record<string, unknown> = {
|
||||
type: 'session_metadata',
|
||||
sessionId: sessionData.sessionId,
|
||||
startTime: sessionData.startTime,
|
||||
};
|
||||
|
||||
// Add requestId if available
|
||||
if (sessionData.metadata?.requestId) {
|
||||
metadata['requestId'] = sessionData.metadata.requestId;
|
||||
}
|
||||
|
||||
lines.push(JSON.stringify(metadata));
|
||||
|
||||
// Add each message as a separate line
|
||||
for (const message of sessionData.messages) {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,14 @@ export function toMarkdown(sessionData: ExportSessionData): string {
|
|||
lines.push('# Chat Session Export\n');
|
||||
lines.push(`- **Session ID**: \`${sanitizeText(sessionData.sessionId)}\``);
|
||||
lines.push(`- **Start Time**: ${sanitizeText(sessionData.startTime)}`);
|
||||
|
||||
// Add requestId if available
|
||||
if (sessionData.metadata?.requestId) {
|
||||
lines.push(
|
||||
`- **Request ID**: \`${sanitizeText(sessionData.metadata.requestId)}\``,
|
||||
);
|
||||
}
|
||||
|
||||
lines.push(`- **Exported**: ${new Date().toISOString()}`);
|
||||
lines.push('\n---\n');
|
||||
|
||||
|
|
@ -26,6 +34,9 @@ export function toMarkdown(sessionData: ExportSessionData): string {
|
|||
lines.push(formatMessageContent(message));
|
||||
} else if (message.type === 'assistant') {
|
||||
lines.push('## Assistant\n');
|
||||
if (message.response_id) {
|
||||
lines.push(`*Response ID: \`${sanitizeText(message.response_id)}\`*\n`);
|
||||
}
|
||||
lines.push(formatMessageContent(message));
|
||||
} else if (message.type === 'tool_call') {
|
||||
lines.push(formatToolCall(message));
|
||||
|
|
|
|||
|
|
@ -28,6 +28,14 @@ export function normalizeSessionData(
|
|||
}
|
||||
});
|
||||
|
||||
// Build index of assistant messages by uuid for response_id mapping
|
||||
const assistantMessageIndexByUuid = new Map<string, number>();
|
||||
normalized.forEach((message, index) => {
|
||||
if (message.type === 'assistant') {
|
||||
assistantMessageIndexByUuid.set(message.uuid, index);
|
||||
}
|
||||
});
|
||||
|
||||
// Merge tool result information into tool call messages
|
||||
for (const record of originalRecords) {
|
||||
if (record.type !== 'tool_result') continue;
|
||||
|
|
@ -58,6 +66,31 @@ export function normalizeSessionData(
|
|||
mergeToolCallData(existingMessage.toolCall, toolCallMessage.toolCall);
|
||||
}
|
||||
|
||||
// Merge response_id from assistant records
|
||||
for (const record of originalRecords) {
|
||||
if (record.type !== 'assistant') continue;
|
||||
if (!record.response_id) continue;
|
||||
|
||||
const existingIndex = assistantMessageIndexByUuid.get(record.uuid);
|
||||
if (existingIndex !== undefined) {
|
||||
normalized[existingIndex].response_id = record.response_id;
|
||||
}
|
||||
}
|
||||
|
||||
// Merge usageMetadata from assistant records
|
||||
for (const record of originalRecords) {
|
||||
if (record.type !== 'assistant') continue;
|
||||
if (!record.usageMetadata) continue;
|
||||
|
||||
const existingIndex = assistantMessageIndexByUuid.get(record.uuid);
|
||||
if (existingIndex !== undefined) {
|
||||
// Only set if not already present from collect phase
|
||||
if (!normalized[existingIndex].usageMetadata) {
|
||||
normalized[existingIndex].usageMetadata = record.usageMetadata;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
...sessionData,
|
||||
messages: normalized,
|
||||
|
|
|
|||
|
|
@ -4,6 +4,8 @@
|
|||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type { GenerateContentResponseUsageMetadata } from '@google/genai';
|
||||
|
||||
/**
|
||||
* Universal export message format - SSOT for all export formats.
|
||||
* This is format-agnostic and contains all information needed for any export type.
|
||||
|
|
@ -25,6 +27,12 @@ export interface ExportMessage {
|
|||
/** Model used for assistant messages */
|
||||
model?: string;
|
||||
|
||||
/** Response ID from the LLM API for telemetry/tracing correlation */
|
||||
response_id?: string;
|
||||
|
||||
/** Token usage for this message (mainly for assistant messages) */
|
||||
usageMetadata?: GenerateContentResponseUsageMetadata;
|
||||
|
||||
/** For tool_call messages */
|
||||
toolCall?: {
|
||||
toolCallId: string;
|
||||
|
|
@ -44,6 +52,44 @@ export interface ExportMessage {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Session-level context and aggregated statistics attached to an export.
 */
export interface ExportMetadata {
  /** Identifier of the exported session. */
  sessionId: string;

  /** ISO timestamp marking when the session began. */
  startTime: string;

  /** ISO timestamp marking when the export was produced. */
  exportTime: string;

  /** Working directory of the session; empty string when unknown. */
  cwd: string;

  /** Name of the git branch, when one was recorded. */
  gitBranch?: string;

  /** Model reported by the session's assistant messages, if any. */
  model?: string;

  /** Identifier of the channel/source the session came from. */
  channel?: string;

  /** How many user prompts the session contains. */
  promptCount: number;

  /**
   * Share of the context window taken by the latest prompt, as a percentage.
   * Omitted when it cannot be computed.
   */
  contextUsagePercent?: number;

  /** Sum of tokens used across the session (prompt + completion). */
  totalTokens?: number;

  /** Count of file reads. */
  filesRead?: number;

  /** Count of file writes/edits. */
  filesWritten?: number;

  /** Count of lines added. */
  linesAdded?: number;

  /** Count of lines removed. */
  linesRemoved?: number;

  /** Distinct files referenced during the session. */
  uniqueFiles: string[];

  /** Most recent response ID returned by the LLM API (used as request ID). */
  requestId?: string;
}
|
||||
|
||||
/**
|
||||
* Complete export session data - the single source of truth.
|
||||
*/
|
||||
|
|
@ -51,4 +97,6 @@ export interface ExportSessionData {
|
|||
sessionId: string;
|
||||
startTime: string;
|
||||
messages: ExportMessage[];
|
||||
/** Session metadata and statistics */
|
||||
metadata?: ExportMetadata;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue