mirror of https://github.com/QwenLM/qwen-code.git
synced 2026-04-30 04:30:48 +00:00
1068 lines · 31 KiB · TypeScript
/**
|
|
* @license
|
|
* Copyright 2025 Qwen Code
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
import fs from 'fs/promises';
|
|
import path from 'path';
|
|
import {
|
|
read as readJsonlFile,
|
|
createDebugLogger,
|
|
} from '@qwen-code/qwen-code-core';
|
|
import pLimit from 'p-limit';
|
|
import type { Config, ChatRecord } from '@qwen-code/qwen-code-core';
|
|
import type {
|
|
InsightData,
|
|
HeatMapData,
|
|
StreakData,
|
|
SessionFacets,
|
|
InsightProgressCallback,
|
|
} from '../types/StaticInsightTypes.js';
|
|
import type {
|
|
QualitativeInsights,
|
|
InsightImpressiveWorkflows,
|
|
InsightProjectAreas,
|
|
InsightFutureOpportunities,
|
|
InsightFrictionPoints,
|
|
InsightMemorableMoment,
|
|
InsightImprovements,
|
|
InsightInteractionStyle,
|
|
InsightAtAGlance,
|
|
} from '../types/QualitativeInsightTypes.js';
|
|
|
|
import {
|
|
PROMPT_IMPRESSIVE_WORKFLOWS,
|
|
PROMPT_PROJECT_AREAS,
|
|
PROMPT_FUTURE_OPPORTUNITIES,
|
|
PROMPT_FRICTION_POINTS,
|
|
PROMPT_MEMORABLE_MOMENT,
|
|
PROMPT_IMPROVEMENTS,
|
|
PROMPT_INTERACTION_STYLE,
|
|
PROMPT_AT_A_GLANCE,
|
|
ANALYSIS_PROMPT,
|
|
} from '../prompts/InsightPrompts.js';
|
|
|
|
const logger = createDebugLogger('DataProcessor');
|
|
|
|
const CONCURRENCY_LIMIT = 2;
|
|
|
|
export class DataProcessor {
|
|
constructor(private config: Config) {}
|
|
|
|
// Helper function to format date as YYYY-MM-DD
|
|
private formatDate(date: Date): string {
|
|
return date.toISOString().split('T')[0];
|
|
}
|
|
|
|
// Format chat records for LLM analysis
|
|
private formatRecordsForAnalysis(records: ChatRecord[]): string {
|
|
let output = '';
|
|
const sessionStart =
|
|
records.length > 0 ? new Date(records[0].timestamp) : new Date();
|
|
|
|
output += `Session: ${records[0]?.sessionId || 'unknown'}\n`;
|
|
output += `Date: ${sessionStart.toISOString()}\n`;
|
|
output += `Duration: ${records.length} turns\n\n`;
|
|
|
|
for (const record of records) {
|
|
if (record.type === 'user') {
|
|
const text =
|
|
record.message?.parts
|
|
?.map((p) => ('text' in p ? p.text : ''))
|
|
.join('') || '';
|
|
output += `[User]: ${text}\n`;
|
|
} else if (record.type === 'assistant') {
|
|
if (record.message?.parts) {
|
|
for (const part of record.message.parts) {
|
|
if ('text' in part && part.text) {
|
|
output += `[Assistant]: ${part.text}\n`;
|
|
} else if ('functionCall' in part) {
|
|
const call = part.functionCall;
|
|
if (call) {
|
|
output += `[Tool: ${call.name}]\n`;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return output;
|
|
}
|
|
|
|
// Analyze a single session using LLM
|
|
private async analyzeSession(
|
|
records: ChatRecord[],
|
|
): Promise<SessionFacets | null> {
|
|
if (records.length === 0) return null;
|
|
|
|
const INSIGHT_SCHEMA = {
|
|
type: 'object',
|
|
properties: {
|
|
underlying_goal: {
|
|
type: 'string',
|
|
description: 'What the user fundamentally wanted to achieve',
|
|
},
|
|
goal_categories: {
|
|
type: 'object',
|
|
additionalProperties: { type: 'number' },
|
|
},
|
|
outcome: {
|
|
type: 'string',
|
|
enum: [
|
|
'fully_achieved',
|
|
'mostly_achieved',
|
|
'partially_achieved',
|
|
'not_achieved',
|
|
'unclear_from_transcript',
|
|
],
|
|
},
|
|
user_satisfaction_counts: {
|
|
type: 'object',
|
|
additionalProperties: { type: 'number' },
|
|
},
|
|
Qwen_helpfulness: {
|
|
type: 'string',
|
|
enum: [
|
|
'unhelpful',
|
|
'slightly_helpful',
|
|
'moderately_helpful',
|
|
'very_helpful',
|
|
'essential',
|
|
],
|
|
},
|
|
session_type: {
|
|
type: 'string',
|
|
enum: [
|
|
'single_task',
|
|
'multi_task',
|
|
'iterative_refinement',
|
|
'exploration',
|
|
'quick_question',
|
|
],
|
|
},
|
|
friction_counts: {
|
|
type: 'object',
|
|
additionalProperties: { type: 'number' },
|
|
},
|
|
friction_detail: {
|
|
type: 'string',
|
|
description: 'One sentence describing friction or empty',
|
|
},
|
|
primary_success: {
|
|
type: 'string',
|
|
enum: [
|
|
'none',
|
|
'fast_accurate_search',
|
|
'correct_code_edits',
|
|
'good_explanations',
|
|
'proactive_help',
|
|
'multi_file_changes',
|
|
'good_debugging',
|
|
],
|
|
},
|
|
brief_summary: {
|
|
type: 'string',
|
|
description: 'One sentence: what user wanted and whether they got it',
|
|
},
|
|
},
|
|
required: [
|
|
'underlying_goal',
|
|
'goal_categories',
|
|
'outcome',
|
|
'user_satisfaction_counts',
|
|
'Qwen_helpfulness',
|
|
'session_type',
|
|
'friction_counts',
|
|
'friction_detail',
|
|
'primary_success',
|
|
'brief_summary',
|
|
],
|
|
};
|
|
|
|
const sessionText = this.formatRecordsForAnalysis(records);
|
|
const prompt = `${ANALYSIS_PROMPT}\n\nSESSION:\n${sessionText}`;
|
|
|
|
try {
|
|
const result = await this.config.getBaseLlmClient().generateJson({
|
|
// Use the configured model
|
|
model: this.config.getModel(),
|
|
contents: [{ role: 'user', parts: [{ text: prompt }] }],
|
|
schema: INSIGHT_SCHEMA,
|
|
abortSignal: AbortSignal.timeout(60000), // 1 minute timeout per session
|
|
});
|
|
|
|
if (!result || Object.keys(result).length === 0) {
|
|
return null;
|
|
}
|
|
|
|
return {
|
|
...(result as unknown as SessionFacets),
|
|
session_id: records[0].sessionId,
|
|
};
|
|
} catch (error) {
|
|
logger.error(
|
|
`Failed to analyze session ${records[0]?.sessionId}:`,
|
|
error,
|
|
);
|
|
return null;
|
|
}
|
|
}
|
|
|
|
// Calculate streaks from activity dates
|
|
private calculateStreaks(dates: string[]): StreakData {
|
|
if (dates.length === 0) {
|
|
return { currentStreak: 0, longestStreak: 0, dates: [] };
|
|
}
|
|
|
|
// Convert string dates to Date objects and sort them
|
|
const dateObjects = dates.map((dateStr) => new Date(dateStr));
|
|
dateObjects.sort((a, b) => a.getTime() - b.getTime());
|
|
|
|
let currentStreak = 1;
|
|
let maxStreak = 1;
|
|
let currentDate = new Date(dateObjects[0]);
|
|
currentDate.setHours(0, 0, 0, 0); // Normalize to start of day
|
|
|
|
for (let i = 1; i < dateObjects.length; i++) {
|
|
const nextDate = new Date(dateObjects[i]);
|
|
nextDate.setHours(0, 0, 0, 0); // Normalize to start of day
|
|
|
|
// Calculate difference in days
|
|
const diffDays = Math.floor(
|
|
(nextDate.getTime() - currentDate.getTime()) / (1000 * 60 * 60 * 24),
|
|
);
|
|
|
|
if (diffDays === 1) {
|
|
// Consecutive day
|
|
currentStreak++;
|
|
maxStreak = Math.max(maxStreak, currentStreak);
|
|
} else if (diffDays > 1) {
|
|
// Gap in streak
|
|
currentStreak = 1;
|
|
}
|
|
// If diffDays === 0, same day, so streak continues
|
|
|
|
currentDate = nextDate;
|
|
}
|
|
|
|
// Check if the streak is still ongoing (if last activity was yesterday or today)
|
|
const today = new Date();
|
|
today.setHours(0, 0, 0, 0);
|
|
const yesterday = new Date(today);
|
|
yesterday.setDate(yesterday.getDate() - 1);
|
|
|
|
if (
|
|
currentDate.getTime() === today.getTime() ||
|
|
currentDate.getTime() === yesterday.getTime()
|
|
) {
|
|
// The streak might still be active, so we don't reset it
|
|
}
|
|
|
|
return {
|
|
currentStreak,
|
|
longestStreak: maxStreak,
|
|
dates,
|
|
};
|
|
}
|
|
|
|
// Process chat files from all projects in the base directory and generate insights
|
|
async generateInsights(
|
|
baseDir: string,
|
|
facetsOutputDir?: string,
|
|
onProgress?: InsightProgressCallback,
|
|
): Promise<InsightData> {
|
|
if (onProgress) onProgress('Scanning chat files', 0);
|
|
const allChatFiles = await this.scanChatFiles(baseDir);
|
|
|
|
if (onProgress) onProgress('Generating metrics', 10);
|
|
const metrics = await this.generateMetrics(allChatFiles, onProgress);
|
|
|
|
if (onProgress) onProgress('Analyzing sessions', 20);
|
|
const facets = await this.generateFacets(
|
|
allChatFiles,
|
|
facetsOutputDir,
|
|
onProgress,
|
|
);
|
|
|
|
if (onProgress) onProgress('Generating qualitative insights', 80);
|
|
const qualitative = await this.generateQualitativeInsights(metrics, facets);
|
|
|
|
// Aggregate satisfaction, friction, success and outcome data from facets
|
|
const {
|
|
satisfactionAgg,
|
|
frictionAgg,
|
|
primarySuccessAgg,
|
|
outcomesAgg,
|
|
goalsAgg,
|
|
} = this.aggregateFacetsData(facets);
|
|
|
|
if (onProgress) onProgress('Finalizing report', 100);
|
|
|
|
return {
|
|
...metrics,
|
|
qualitative,
|
|
satisfaction: satisfactionAgg,
|
|
friction: frictionAgg,
|
|
primarySuccess: primarySuccessAgg,
|
|
outcomes: outcomesAgg,
|
|
topGoals: goalsAgg,
|
|
};
|
|
}
|
|
|
|
// Aggregate satisfaction and friction data from facets
|
|
private aggregateFacetsData(facets: SessionFacets[]): {
|
|
satisfactionAgg: Record<string, number>;
|
|
frictionAgg: Record<string, number>;
|
|
primarySuccessAgg: Record<string, number>;
|
|
outcomesAgg: Record<string, number>;
|
|
goalsAgg: Record<string, number>;
|
|
} {
|
|
const satisfactionAgg: Record<string, number> = {};
|
|
const frictionAgg: Record<string, number> = {};
|
|
const primarySuccessAgg: Record<string, number> = {};
|
|
const outcomesAgg: Record<string, number> = {};
|
|
const goalsAgg: Record<string, number> = {};
|
|
|
|
facets.forEach((facet) => {
|
|
// Aggregate satisfaction
|
|
Object.entries(facet.user_satisfaction_counts).forEach(([sat, count]) => {
|
|
satisfactionAgg[sat] = (satisfactionAgg[sat] || 0) + count;
|
|
});
|
|
|
|
// Aggregate friction
|
|
Object.entries(facet.friction_counts).forEach(([fric, count]) => {
|
|
frictionAgg[fric] = (frictionAgg[fric] || 0) + count;
|
|
});
|
|
|
|
// Aggregate primary success
|
|
if (facet.primary_success && facet.primary_success !== 'none') {
|
|
primarySuccessAgg[facet.primary_success] =
|
|
(primarySuccessAgg[facet.primary_success] || 0) + 1;
|
|
}
|
|
|
|
// Aggregate outcomes
|
|
if (facet.outcome) {
|
|
outcomesAgg[facet.outcome] = (outcomesAgg[facet.outcome] || 0) + 1;
|
|
}
|
|
|
|
// Aggregate goals
|
|
Object.entries(facet.goal_categories).forEach(([goal, count]) => {
|
|
goalsAgg[goal] = (goalsAgg[goal] || 0) + count;
|
|
});
|
|
});
|
|
|
|
return {
|
|
satisfactionAgg,
|
|
frictionAgg,
|
|
primarySuccessAgg,
|
|
outcomesAgg,
|
|
goalsAgg,
|
|
};
|
|
}
|
|
|
|
private async generateQualitativeInsights(
|
|
metrics: Omit<InsightData, 'facets' | 'qualitative'>,
|
|
facets: SessionFacets[],
|
|
): Promise<QualitativeInsights | undefined> {
|
|
if (facets.length === 0) {
|
|
return undefined;
|
|
}
|
|
|
|
logger.info('Generating qualitative insights...');
|
|
|
|
const commonData = this.prepareCommonPromptData(metrics, facets);
|
|
|
|
const generate = async <T>(
|
|
promptTemplate: string,
|
|
schema: Record<string, unknown>,
|
|
): Promise<T> => {
|
|
const prompt = `${promptTemplate}\n\n${commonData}`;
|
|
try {
|
|
const result = await this.config.getBaseLlmClient().generateJson({
|
|
model: this.config.getModel(),
|
|
contents: [{ role: 'user', parts: [{ text: prompt }] }],
|
|
schema,
|
|
abortSignal: AbortSignal.timeout(60000),
|
|
});
|
|
return result as T;
|
|
} catch (error) {
|
|
logger.error('Failed to generate insight:', error);
|
|
throw error;
|
|
}
|
|
};
|
|
|
|
// Schemas for each insight type
|
|
// We define simplified schemas here to guide the LLM.
|
|
// The types are already defined in QualitativeInsightTypes.ts
|
|
|
|
// 1. Impressive Workflows
|
|
const schemaImpressiveWorkflows = {
|
|
type: 'object',
|
|
properties: {
|
|
intro: { type: 'string' },
|
|
impressive_workflows: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
title: { type: 'string' },
|
|
description: { type: 'string' },
|
|
},
|
|
required: ['title', 'description'],
|
|
},
|
|
},
|
|
},
|
|
required: ['intro', 'impressive_workflows'],
|
|
};
|
|
|
|
// 2. Project Areas
|
|
const schemaProjectAreas = {
|
|
type: 'object',
|
|
properties: {
|
|
areas: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
name: { type: 'string' },
|
|
session_count: { type: 'number' },
|
|
description: { type: 'string' },
|
|
},
|
|
required: ['name', 'session_count', 'description'],
|
|
},
|
|
},
|
|
},
|
|
required: ['areas'],
|
|
};
|
|
|
|
// 3. Future Opportunities
|
|
const schemaFutureOpportunities = {
|
|
type: 'object',
|
|
properties: {
|
|
intro: { type: 'string' },
|
|
opportunities: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
title: { type: 'string' },
|
|
whats_possible: { type: 'string' },
|
|
how_to_try: { type: 'string' },
|
|
copyable_prompt: { type: 'string' },
|
|
},
|
|
required: [
|
|
'title',
|
|
'whats_possible',
|
|
'how_to_try',
|
|
'copyable_prompt',
|
|
],
|
|
},
|
|
},
|
|
},
|
|
required: ['intro', 'opportunities'],
|
|
};
|
|
|
|
// 4. Friction Points
|
|
const schemaFrictionPoints = {
|
|
type: 'object',
|
|
properties: {
|
|
intro: { type: 'string' },
|
|
categories: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
category: { type: 'string' },
|
|
description: { type: 'string' },
|
|
examples: { type: 'array', items: { type: 'string' } },
|
|
},
|
|
required: ['category', 'description', 'examples'],
|
|
},
|
|
},
|
|
},
|
|
required: ['intro', 'categories'],
|
|
};
|
|
|
|
// 5. Memorable Moment
|
|
const schemaMemorableMoment = {
|
|
type: 'object',
|
|
properties: {
|
|
headline: { type: 'string' },
|
|
detail: { type: 'string' },
|
|
},
|
|
required: ['headline', 'detail'],
|
|
};
|
|
|
|
// 6. Improvements
|
|
const schemaImprovements = {
|
|
type: 'object',
|
|
properties: {
|
|
Qwen_md_additions: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
addition: { type: 'string' },
|
|
why: { type: 'string' },
|
|
prompt_scaffold: { type: 'string' },
|
|
},
|
|
required: ['addition', 'why', 'prompt_scaffold'],
|
|
},
|
|
},
|
|
features_to_try: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
feature: { type: 'string' },
|
|
one_liner: { type: 'string' },
|
|
why_for_you: { type: 'string' },
|
|
example_code: { type: 'string' },
|
|
},
|
|
required: ['feature', 'one_liner', 'why_for_you', 'example_code'],
|
|
},
|
|
},
|
|
usage_patterns: {
|
|
type: 'array',
|
|
items: {
|
|
type: 'object',
|
|
properties: {
|
|
title: { type: 'string' },
|
|
suggestion: { type: 'string' },
|
|
detail: { type: 'string' },
|
|
copyable_prompt: { type: 'string' },
|
|
},
|
|
required: ['title', 'suggestion', 'detail', 'copyable_prompt'],
|
|
},
|
|
},
|
|
},
|
|
required: ['Qwen_md_additions', 'features_to_try', 'usage_patterns'],
|
|
};
|
|
|
|
// 7. Interaction Style
|
|
const schemaInteractionStyle = {
|
|
type: 'object',
|
|
properties: {
|
|
narrative: { type: 'string' },
|
|
key_pattern: { type: 'string' },
|
|
},
|
|
required: ['narrative', 'key_pattern'],
|
|
};
|
|
|
|
// 8. At A Glance
|
|
const schemaAtAGlance = {
|
|
type: 'object',
|
|
properties: {
|
|
whats_working: { type: 'string' },
|
|
whats_hindering: { type: 'string' },
|
|
quick_wins: { type: 'string' },
|
|
ambitious_workflows: { type: 'string' },
|
|
},
|
|
required: [
|
|
'whats_working',
|
|
'whats_hindering',
|
|
'quick_wins',
|
|
'ambitious_workflows',
|
|
],
|
|
};
|
|
|
|
const limit = pLimit(CONCURRENCY_LIMIT);
|
|
|
|
try {
|
|
const [
|
|
impressiveWorkflows,
|
|
projectAreas,
|
|
futureOpportunities,
|
|
frictionPoints,
|
|
memorableMoment,
|
|
improvements,
|
|
interactionStyle,
|
|
atAGlance,
|
|
] = await Promise.all([
|
|
limit(() =>
|
|
generate<InsightImpressiveWorkflows>(
|
|
PROMPT_IMPRESSIVE_WORKFLOWS,
|
|
schemaImpressiveWorkflows,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightProjectAreas>(
|
|
PROMPT_PROJECT_AREAS,
|
|
schemaProjectAreas,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightFutureOpportunities>(
|
|
PROMPT_FUTURE_OPPORTUNITIES,
|
|
schemaFutureOpportunities,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightFrictionPoints>(
|
|
PROMPT_FRICTION_POINTS,
|
|
schemaFrictionPoints,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightMemorableMoment>(
|
|
PROMPT_MEMORABLE_MOMENT,
|
|
schemaMemorableMoment,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightImprovements>(
|
|
PROMPT_IMPROVEMENTS,
|
|
schemaImprovements,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightInteractionStyle>(
|
|
PROMPT_INTERACTION_STYLE,
|
|
schemaInteractionStyle,
|
|
),
|
|
),
|
|
limit(() =>
|
|
generate<InsightAtAGlance>(PROMPT_AT_A_GLANCE, schemaAtAGlance),
|
|
),
|
|
]);
|
|
|
|
return {
|
|
impressiveWorkflows,
|
|
projectAreas,
|
|
futureOpportunities,
|
|
frictionPoints,
|
|
memorableMoment,
|
|
improvements,
|
|
interactionStyle,
|
|
atAGlance,
|
|
};
|
|
} catch (e) {
|
|
logger.error('Error generating qualitative insights:', e);
|
|
return undefined;
|
|
}
|
|
}
|
|
|
|
private prepareCommonPromptData(
|
|
metrics: Omit<InsightData, 'facets' | 'qualitative'>,
|
|
facets: SessionFacets[],
|
|
): string {
|
|
// 1. DATA section
|
|
const goalsAgg: Record<string, number> = {};
|
|
const outcomesAgg: Record<string, number> = {};
|
|
const satisfactionAgg: Record<string, number> = {};
|
|
const frictionAgg: Record<string, number> = {};
|
|
const successAgg: Record<string, number> = {};
|
|
|
|
facets.forEach((facet) => {
|
|
// Aggregate goals
|
|
Object.entries(facet.goal_categories).forEach(([goal, count]) => {
|
|
goalsAgg[goal] = (goalsAgg[goal] || 0) + count;
|
|
});
|
|
|
|
// Aggregate outcomes
|
|
outcomesAgg[facet.outcome] = (outcomesAgg[facet.outcome] || 0) + 1;
|
|
|
|
// Aggregate satisfaction
|
|
Object.entries(facet.user_satisfaction_counts).forEach(([sat, count]) => {
|
|
satisfactionAgg[sat] = (satisfactionAgg[sat] || 0) + count;
|
|
});
|
|
|
|
// Aggregate friction
|
|
Object.entries(facet.friction_counts).forEach(([fric, count]) => {
|
|
frictionAgg[fric] = (frictionAgg[fric] || 0) + count;
|
|
});
|
|
|
|
// Aggregate success (primary_success)
|
|
if (facet.primary_success && facet.primary_success !== 'none') {
|
|
successAgg[facet.primary_success] =
|
|
(successAgg[facet.primary_success] || 0) + 1;
|
|
}
|
|
});
|
|
|
|
const topGoals = Object.entries(goalsAgg)
|
|
.sort((a, b) => b[1] - a[1])
|
|
.slice(0, 8);
|
|
|
|
const dataObj = {
|
|
sessions: metrics.totalSessions || facets.length,
|
|
analyzed: facets.length,
|
|
date_range: {
|
|
start: Object.keys(metrics.heatmap).sort()[0] || 'N/A',
|
|
end: Object.keys(metrics.heatmap).sort().pop() || 'N/A',
|
|
},
|
|
messages: metrics.totalMessages || 0,
|
|
hours: metrics.totalHours || 0,
|
|
commits: 0, // Not tracked yet
|
|
top_tools: metrics.topTools || [],
|
|
top_goals: topGoals,
|
|
outcomes: outcomesAgg,
|
|
satisfaction: satisfactionAgg,
|
|
friction: frictionAgg,
|
|
success: successAgg,
|
|
};
|
|
|
|
// 2. SESSION SUMMARIES section
|
|
const sessionSummaries = facets
|
|
.map((f) => `- ${f.brief_summary}`)
|
|
.join('\n');
|
|
|
|
// 3. FRICTION DETAILS section
|
|
const frictionDetails = facets
|
|
.filter((f) => f.friction_detail && f.friction_detail.trim().length > 0)
|
|
.map((f) => `- ${f.friction_detail}`)
|
|
.join('\n');
|
|
|
|
return `DATA:
|
|
${JSON.stringify(dataObj, null, 2)}
|
|
|
|
SESSION SUMMARIES:
|
|
${sessionSummaries}
|
|
|
|
FRICTION DETAILS:
|
|
${frictionDetails}
|
|
|
|
USER INSTRUCTIONS TO Qwen:
|
|
None captured`;
|
|
}
|
|
|
|
private async scanChatFiles(
|
|
baseDir: string,
|
|
): Promise<Array<{ path: string; mtime: number }>> {
|
|
const allChatFiles: Array<{ path: string; mtime: number }> = [];
|
|
|
|
try {
|
|
// Get all project directories in the base directory
|
|
const projectDirs = await fs.readdir(baseDir);
|
|
|
|
// Process each project directory
|
|
for (const projectDir of projectDirs) {
|
|
const projectPath = path.join(baseDir, projectDir);
|
|
const stats = await fs.stat(projectPath);
|
|
|
|
// Only process if it's a directory
|
|
if (stats.isDirectory()) {
|
|
const chatsDir = path.join(projectPath, 'chats');
|
|
|
|
try {
|
|
// Get all chat files in the chats directory
|
|
const files = await fs.readdir(chatsDir);
|
|
const chatFiles = files.filter((file) => file.endsWith('.jsonl'));
|
|
|
|
for (const file of chatFiles) {
|
|
const filePath = path.join(chatsDir, file);
|
|
|
|
// Get file stats for sorting by recency
|
|
try {
|
|
const fileStats = await fs.stat(filePath);
|
|
allChatFiles.push({ path: filePath, mtime: fileStats.mtimeMs });
|
|
} catch (e) {
|
|
logger.error(`Failed to stat file ${filePath}:`, e);
|
|
}
|
|
}
|
|
} catch (error) {
|
|
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
|
|
logger.error(
|
|
`Error reading chats directory for project ${projectDir}: ${error}`,
|
|
);
|
|
}
|
|
// Continue to next project if chats directory doesn't exist
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
} catch (error) {
|
|
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
|
|
// Base directory doesn't exist, return empty
|
|
logger.info(`Base directory does not exist: ${baseDir}`);
|
|
} else {
|
|
logger.error(`Error reading base directory: ${error}`);
|
|
}
|
|
}
|
|
|
|
return allChatFiles;
|
|
}
|
|
|
|
private async generateMetrics(
|
|
files: Array<{ path: string; mtime: number }>,
|
|
onProgress?: InsightProgressCallback,
|
|
): Promise<Omit<InsightData, 'facets' | 'qualitative'>> {
|
|
// Initialize data structures
|
|
const heatmap: HeatMapData = {};
|
|
const activeHours: { [hour: number]: number } = {};
|
|
const sessionStartTimes: { [sessionId: string]: Date } = {};
|
|
const sessionEndTimes: { [sessionId: string]: Date } = {};
|
|
let totalMessages = 0;
|
|
let totalLinesAdded = 0;
|
|
let totalLinesRemoved = 0;
|
|
const uniqueFiles = new Set<string>();
|
|
const toolUsage: Record<string, number> = {};
|
|
|
|
// Process files in batches to avoid OOM and blocking the event loop
|
|
const BATCH_SIZE = 50;
|
|
const totalFiles = files.length;
|
|
|
|
for (let i = 0; i < totalFiles; i += BATCH_SIZE) {
|
|
const batchEnd = Math.min(i + BATCH_SIZE, totalFiles);
|
|
const batch = files.slice(i, batchEnd);
|
|
|
|
// Process batch sequentially to minimize memory usage
|
|
for (const fileInfo of batch) {
|
|
try {
|
|
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
|
|
totalMessages += records.length;
|
|
|
|
// Process each record
|
|
for (const record of records) {
|
|
const timestamp = new Date(record.timestamp);
|
|
const dateKey = this.formatDate(timestamp);
|
|
const hour = timestamp.getHours();
|
|
|
|
// Update heatmap (count of interactions per day)
|
|
heatmap[dateKey] = (heatmap[dateKey] || 0) + 1;
|
|
|
|
// Update active hours
|
|
activeHours[hour] = (activeHours[hour] || 0) + 1;
|
|
|
|
// Track session times
|
|
if (!sessionStartTimes[record.sessionId]) {
|
|
sessionStartTimes[record.sessionId] = timestamp;
|
|
}
|
|
sessionEndTimes[record.sessionId] = timestamp;
|
|
|
|
// Track tool usage
|
|
if (record.type === 'assistant' && record.message?.parts) {
|
|
for (const part of record.message.parts) {
|
|
if ('functionCall' in part) {
|
|
const name = part.functionCall!.name!;
|
|
toolUsage[name] = (toolUsage[name] || 0) + 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Track lines and files from tool results
|
|
if (
|
|
record.type === 'tool_result' &&
|
|
record.toolCallResult?.resultDisplay
|
|
) {
|
|
const display = record.toolCallResult.resultDisplay;
|
|
// Check if it matches FileDiff shape
|
|
if (
|
|
typeof display === 'object' &&
|
|
display !== null &&
|
|
'fileName' in display
|
|
) {
|
|
// Cast to any to avoid importing FileDiff type which might not be available here
|
|
const diff = display as {
|
|
fileName: unknown;
|
|
diffStat?: {
|
|
model_added_lines?: number;
|
|
model_removed_lines?: number;
|
|
};
|
|
};
|
|
if (typeof diff.fileName === 'string') {
|
|
uniqueFiles.add(diff.fileName);
|
|
}
|
|
|
|
if (diff.diffStat) {
|
|
totalLinesAdded += diff.diffStat.model_added_lines || 0;
|
|
totalLinesRemoved += diff.diffStat.model_removed_lines || 0;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} catch (error) {
|
|
logger.error(
|
|
`Failed to process metrics for file ${fileInfo.path}:`,
|
|
error,
|
|
);
|
|
// Continue to next file
|
|
}
|
|
}
|
|
|
|
// Update progress (mapped to 10-20% range of total progress)
|
|
if (onProgress) {
|
|
const percentComplete = batchEnd / totalFiles;
|
|
const overallProgress = 10 + Math.round(percentComplete * 10);
|
|
onProgress(
|
|
`Generating metrics (${batchEnd}/${totalFiles})`,
|
|
overallProgress,
|
|
);
|
|
}
|
|
|
|
// Yield to event loop to allow GC and UI updates
|
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
}
|
|
|
|
// Calculate streak data
|
|
const streakData = this.calculateStreaks(Object.keys(heatmap));
|
|
|
|
// Calculate longest work session and total hours
|
|
let longestWorkDuration = 0;
|
|
let longestWorkDate: string | null = null;
|
|
let totalDurationMs = 0;
|
|
|
|
const sessionIds = Object.keys(sessionStartTimes);
|
|
const totalSessions = sessionIds.length;
|
|
|
|
for (const sessionId of sessionIds) {
|
|
const start = sessionStartTimes[sessionId];
|
|
const end = sessionEndTimes[sessionId];
|
|
const durationMs = end.getTime() - start.getTime();
|
|
const durationMinutes = Math.round(durationMs / (1000 * 60));
|
|
|
|
totalDurationMs += durationMs;
|
|
|
|
if (durationMinutes > longestWorkDuration) {
|
|
longestWorkDuration = durationMinutes;
|
|
longestWorkDate = this.formatDate(start);
|
|
}
|
|
}
|
|
|
|
const totalHours = Math.round(totalDurationMs / (1000 * 60 * 60));
|
|
|
|
// Calculate latest active time
|
|
let latestActiveTime: string | null = null;
|
|
let latestTimestamp = new Date(0);
|
|
for (const dateStr in heatmap) {
|
|
const date = new Date(dateStr);
|
|
if (date > latestTimestamp) {
|
|
latestTimestamp = date;
|
|
latestActiveTime = date.toLocaleTimeString([], {
|
|
hour: '2-digit',
|
|
minute: '2-digit',
|
|
});
|
|
}
|
|
}
|
|
|
|
// Calculate top tools
|
|
const topTools = Object.entries(toolUsage)
|
|
.sort((a, b) => b[1] - a[1])
|
|
.slice(0, 10);
|
|
|
|
return {
|
|
heatmap,
|
|
currentStreak: streakData.currentStreak,
|
|
longestStreak: streakData.longestStreak,
|
|
longestWorkDate,
|
|
longestWorkDuration,
|
|
activeHours,
|
|
latestActiveTime,
|
|
totalSessions,
|
|
totalMessages,
|
|
totalHours,
|
|
topTools,
|
|
totalLinesAdded,
|
|
totalLinesRemoved,
|
|
totalFiles: uniqueFiles.size,
|
|
};
|
|
}
|
|
|
|
private async generateFacets(
|
|
allFiles: Array<{ path: string; mtime: number }>,
|
|
facetsOutputDir?: string,
|
|
onProgress?: InsightProgressCallback,
|
|
): Promise<SessionFacets[]> {
|
|
// Sort files by recency (descending) and take top 50
|
|
const recentFiles = [...allFiles]
|
|
.sort((a, b) => b.mtime - a.mtime)
|
|
.slice(0, 50);
|
|
|
|
logger.info(`Analyzing ${recentFiles.length} recent sessions with LLM...`);
|
|
|
|
// Create a limit function with concurrency of 4 to avoid 429 errors
|
|
const limit = pLimit(CONCURRENCY_LIMIT);
|
|
|
|
let completed = 0;
|
|
const total = recentFiles.length;
|
|
|
|
// Analyze sessions concurrently with limit
|
|
const analysisPromises = recentFiles.map((fileInfo) =>
|
|
limit(async () => {
|
|
try {
|
|
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
|
|
|
|
// Check if we already have this session analyzed
|
|
if (records.length > 0 && facetsOutputDir) {
|
|
const sessionId = records[0].sessionId;
|
|
if (sessionId) {
|
|
const existingFacetPath = path.join(
|
|
facetsOutputDir,
|
|
`${sessionId}.json`,
|
|
);
|
|
try {
|
|
// Check if file exists and is readable
|
|
const existingData = await fs.readFile(
|
|
existingFacetPath,
|
|
'utf-8',
|
|
);
|
|
const existingFacet = JSON.parse(existingData);
|
|
completed++;
|
|
if (onProgress) {
|
|
const percent = 20 + Math.round((completed / total) * 60);
|
|
onProgress(
|
|
'Analyzing sessions',
|
|
percent,
|
|
`${completed}/${total}`,
|
|
);
|
|
}
|
|
return existingFacet;
|
|
} catch (readError) {
|
|
// File doesn't exist or is invalid, proceed to analyze
|
|
if ((readError as NodeJS.ErrnoException).code !== 'ENOENT') {
|
|
logger.warn(
|
|
`Failed to read existing facet for ${sessionId}, regenerating:`,
|
|
readError,
|
|
);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
const facet = await this.analyzeSession(records);
|
|
|
|
if (facet && facetsOutputDir) {
|
|
try {
|
|
const facetPath = path.join(
|
|
facetsOutputDir,
|
|
`${facet.session_id}.json`,
|
|
);
|
|
await fs.writeFile(
|
|
facetPath,
|
|
JSON.stringify(facet, null, 2),
|
|
'utf-8',
|
|
);
|
|
} catch (writeError) {
|
|
logger.error(
|
|
`Failed to write facet file for session ${facet.session_id}:`,
|
|
writeError,
|
|
);
|
|
}
|
|
}
|
|
|
|
completed++;
|
|
if (onProgress) {
|
|
const percent = 20 + Math.round((completed / total) * 60);
|
|
onProgress('Analyzing sessions', percent, `${completed}/${total}`);
|
|
}
|
|
|
|
return facet;
|
|
} catch (e) {
|
|
logger.error(`Error analyzing session file ${fileInfo.path}:`, e);
|
|
completed++;
|
|
if (onProgress) {
|
|
const percent = 20 + Math.round((completed / total) * 60);
|
|
onProgress('Analyzing sessions', percent, `${completed}/${total}`);
|
|
}
|
|
return null;
|
|
}
|
|
}),
|
|
);
|
|
|
|
const sessionFacetsWithNulls = await Promise.all(analysisPromises);
|
|
const facets = sessionFacetsWithNulls.filter(
|
|
(f): f is SessionFacets => f !== null,
|
|
);
|
|
return facets;
|
|
}
|
|
}
|