feat(insight): refactor data processing and add qualitative insights structure

This commit is contained in:
DragonnZhang 2026-02-06 00:44:04 +08:00
parent f1214c90ea
commit 5a7dcce196
4 changed files with 884 additions and 225 deletions

View file

@ -17,116 +17,29 @@ import type {
StreakData,
SessionFacets,
} from '../types/StaticInsightTypes.js';
import type {
QualitativeInsights,
InsightImpressiveWorkflows,
InsightProjectAreas,
InsightFutureOpportunities,
InsightFrictionPoints,
InsightMemorableMoment,
InsightImprovements,
InsightInteractionStyle,
InsightAtAGlance,
} from '../types/QualitativeInsightTypes.js';
// Prompt content from prompt.txt
// Prompt sent to the LLM to extract structured per-session facets.
// NOTE(review): the signal → label separators in guideline 2 appear to have
// been stripped in transit (e.g. `"perfect!" happy`); restored here so the
// model sees an explicit mapping instead of two fused tokens.
const ANALYSIS_PROMPT = `Analyze this Qwen Code session and extract structured facets.
CRITICAL GUIDELINES:
1. **goal_categories**: Count ONLY what the USER explicitly asked for.
- DO NOT count Qwen's autonomous codebase exploration
- DO NOT count work Qwen decided to do on its own
- ONLY count when user says "can you...", "please...", "I need...", "let's..."
2. **user_satisfaction_counts**: Base ONLY on explicit user signals.
- "Yay!", "great!", "perfect!" → happy
- "thanks", "looks good", "that works" → satisfied
- "ok, now let's..." (continuing without complaint) → likely_satisfied
- "that's not right", "try again" → dissatisfied
- "this is broken", "I give up" → frustrated
3. **friction_counts**: Be specific about what went wrong.
- misunderstood_request: Qwen interpreted incorrectly
- wrong_approach: Right goal, wrong solution method
- buggy_code: Code didn't work correctly
- user_rejected_action: User said no/stop to a tool call
- excessive_changes: Over-engineered or changed too much
4. If very short or just warmup, use warmup_minimal for goal_category`;
// JSON schema handed to the LLM alongside ANALYSIS_PROMPT to force a
// structured facet object per session. Every property is listed in
// `required`, so the model always emits a complete object.
// NOTE(review): presumably mirrors the SessionFacets type — confirm against
// StaticInsightTypes.
const INSIGHT_SCHEMA = {
  type: 'object',
  properties: {
    underlying_goal: {
      type: 'string',
      description: 'What the user fundamentally wanted to achieve',
    },
    // Open-ended category → count map (only user-requested work is counted;
    // see ANALYSIS_PROMPT guideline 1).
    goal_categories: {
      type: 'object',
      additionalProperties: { type: 'number' },
    },
    // Single overall outcome judgment for the session.
    outcome: {
      type: 'string',
      enum: [
        'fully_achieved',
        'mostly_achieved',
        'partially_achieved',
        'not_achieved',
        'unclear_from_transcript',
      ],
    },
    // Satisfaction-signal → count map (explicit user signals only).
    user_satisfaction_counts: {
      type: 'object',
      additionalProperties: { type: 'number' },
    },
    Qwen_helpfulness: {
      type: 'string',
      enum: [
        'unhelpful',
        'slightly_helpful',
        'moderately_helpful',
        'very_helpful',
        'essential',
      ],
    },
    session_type: {
      type: 'string',
      enum: [
        'single_task',
        'multi_task',
        'iterative_refinement',
        'exploration',
        'quick_question',
      ],
    },
    // Friction-category → count map (see ANALYSIS_PROMPT guideline 3).
    friction_counts: {
      type: 'object',
      additionalProperties: { type: 'number' },
    },
    friction_detail: {
      type: 'string',
      description: 'One sentence describing friction or empty',
    },
    // 'none' is the explicit "no notable success" sentinel.
    primary_success: {
      type: 'string',
      enum: [
        'none',
        'fast_accurate_search',
        'correct_code_edits',
        'good_explanations',
        'proactive_help',
        'multi_file_changes',
        'good_debugging',
      ],
    },
    brief_summary: {
      type: 'string',
      description: 'One sentence: what user wanted and whether they got it',
    },
  },
  required: [
    'underlying_goal',
    'goal_categories',
    'outcome',
    'user_satisfaction_counts',
    'Qwen_helpfulness',
    'session_type',
    'friction_counts',
    'friction_detail',
    'primary_success',
    'brief_summary',
  ],
};
import {
PROMPT_IMPRESSIVE_WORKFLOWS,
PROMPT_PROJECT_AREAS,
PROMPT_FUTURE_OPPORTUNITIES,
PROMPT_FRICTION_POINTS,
PROMPT_MEMORABLE_MOMENT,
PROMPT_IMPROVEMENTS,
PROMPT_INTERACTION_STYLE,
PROMPT_AT_A_GLANCE,
ANALYSIS_PROMPT,
} from '../prompts/InsightPrompts.js';
export class DataProcessor {
constructor(private config: Config) {}
@ -159,8 +72,7 @@ export class DataProcessor {
if ('text' in part && part.text) {
output += `[Assistant]: ${part.text}\n`;
} else if ('functionCall' in part) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const call = (part as any).functionCall;
const call = part.functionCall;
if (call) {
output += `[Tool: ${call.name}]\n`;
}
@ -178,6 +90,90 @@ export class DataProcessor {
): Promise<SessionFacets | null> {
if (records.length === 0) return null;
const INSIGHT_SCHEMA = {
type: 'object',
properties: {
underlying_goal: {
type: 'string',
description: 'What the user fundamentally wanted to achieve',
},
goal_categories: {
type: 'object',
additionalProperties: { type: 'number' },
},
outcome: {
type: 'string',
enum: [
'fully_achieved',
'mostly_achieved',
'partially_achieved',
'not_achieved',
'unclear_from_transcript',
],
},
user_satisfaction_counts: {
type: 'object',
additionalProperties: { type: 'number' },
},
Qwen_helpfulness: {
type: 'string',
enum: [
'unhelpful',
'slightly_helpful',
'moderately_helpful',
'very_helpful',
'essential',
],
},
session_type: {
type: 'string',
enum: [
'single_task',
'multi_task',
'iterative_refinement',
'exploration',
'quick_question',
],
},
friction_counts: {
type: 'object',
additionalProperties: { type: 'number' },
},
friction_detail: {
type: 'string',
description: 'One sentence describing friction or empty',
},
primary_success: {
type: 'string',
enum: [
'none',
'fast_accurate_search',
'correct_code_edits',
'good_explanations',
'proactive_help',
'multi_file_changes',
'good_debugging',
],
},
brief_summary: {
type: 'string',
description: 'One sentence: what user wanted and whether they got it',
},
},
required: [
'underlying_goal',
'goal_categories',
'outcome',
'user_satisfaction_counts',
'Qwen_helpfulness',
'session_type',
'friction_counts',
'friction_detail',
'primary_success',
'brief_summary',
],
};
const sessionText = this.formatRecordsForAnalysis(records);
const prompt = `${ANALYSIS_PROMPT}\n\nSESSION:\n${sessionText}`;
@ -367,14 +363,389 @@ export class DataProcessor {
baseDir: string,
facetsOutputDir?: string,
): Promise<InsightData> {
// Initialize data structures
const heatmap: HeatMapData = {};
const tokenUsage: TokenUsageData = {};
const activeHours: { [hour: number]: number } = {};
const sessionStartTimes: { [sessionId: string]: Date } = {};
const sessionEndTimes: { [sessionId: string]: Date } = {};
const allChatFiles = await this.scanChatFiles(baseDir);
// Store all valid chat file paths for LLM analysis
const [metrics, facets] = await Promise.all([
this.generateMetrics(allChatFiles),
this.generateFacets(allChatFiles, facetsOutputDir),
]);
const qualitative = await this.generateQualitativeInsights(metrics, facets);
return {
...metrics,
qualitative,
};
}
/**
 * Generates the eight qualitative insight sections by issuing one
 * structured-JSON LLM request per section, in parallel (bounded to 4
 * concurrent requests via pLimit), against a shared context string built
 * from the quantitative metrics and per-session facets.
 *
 * @param metrics Aggregated quantitative metrics (heatmap, totals, tools, …).
 * @param facets Per-session facet analyses; when empty there is nothing to
 *   reason about and undefined is returned immediately.
 * @returns All eight sections on success, or undefined when facets are empty
 *   or any section fails to generate (the error is logged, not rethrown).
 */
private async generateQualitativeInsights(
  metrics: Omit<InsightData, 'facets' | 'qualitative'>,
  facets: SessionFacets[],
): Promise<QualitativeInsights | undefined> {
  if (facets.length === 0) {
    return undefined;
  }
  console.log('Generating qualitative insights...');
  // Shared DATA / SESSION SUMMARIES / FRICTION DETAILS block appended to
  // every section prompt below.
  const commonData = this.prepareCommonPromptData(metrics, facets);
  // Helper: run one schema-constrained generation with a 60s timeout.
  // Errors are logged here and rethrown so the Promise.all below rejects
  // as a whole (caught at the bottom of this method).
  const generate = async <T>(
    promptTemplate: string,
    schema: Record<string, unknown>,
  ): Promise<T> => {
    const prompt = `${promptTemplate}\n\n${commonData}`;
    try {
      const result = await this.config.getBaseLlmClient().generateJson({
        model: this.config.getModel(),
        contents: [{ role: 'user', parts: [{ text: prompt }] }],
        schema,
        abortSignal: AbortSignal.timeout(60000),
      });
      // generateJson returns an untyped JSON object; the schema constrains
      // its shape, so the cast to T is a trusted narrowing.
      return result as T;
    } catch (error) {
      console.error('Failed to generate insight:', error);
      throw error;
    }
  };
  // Schemas for each insight type.
  // These are simplified JSON schemas to guide the LLM; the corresponding
  // TS types live in QualitativeInsightTypes.ts.
  // 1. Impressive Workflows
  const schemaImpressiveWorkflows = {
    type: 'object',
    properties: {
      intro: { type: 'string' },
      impressive_workflows: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            description: { type: 'string' },
          },
          required: ['title', 'description'],
        },
      },
    },
    required: ['intro', 'impressive_workflows'],
  };
  // 2. Project Areas
  const schemaProjectAreas = {
    type: 'object',
    properties: {
      areas: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            name: { type: 'string' },
            session_count: { type: 'number' },
            description: { type: 'string' },
          },
          required: ['name', 'session_count', 'description'],
        },
      },
    },
    required: ['areas'],
  };
  // 3. Future Opportunities
  const schemaFutureOpportunities = {
    type: 'object',
    properties: {
      intro: { type: 'string' },
      opportunities: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            whats_possible: { type: 'string' },
            how_to_try: { type: 'string' },
            copyable_prompt: { type: 'string' },
          },
          required: [
            'title',
            'whats_possible',
            'how_to_try',
            'copyable_prompt',
          ],
        },
      },
    },
    required: ['intro', 'opportunities'],
  };
  // 4. Friction Points
  const schemaFrictionPoints = {
    type: 'object',
    properties: {
      intro: { type: 'string' },
      categories: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            category: { type: 'string' },
            description: { type: 'string' },
            examples: { type: 'array', items: { type: 'string' } },
          },
          required: ['category', 'description', 'examples'],
        },
      },
    },
    required: ['intro', 'categories'],
  };
  // 5. Memorable Moment
  const schemaMemorableMoment = {
    type: 'object',
    properties: {
      headline: { type: 'string' },
      detail: { type: 'string' },
    },
    required: ['headline', 'detail'],
  };
  // 6. Improvements
  const schemaImprovements = {
    type: 'object',
    properties: {
      Qwen_md_additions: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            addition: { type: 'string' },
            why: { type: 'string' },
            prompt_scaffold: { type: 'string' },
          },
          required: ['addition', 'why', 'prompt_scaffold'],
        },
      },
      features_to_try: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            feature: { type: 'string' },
            one_liner: { type: 'string' },
            why_for_you: { type: 'string' },
            example_code: { type: 'string' },
          },
          required: ['feature', 'one_liner', 'why_for_you', 'example_code'],
        },
      },
      usage_patterns: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            suggestion: { type: 'string' },
            detail: { type: 'string' },
            copyable_prompt: { type: 'string' },
          },
          required: ['title', 'suggestion', 'detail', 'copyable_prompt'],
        },
      },
    },
    required: ['Qwen_md_additions', 'features_to_try', 'usage_patterns'],
  };
  // 7. Interaction Style
  const schemaInteractionStyle = {
    type: 'object',
    properties: {
      narrative: { type: 'string' },
      key_pattern: { type: 'string' },
    },
    required: ['narrative', 'key_pattern'],
  };
  // 8. At A Glance
  const schemaAtAGlance = {
    type: 'object',
    properties: {
      whats_working: { type: 'string' },
      whats_hindering: { type: 'string' },
      quick_wins: { type: 'string' },
      ambitious_workflows: { type: 'string' },
    },
    required: [
      'whats_working',
      'whats_hindering',
      'quick_wins',
      'ambitious_workflows',
    ],
  };
  // Cap concurrent LLM requests at 4.
  const limit = pLimit(4);
  try {
    // The destructuring order below MUST match the Promise.all array order.
    const [
      impressiveWorkflows,
      projectAreas,
      futureOpportunities,
      frictionPoints,
      memorableMoment,
      improvements,
      interactionStyle,
      atAGlance,
    ] = await Promise.all([
      limit(() =>
        generate<InsightImpressiveWorkflows>(
          PROMPT_IMPRESSIVE_WORKFLOWS,
          schemaImpressiveWorkflows,
        ),
      ),
      limit(() =>
        generate<InsightProjectAreas>(
          PROMPT_PROJECT_AREAS,
          schemaProjectAreas,
        ),
      ),
      limit(() =>
        generate<InsightFutureOpportunities>(
          PROMPT_FUTURE_OPPORTUNITIES,
          schemaFutureOpportunities,
        ),
      ),
      limit(() =>
        generate<InsightFrictionPoints>(
          PROMPT_FRICTION_POINTS,
          schemaFrictionPoints,
        ),
      ),
      limit(() =>
        generate<InsightMemorableMoment>(
          PROMPT_MEMORABLE_MOMENT,
          schemaMemorableMoment,
        ),
      ),
      limit(() =>
        generate<InsightImprovements>(
          PROMPT_IMPROVEMENTS,
          schemaImprovements,
        ),
      ),
      limit(() =>
        generate<InsightInteractionStyle>(
          PROMPT_INTERACTION_STYLE,
          schemaInteractionStyle,
        ),
      ),
      limit(() =>
        generate<InsightAtAGlance>(PROMPT_AT_A_GLANCE, schemaAtAGlance),
      ),
    ]);
    return {
      impressiveWorkflows,
      projectAreas,
      futureOpportunities,
      frictionPoints,
      memorableMoment,
      improvements,
      interactionStyle,
      atAGlance,
    };
  } catch (e) {
    // Qualitative insights are best-effort: degrade to undefined rather
    // than failing the whole insight pipeline.
    console.error('Error generating qualitative insights:', e);
    return undefined;
  }
}
/**
 * Builds the context string shared by every qualitative-insight prompt:
 * a DATA section (aggregated facet counts as JSON), per-session SESSION
 * SUMMARIES, and FRICTION DETAILS.
 *
 * @param metrics Aggregated quantitative metrics; heatmap keys supply the
 *   covered date range, totals feed the DATA section.
 * @param facets Per-session facet analyses to aggregate.
 * @returns The prompt suffix string (never empty).
 */
private prepareCommonPromptData(
  metrics: Omit<InsightData, 'facets' | 'qualitative'>,
  facets: SessionFacets[],
): string {
  // 1. DATA section — fold per-session counts into global aggregates.
  const goalsAgg: Record<string, number> = {};
  const outcomesAgg: Record<string, number> = {};
  const satisfactionAgg: Record<string, number> = {};
  const frictionAgg: Record<string, number> = {};
  const successAgg: Record<string, number> = {};
  for (const facet of facets) {
    // Aggregate goals
    for (const [goal, count] of Object.entries(facet.goal_categories)) {
      goalsAgg[goal] = (goalsAgg[goal] || 0) + count;
    }
    // Aggregate outcomes (one per session)
    outcomesAgg[facet.outcome] = (outcomesAgg[facet.outcome] || 0) + 1;
    // Aggregate satisfaction
    for (const [sat, count] of Object.entries(
      facet.user_satisfaction_counts,
    )) {
      satisfactionAgg[sat] = (satisfactionAgg[sat] || 0) + count;
    }
    // Aggregate friction
    for (const [fric, count] of Object.entries(facet.friction_counts)) {
      frictionAgg[fric] = (frictionAgg[fric] || 0) + count;
    }
    // Aggregate primary_success; 'none' is the explicit no-success sentinel.
    if (facet.primary_success && facet.primary_success !== 'none') {
      successAgg[facet.primary_success] =
        (successAgg[facet.primary_success] || 0) + 1;
    }
  }
  const topGoals = Object.entries(goalsAgg)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 8);
  // Sort the heatmap date keys once (was previously sorted twice — once for
  // the range start and again for the end).
  const sortedDates = Object.keys(metrics.heatmap).sort();
  const dataObj = {
    sessions: metrics.totalSessions || facets.length,
    analyzed: facets.length,
    date_range: {
      start: sortedDates[0] || 'N/A',
      end: sortedDates[sortedDates.length - 1] || 'N/A',
    },
    messages: metrics.totalMessages || 0,
    hours: metrics.totalHours || 0,
    commits: 0, // Not tracked yet
    top_tools: metrics.topTools || [],
    top_goals: topGoals,
    outcomes: outcomesAgg,
    satisfaction: satisfactionAgg,
    friction: frictionAgg,
    success: successAgg,
  };
  // 2. SESSION SUMMARIES section
  const sessionSummaries = facets
    .map((f) => `- ${f.brief_summary}`)
    .join('\n');
  // 3. FRICTION DETAILS section — only sessions with non-blank detail.
  const frictionDetails = facets
    .filter((f) => f.friction_detail && f.friction_detail.trim().length > 0)
    .map((f) => `- ${f.friction_detail}`)
    .join('\n');
  return `DATA:
${JSON.stringify(dataObj, null, 2)}
SESSION SUMMARIES:
${sessionSummaries}
FRICTION DETAILS:
${frictionDetails}
USER INSTRUCTIONS TO Qwen:
None captured`;
}
private async scanChatFiles(
baseDir: string,
): Promise<Array<{ path: string; mtime: number }>> {
const allChatFiles: Array<{ path: string; mtime: number }> = [];
try {
@ -390,11 +761,22 @@ export class DataProcessor {
if (stats.isDirectory()) {
const chatsDir = path.join(projectPath, 'chats');
let chatFiles: string[] = [];
try {
// Get all chat files in the chats directory
const files = await fs.readdir(chatsDir);
chatFiles = files.filter((file) => file.endsWith('.jsonl'));
const chatFiles = files.filter((file) => file.endsWith('.jsonl'));
for (const file of chatFiles) {
const filePath = path.join(chatsDir, file);
// Get file stats for sorting by recency
try {
const fileStats = await fs.stat(filePath);
allChatFiles.push({ path: filePath, mtime: fileStats.mtimeMs });
} catch (e) {
console.error(`Failed to stat file ${filePath}:`, e);
}
}
} catch (error) {
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
console.log(
@ -404,68 +786,157 @@ export class DataProcessor {
// Continue to next project if chats directory doesn't exist
continue;
}
// Process each chat file in this project
for (const file of chatFiles) {
const filePath = path.join(chatsDir, file);
// Get file stats for sorting by recency
try {
const fileStats = await fs.stat(filePath);
allChatFiles.push({ path: filePath, mtime: fileStats.mtimeMs });
} catch (e) {
console.error(`Failed to stat file ${filePath}:`, e);
}
const records = await readJsonlFile<ChatRecord>(filePath);
// Process each record
for (const record of records) {
const timestamp = new Date(record.timestamp);
const dateKey = this.formatDate(timestamp);
const hour = timestamp.getHours();
// Update heatmap (count of interactions per day)
heatmap[dateKey] = (heatmap[dateKey] || 0) + 1;
// Update active hours
activeHours[hour] = (activeHours[hour] || 0) + 1;
// Update token usage
if (record.usageMetadata) {
const usage = tokenUsage[dateKey] || {
input: 0,
output: 0,
total: 0,
};
usage.input += record.usageMetadata.promptTokenCount || 0;
usage.output += record.usageMetadata.candidatesTokenCount || 0;
usage.total += record.usageMetadata.totalTokenCount || 0;
tokenUsage[dateKey] = usage;
}
// Track session times
if (!sessionStartTimes[record.sessionId]) {
sessionStartTimes[record.sessionId] = timestamp;
}
sessionEndTimes[record.sessionId] = timestamp;
}
}
}
}
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
// Base directory doesn't exist, return empty insights
// Base directory doesn't exist, return empty
console.log(`Base directory does not exist: ${baseDir}`);
} else {
console.log(`Error reading base directory: ${error}`);
}
}
return allChatFiles;
}
/**
 * Aggregates quantitative metrics across the given chat files: daily
 * interaction heatmap, token usage per day, active-hour histogram, session
 * durations/streaks, and tool-usage counts.
 *
 * @param files Chat files discovered by scanChatFiles (path + mtime).
 * @returns All metric fields of InsightData except facets/qualitative.
 */
private async generateMetrics(
  files: Array<{ path: string; mtime: number }>,
): Promise<Omit<InsightData, 'facets' | 'qualitative'>> {
  // Initialize data structures
  const heatmap: HeatMapData = {};
  const tokenUsage: TokenUsageData = {};
  const activeHours: { [hour: number]: number } = {};
  const sessionStartTimes: { [sessionId: string]: Date } = {};
  const sessionEndTimes: { [sessionId: string]: Date } = {};
  let totalMessages = 0;
  const toolUsage: Record<string, number> = {};
  // Most recent record timestamp seen across all files; feeds
  // latestActiveTime below.
  let latestRecordTime: Date | null = null;
  for (const fileInfo of files) {
    const records = await readJsonlFile<ChatRecord>(fileInfo.path);
    totalMessages += records.length;
    // Process each record
    for (const record of records) {
      const timestamp = new Date(record.timestamp);
      const dateKey = this.formatDate(timestamp);
      const hour = timestamp.getHours();
      // Update heatmap (count of interactions per day)
      heatmap[dateKey] = (heatmap[dateKey] || 0) + 1;
      // Update active hours
      activeHours[hour] = (activeHours[hour] || 0) + 1;
      // Update token usage
      if (record.usageMetadata) {
        const usage = tokenUsage[dateKey] || {
          input: 0,
          output: 0,
          total: 0,
        };
        usage.input += record.usageMetadata.promptTokenCount || 0;
        usage.output += record.usageMetadata.candidatesTokenCount || 0;
        usage.total += record.usageMetadata.totalTokenCount || 0;
        tokenUsage[dateKey] = usage;
      }
      // Track session times (records are assumed chronological per file, so
      // the last record seen for a session is its end — TODO confirm).
      if (!sessionStartTimes[record.sessionId]) {
        sessionStartTimes[record.sessionId] = timestamp;
      }
      sessionEndTimes[record.sessionId] = timestamp;
      // Track the globally latest activity timestamp.
      if (latestRecordTime === null || timestamp > latestRecordTime) {
        latestRecordTime = timestamp;
      }
      // Track tool usage
      if (record.type === 'assistant' && record.message?.parts) {
        for (const part of record.message.parts) {
          if ('functionCall' in part) {
            // Guard instead of `part.functionCall!.name!`: tolerate a
            // malformed record with a missing call or name.
            const name = part.functionCall?.name;
            if (name) {
              toolUsage[name] = (toolUsage[name] || 0) + 1;
            }
          }
        }
      }
    }
  }
  // Calculate streak data
  const streakData = this.calculateStreaks(Object.keys(heatmap));
  // Calculate longest work session and total hours
  let longestWorkDuration = 0;
  let longestWorkDate: string | null = null;
  let totalDurationMs = 0;
  const sessionIds = Object.keys(sessionStartTimes);
  const totalSessions = sessionIds.length;
  for (const sessionId of sessionIds) {
    const start = sessionStartTimes[sessionId];
    const end = sessionEndTimes[sessionId];
    const durationMs = end.getTime() - start.getTime();
    const durationMinutes = Math.round(durationMs / (1000 * 60));
    totalDurationMs += durationMs;
    if (durationMinutes > longestWorkDuration) {
      longestWorkDuration = durationMinutes;
      longestWorkDate = this.formatDate(start);
    }
  }
  const totalHours = Math.round(totalDurationMs / (1000 * 60 * 60));
  // Calculate latest active time.
  // BUG FIX: this was previously derived from heatmap keys, which are
  // date-only strings — `new Date('YYYY-MM-DD')` carries no time of day, so
  // latestActiveTime always rendered as midnight. Use the actual most
  // recent record timestamp instead.
  const latestActiveTime: string | null = latestRecordTime
    ? latestRecordTime.toLocaleTimeString([], {
        hour: '2-digit',
        minute: '2-digit',
      })
    : null;
  // Calculate top tools
  const topTools = Object.entries(toolUsage)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);
  // Calculate achievements
  const achievements = this.calculateAchievements(
    activeHours,
    heatmap,
    tokenUsage,
  );
  return {
    heatmap,
    tokenUsage,
    currentStreak: streakData.currentStreak,
    longestStreak: streakData.longestStreak,
    longestWorkDate,
    longestWorkDuration,
    activeHours,
    latestActiveTime,
    achievements,
    totalSessions,
    totalMessages,
    totalHours,
    topTools,
  };
}
private async generateFacets(
allFiles: Array<{ path: string; mtime: number }>,
facetsOutputDir?: string,
): Promise<SessionFacets[]> {
// Sort files by recency (descending) and take top 50
const recentFiles = allChatFiles
const recentFiles = [...allFiles]
.sort((a, b) => b.mtime - a.mtime)
.slice(0, 50);
@ -541,58 +1012,6 @@ export class DataProcessor {
const facets = sessionFacetsWithNulls.filter(
(f): f is SessionFacets => f !== null,
);
// Calculate streak data
const streakData = this.calculateStreaks(Object.keys(heatmap));
// Calculate longest work session
let longestWorkDuration = 0;
let longestWorkDate: string | null = null;
for (const sessionId in sessionStartTimes) {
const start = sessionStartTimes[sessionId];
const end = sessionEndTimes[sessionId];
const durationMinutes = Math.round(
(end.getTime() - start.getTime()) / (1000 * 60),
);
if (durationMinutes > longestWorkDuration) {
longestWorkDuration = durationMinutes;
longestWorkDate = this.formatDate(start);
}
}
// Calculate latest active time
let latestActiveTime: string | null = null;
let latestTimestamp = new Date(0);
for (const dateStr in heatmap) {
const date = new Date(dateStr);
if (date > latestTimestamp) {
latestTimestamp = date;
latestActiveTime = date.toLocaleTimeString([], {
hour: '2-digit',
minute: '2-digit',
});
}
}
// Calculate achievements
const achievements = this.calculateAchievements(
activeHours,
heatmap,
tokenUsage,
);
return {
heatmap,
tokenUsage,
currentStreak: streakData.currentStreak,
longestStreak: streakData.longestStreak,
longestWorkDate,
longestWorkDuration,
activeHours,
latestActiveTime,
achievements,
facets,
};
return facets;
}
}