feat(insight): refactor data processing and add qualitative insights structure

This commit is contained in:
DragonnZhang 2026-02-06 00:44:04 +08:00
parent f1214c90ea
commit 5a7dcce196
4 changed files with 884 additions and 225 deletions

View file

@ -17,116 +17,29 @@ import type {
StreakData,
SessionFacets,
} from '../types/StaticInsightTypes.js';
import type {
QualitativeInsights,
InsightImpressiveWorkflows,
InsightProjectAreas,
InsightFutureOpportunities,
InsightFrictionPoints,
InsightMemorableMoment,
InsightImprovements,
InsightInteractionStyle,
InsightAtAGlance,
} from '../types/QualitativeInsightTypes.js';
// Prompt content from prompt.txt
// Prompt sent to the LLM to extract structured per-session facets.
// NOTE(review): the signal → label separators in guideline 2 appear to have
// been stripped in transit (e.g. `"perfect!" happy`); restored here so the
// model sees an explicit mapping instead of two fused tokens.
const ANALYSIS_PROMPT = `Analyze this Qwen Code session and extract structured facets.
CRITICAL GUIDELINES:
1. **goal_categories**: Count ONLY what the USER explicitly asked for.
- DO NOT count Qwen's autonomous codebase exploration
- DO NOT count work Qwen decided to do on its own
- ONLY count when user says "can you...", "please...", "I need...", "let's..."
2. **user_satisfaction_counts**: Base ONLY on explicit user signals.
- "Yay!", "great!", "perfect!" → happy
- "thanks", "looks good", "that works" → satisfied
- "ok, now let's..." (continuing without complaint) → likely_satisfied
- "that's not right", "try again" → dissatisfied
- "this is broken", "I give up" → frustrated
3. **friction_counts**: Be specific about what went wrong.
- misunderstood_request: Qwen interpreted incorrectly
- wrong_approach: Right goal, wrong solution method
- buggy_code: Code didn't work correctly
- user_rejected_action: User said no/stop to a tool call
- excessive_changes: Over-engineered or changed too much
4. If very short or just warmup, use warmup_minimal for goal_category`;
// JSON schema handed to the LLM alongside ANALYSIS_PROMPT to force a
// structured facet object per session. Every property is listed in
// `required`, so the model always emits a complete object.
// NOTE(review): presumably mirrors the SessionFacets type — confirm against
// StaticInsightTypes.
const INSIGHT_SCHEMA = {
  type: 'object',
  properties: {
    underlying_goal: {
      type: 'string',
      description: 'What the user fundamentally wanted to achieve',
    },
    // Open-ended category → count map (only user-requested work is counted;
    // see ANALYSIS_PROMPT guideline 1).
    goal_categories: {
      type: 'object',
      additionalProperties: { type: 'number' },
    },
    // Single overall outcome judgment for the session.
    outcome: {
      type: 'string',
      enum: [
        'fully_achieved',
        'mostly_achieved',
        'partially_achieved',
        'not_achieved',
        'unclear_from_transcript',
      ],
    },
    // Satisfaction-signal → count map (explicit user signals only).
    user_satisfaction_counts: {
      type: 'object',
      additionalProperties: { type: 'number' },
    },
    Qwen_helpfulness: {
      type: 'string',
      enum: [
        'unhelpful',
        'slightly_helpful',
        'moderately_helpful',
        'very_helpful',
        'essential',
      ],
    },
    session_type: {
      type: 'string',
      enum: [
        'single_task',
        'multi_task',
        'iterative_refinement',
        'exploration',
        'quick_question',
      ],
    },
    // Friction-category → count map (see ANALYSIS_PROMPT guideline 3).
    friction_counts: {
      type: 'object',
      additionalProperties: { type: 'number' },
    },
    friction_detail: {
      type: 'string',
      description: 'One sentence describing friction or empty',
    },
    // 'none' is the explicit "no notable success" sentinel.
    primary_success: {
      type: 'string',
      enum: [
        'none',
        'fast_accurate_search',
        'correct_code_edits',
        'good_explanations',
        'proactive_help',
        'multi_file_changes',
        'good_debugging',
      ],
    },
    brief_summary: {
      type: 'string',
      description: 'One sentence: what user wanted and whether they got it',
    },
  },
  required: [
    'underlying_goal',
    'goal_categories',
    'outcome',
    'user_satisfaction_counts',
    'Qwen_helpfulness',
    'session_type',
    'friction_counts',
    'friction_detail',
    'primary_success',
    'brief_summary',
  ],
};
import {
PROMPT_IMPRESSIVE_WORKFLOWS,
PROMPT_PROJECT_AREAS,
PROMPT_FUTURE_OPPORTUNITIES,
PROMPT_FRICTION_POINTS,
PROMPT_MEMORABLE_MOMENT,
PROMPT_IMPROVEMENTS,
PROMPT_INTERACTION_STYLE,
PROMPT_AT_A_GLANCE,
ANALYSIS_PROMPT,
} from '../prompts/InsightPrompts.js';
export class DataProcessor {
constructor(private config: Config) {}
@ -159,8 +72,7 @@ export class DataProcessor {
if ('text' in part && part.text) {
output += `[Assistant]: ${part.text}\n`;
} else if ('functionCall' in part) {
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const call = (part as any).functionCall;
const call = part.functionCall;
if (call) {
output += `[Tool: ${call.name}]\n`;
}
@ -178,6 +90,90 @@ export class DataProcessor {
): Promise<SessionFacets | null> {
if (records.length === 0) return null;
const INSIGHT_SCHEMA = {
type: 'object',
properties: {
underlying_goal: {
type: 'string',
description: 'What the user fundamentally wanted to achieve',
},
goal_categories: {
type: 'object',
additionalProperties: { type: 'number' },
},
outcome: {
type: 'string',
enum: [
'fully_achieved',
'mostly_achieved',
'partially_achieved',
'not_achieved',
'unclear_from_transcript',
],
},
user_satisfaction_counts: {
type: 'object',
additionalProperties: { type: 'number' },
},
Qwen_helpfulness: {
type: 'string',
enum: [
'unhelpful',
'slightly_helpful',
'moderately_helpful',
'very_helpful',
'essential',
],
},
session_type: {
type: 'string',
enum: [
'single_task',
'multi_task',
'iterative_refinement',
'exploration',
'quick_question',
],
},
friction_counts: {
type: 'object',
additionalProperties: { type: 'number' },
},
friction_detail: {
type: 'string',
description: 'One sentence describing friction or empty',
},
primary_success: {
type: 'string',
enum: [
'none',
'fast_accurate_search',
'correct_code_edits',
'good_explanations',
'proactive_help',
'multi_file_changes',
'good_debugging',
],
},
brief_summary: {
type: 'string',
description: 'One sentence: what user wanted and whether they got it',
},
},
required: [
'underlying_goal',
'goal_categories',
'outcome',
'user_satisfaction_counts',
'Qwen_helpfulness',
'session_type',
'friction_counts',
'friction_detail',
'primary_success',
'brief_summary',
],
};
const sessionText = this.formatRecordsForAnalysis(records);
const prompt = `${ANALYSIS_PROMPT}\n\nSESSION:\n${sessionText}`;
@ -367,14 +363,389 @@ export class DataProcessor {
baseDir: string,
facetsOutputDir?: string,
): Promise<InsightData> {
// Initialize data structures
const heatmap: HeatMapData = {};
const tokenUsage: TokenUsageData = {};
const activeHours: { [hour: number]: number } = {};
const sessionStartTimes: { [sessionId: string]: Date } = {};
const sessionEndTimes: { [sessionId: string]: Date } = {};
const allChatFiles = await this.scanChatFiles(baseDir);
// Store all valid chat file paths for LLM analysis
const [metrics, facets] = await Promise.all([
this.generateMetrics(allChatFiles),
this.generateFacets(allChatFiles, facetsOutputDir),
]);
const qualitative = await this.generateQualitativeInsights(metrics, facets);
return {
...metrics,
qualitative,
};
}
/**
 * Generates the eight qualitative insight sections by issuing one
 * structured-JSON LLM request per section, in parallel (bounded to 4
 * concurrent requests via pLimit), against a shared context string built
 * from the quantitative metrics and per-session facets.
 *
 * @param metrics Aggregated quantitative metrics (heatmap, totals, tools, …).
 * @param facets Per-session facet analyses; when empty there is nothing to
 *   reason about and undefined is returned immediately.
 * @returns All eight sections on success, or undefined when facets are empty
 *   or any section fails to generate (the error is logged, not rethrown).
 */
private async generateQualitativeInsights(
  metrics: Omit<InsightData, 'facets' | 'qualitative'>,
  facets: SessionFacets[],
): Promise<QualitativeInsights | undefined> {
  if (facets.length === 0) {
    return undefined;
  }
  console.log('Generating qualitative insights...');
  // Shared DATA / SESSION SUMMARIES / FRICTION DETAILS block appended to
  // every section prompt below.
  const commonData = this.prepareCommonPromptData(metrics, facets);
  // Helper: run one schema-constrained generation with a 60s timeout.
  // Errors are logged here and rethrown so the Promise.all below rejects
  // as a whole (caught at the bottom of this method).
  const generate = async <T>(
    promptTemplate: string,
    schema: Record<string, unknown>,
  ): Promise<T> => {
    const prompt = `${promptTemplate}\n\n${commonData}`;
    try {
      const result = await this.config.getBaseLlmClient().generateJson({
        model: this.config.getModel(),
        contents: [{ role: 'user', parts: [{ text: prompt }] }],
        schema,
        abortSignal: AbortSignal.timeout(60000),
      });
      // generateJson returns an untyped JSON object; the schema constrains
      // its shape, so the cast to T is a trusted narrowing.
      return result as T;
    } catch (error) {
      console.error('Failed to generate insight:', error);
      throw error;
    }
  };
  // Schemas for each insight type.
  // These are simplified JSON schemas to guide the LLM; the corresponding
  // TS types live in QualitativeInsightTypes.ts.
  // 1. Impressive Workflows
  const schemaImpressiveWorkflows = {
    type: 'object',
    properties: {
      intro: { type: 'string' },
      impressive_workflows: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            description: { type: 'string' },
          },
          required: ['title', 'description'],
        },
      },
    },
    required: ['intro', 'impressive_workflows'],
  };
  // 2. Project Areas
  const schemaProjectAreas = {
    type: 'object',
    properties: {
      areas: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            name: { type: 'string' },
            session_count: { type: 'number' },
            description: { type: 'string' },
          },
          required: ['name', 'session_count', 'description'],
        },
      },
    },
    required: ['areas'],
  };
  // 3. Future Opportunities
  const schemaFutureOpportunities = {
    type: 'object',
    properties: {
      intro: { type: 'string' },
      opportunities: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            whats_possible: { type: 'string' },
            how_to_try: { type: 'string' },
            copyable_prompt: { type: 'string' },
          },
          required: [
            'title',
            'whats_possible',
            'how_to_try',
            'copyable_prompt',
          ],
        },
      },
    },
    required: ['intro', 'opportunities'],
  };
  // 4. Friction Points
  const schemaFrictionPoints = {
    type: 'object',
    properties: {
      intro: { type: 'string' },
      categories: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            category: { type: 'string' },
            description: { type: 'string' },
            examples: { type: 'array', items: { type: 'string' } },
          },
          required: ['category', 'description', 'examples'],
        },
      },
    },
    required: ['intro', 'categories'],
  };
  // 5. Memorable Moment
  const schemaMemorableMoment = {
    type: 'object',
    properties: {
      headline: { type: 'string' },
      detail: { type: 'string' },
    },
    required: ['headline', 'detail'],
  };
  // 6. Improvements
  const schemaImprovements = {
    type: 'object',
    properties: {
      Qwen_md_additions: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            addition: { type: 'string' },
            why: { type: 'string' },
            prompt_scaffold: { type: 'string' },
          },
          required: ['addition', 'why', 'prompt_scaffold'],
        },
      },
      features_to_try: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            feature: { type: 'string' },
            one_liner: { type: 'string' },
            why_for_you: { type: 'string' },
            example_code: { type: 'string' },
          },
          required: ['feature', 'one_liner', 'why_for_you', 'example_code'],
        },
      },
      usage_patterns: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            title: { type: 'string' },
            suggestion: { type: 'string' },
            detail: { type: 'string' },
            copyable_prompt: { type: 'string' },
          },
          required: ['title', 'suggestion', 'detail', 'copyable_prompt'],
        },
      },
    },
    required: ['Qwen_md_additions', 'features_to_try', 'usage_patterns'],
  };
  // 7. Interaction Style
  const schemaInteractionStyle = {
    type: 'object',
    properties: {
      narrative: { type: 'string' },
      key_pattern: { type: 'string' },
    },
    required: ['narrative', 'key_pattern'],
  };
  // 8. At A Glance
  const schemaAtAGlance = {
    type: 'object',
    properties: {
      whats_working: { type: 'string' },
      whats_hindering: { type: 'string' },
      quick_wins: { type: 'string' },
      ambitious_workflows: { type: 'string' },
    },
    required: [
      'whats_working',
      'whats_hindering',
      'quick_wins',
      'ambitious_workflows',
    ],
  };
  // Cap concurrent LLM requests at 4.
  const limit = pLimit(4);
  try {
    // The destructuring order below MUST match the Promise.all array order.
    const [
      impressiveWorkflows,
      projectAreas,
      futureOpportunities,
      frictionPoints,
      memorableMoment,
      improvements,
      interactionStyle,
      atAGlance,
    ] = await Promise.all([
      limit(() =>
        generate<InsightImpressiveWorkflows>(
          PROMPT_IMPRESSIVE_WORKFLOWS,
          schemaImpressiveWorkflows,
        ),
      ),
      limit(() =>
        generate<InsightProjectAreas>(
          PROMPT_PROJECT_AREAS,
          schemaProjectAreas,
        ),
      ),
      limit(() =>
        generate<InsightFutureOpportunities>(
          PROMPT_FUTURE_OPPORTUNITIES,
          schemaFutureOpportunities,
        ),
      ),
      limit(() =>
        generate<InsightFrictionPoints>(
          PROMPT_FRICTION_POINTS,
          schemaFrictionPoints,
        ),
      ),
      limit(() =>
        generate<InsightMemorableMoment>(
          PROMPT_MEMORABLE_MOMENT,
          schemaMemorableMoment,
        ),
      ),
      limit(() =>
        generate<InsightImprovements>(
          PROMPT_IMPROVEMENTS,
          schemaImprovements,
        ),
      ),
      limit(() =>
        generate<InsightInteractionStyle>(
          PROMPT_INTERACTION_STYLE,
          schemaInteractionStyle,
        ),
      ),
      limit(() =>
        generate<InsightAtAGlance>(PROMPT_AT_A_GLANCE, schemaAtAGlance),
      ),
    ]);
    return {
      impressiveWorkflows,
      projectAreas,
      futureOpportunities,
      frictionPoints,
      memorableMoment,
      improvements,
      interactionStyle,
      atAGlance,
    };
  } catch (e) {
    // Qualitative insights are best-effort: degrade to undefined rather
    // than failing the whole insight pipeline.
    console.error('Error generating qualitative insights:', e);
    return undefined;
  }
}
/**
 * Builds the context string shared by every qualitative-insight prompt:
 * a DATA section (aggregated facet counts as JSON), per-session SESSION
 * SUMMARIES, and FRICTION DETAILS.
 *
 * @param metrics Aggregated quantitative metrics; heatmap keys supply the
 *   covered date range, totals feed the DATA section.
 * @param facets Per-session facet analyses to aggregate.
 * @returns The prompt suffix string (never empty).
 */
private prepareCommonPromptData(
  metrics: Omit<InsightData, 'facets' | 'qualitative'>,
  facets: SessionFacets[],
): string {
  // 1. DATA section — fold per-session counts into global aggregates.
  const goalsAgg: Record<string, number> = {};
  const outcomesAgg: Record<string, number> = {};
  const satisfactionAgg: Record<string, number> = {};
  const frictionAgg: Record<string, number> = {};
  const successAgg: Record<string, number> = {};
  for (const facet of facets) {
    // Aggregate goals
    for (const [goal, count] of Object.entries(facet.goal_categories)) {
      goalsAgg[goal] = (goalsAgg[goal] || 0) + count;
    }
    // Aggregate outcomes (one per session)
    outcomesAgg[facet.outcome] = (outcomesAgg[facet.outcome] || 0) + 1;
    // Aggregate satisfaction
    for (const [sat, count] of Object.entries(
      facet.user_satisfaction_counts,
    )) {
      satisfactionAgg[sat] = (satisfactionAgg[sat] || 0) + count;
    }
    // Aggregate friction
    for (const [fric, count] of Object.entries(facet.friction_counts)) {
      frictionAgg[fric] = (frictionAgg[fric] || 0) + count;
    }
    // Aggregate primary_success; 'none' is the explicit no-success sentinel.
    if (facet.primary_success && facet.primary_success !== 'none') {
      successAgg[facet.primary_success] =
        (successAgg[facet.primary_success] || 0) + 1;
    }
  }
  const topGoals = Object.entries(goalsAgg)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 8);
  // Sort the heatmap date keys once (was previously sorted twice — once for
  // the range start and again for the end).
  const sortedDates = Object.keys(metrics.heatmap).sort();
  const dataObj = {
    sessions: metrics.totalSessions || facets.length,
    analyzed: facets.length,
    date_range: {
      start: sortedDates[0] || 'N/A',
      end: sortedDates[sortedDates.length - 1] || 'N/A',
    },
    messages: metrics.totalMessages || 0,
    hours: metrics.totalHours || 0,
    commits: 0, // Not tracked yet
    top_tools: metrics.topTools || [],
    top_goals: topGoals,
    outcomes: outcomesAgg,
    satisfaction: satisfactionAgg,
    friction: frictionAgg,
    success: successAgg,
  };
  // 2. SESSION SUMMARIES section
  const sessionSummaries = facets
    .map((f) => `- ${f.brief_summary}`)
    .join('\n');
  // 3. FRICTION DETAILS section — only sessions with non-blank detail.
  const frictionDetails = facets
    .filter((f) => f.friction_detail && f.friction_detail.trim().length > 0)
    .map((f) => `- ${f.friction_detail}`)
    .join('\n');
  return `DATA:
${JSON.stringify(dataObj, null, 2)}
SESSION SUMMARIES:
${sessionSummaries}
FRICTION DETAILS:
${frictionDetails}
USER INSTRUCTIONS TO Qwen:
None captured`;
}
private async scanChatFiles(
baseDir: string,
): Promise<Array<{ path: string; mtime: number }>> {
const allChatFiles: Array<{ path: string; mtime: number }> = [];
try {
@ -390,11 +761,22 @@ export class DataProcessor {
if (stats.isDirectory()) {
const chatsDir = path.join(projectPath, 'chats');
let chatFiles: string[] = [];
try {
// Get all chat files in the chats directory
const files = await fs.readdir(chatsDir);
chatFiles = files.filter((file) => file.endsWith('.jsonl'));
const chatFiles = files.filter((file) => file.endsWith('.jsonl'));
for (const file of chatFiles) {
const filePath = path.join(chatsDir, file);
// Get file stats for sorting by recency
try {
const fileStats = await fs.stat(filePath);
allChatFiles.push({ path: filePath, mtime: fileStats.mtimeMs });
} catch (e) {
console.error(`Failed to stat file ${filePath}:`, e);
}
}
} catch (error) {
if ((error as NodeJS.ErrnoException).code !== 'ENOENT') {
console.log(
@ -404,68 +786,157 @@ export class DataProcessor {
// Continue to next project if chats directory doesn't exist
continue;
}
// Process each chat file in this project
for (const file of chatFiles) {
const filePath = path.join(chatsDir, file);
// Get file stats for sorting by recency
try {
const fileStats = await fs.stat(filePath);
allChatFiles.push({ path: filePath, mtime: fileStats.mtimeMs });
} catch (e) {
console.error(`Failed to stat file ${filePath}:`, e);
}
const records = await readJsonlFile<ChatRecord>(filePath);
// Process each record
for (const record of records) {
const timestamp = new Date(record.timestamp);
const dateKey = this.formatDate(timestamp);
const hour = timestamp.getHours();
// Update heatmap (count of interactions per day)
heatmap[dateKey] = (heatmap[dateKey] || 0) + 1;
// Update active hours
activeHours[hour] = (activeHours[hour] || 0) + 1;
// Update token usage
if (record.usageMetadata) {
const usage = tokenUsage[dateKey] || {
input: 0,
output: 0,
total: 0,
};
usage.input += record.usageMetadata.promptTokenCount || 0;
usage.output += record.usageMetadata.candidatesTokenCount || 0;
usage.total += record.usageMetadata.totalTokenCount || 0;
tokenUsage[dateKey] = usage;
}
// Track session times
if (!sessionStartTimes[record.sessionId]) {
sessionStartTimes[record.sessionId] = timestamp;
}
sessionEndTimes[record.sessionId] = timestamp;
}
}
}
}
} catch (error) {
if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
// Base directory doesn't exist, return empty insights
// Base directory doesn't exist, return empty
console.log(`Base directory does not exist: ${baseDir}`);
} else {
console.log(`Error reading base directory: ${error}`);
}
}
return allChatFiles;
}
/**
 * Aggregates quantitative metrics across the given chat files: daily
 * interaction heatmap, token usage per day, active-hour histogram, session
 * durations/streaks, and tool-usage counts.
 *
 * @param files Chat files discovered by scanChatFiles (path + mtime).
 * @returns All metric fields of InsightData except facets/qualitative.
 */
private async generateMetrics(
  files: Array<{ path: string; mtime: number }>,
): Promise<Omit<InsightData, 'facets' | 'qualitative'>> {
  // Initialize data structures
  const heatmap: HeatMapData = {};
  const tokenUsage: TokenUsageData = {};
  const activeHours: { [hour: number]: number } = {};
  const sessionStartTimes: { [sessionId: string]: Date } = {};
  const sessionEndTimes: { [sessionId: string]: Date } = {};
  let totalMessages = 0;
  const toolUsage: Record<string, number> = {};
  // Most recent record timestamp seen across all files; feeds
  // latestActiveTime below.
  let latestRecordTime: Date | null = null;
  for (const fileInfo of files) {
    const records = await readJsonlFile<ChatRecord>(fileInfo.path);
    totalMessages += records.length;
    // Process each record
    for (const record of records) {
      const timestamp = new Date(record.timestamp);
      const dateKey = this.formatDate(timestamp);
      const hour = timestamp.getHours();
      // Update heatmap (count of interactions per day)
      heatmap[dateKey] = (heatmap[dateKey] || 0) + 1;
      // Update active hours
      activeHours[hour] = (activeHours[hour] || 0) + 1;
      // Update token usage
      if (record.usageMetadata) {
        const usage = tokenUsage[dateKey] || {
          input: 0,
          output: 0,
          total: 0,
        };
        usage.input += record.usageMetadata.promptTokenCount || 0;
        usage.output += record.usageMetadata.candidatesTokenCount || 0;
        usage.total += record.usageMetadata.totalTokenCount || 0;
        tokenUsage[dateKey] = usage;
      }
      // Track session times (records are assumed chronological per file, so
      // the last record seen for a session is its end — TODO confirm).
      if (!sessionStartTimes[record.sessionId]) {
        sessionStartTimes[record.sessionId] = timestamp;
      }
      sessionEndTimes[record.sessionId] = timestamp;
      // Track the globally latest activity timestamp.
      if (latestRecordTime === null || timestamp > latestRecordTime) {
        latestRecordTime = timestamp;
      }
      // Track tool usage
      if (record.type === 'assistant' && record.message?.parts) {
        for (const part of record.message.parts) {
          if ('functionCall' in part) {
            // Guard instead of `part.functionCall!.name!`: tolerate a
            // malformed record with a missing call or name.
            const name = part.functionCall?.name;
            if (name) {
              toolUsage[name] = (toolUsage[name] || 0) + 1;
            }
          }
        }
      }
    }
  }
  // Calculate streak data
  const streakData = this.calculateStreaks(Object.keys(heatmap));
  // Calculate longest work session and total hours
  let longestWorkDuration = 0;
  let longestWorkDate: string | null = null;
  let totalDurationMs = 0;
  const sessionIds = Object.keys(sessionStartTimes);
  const totalSessions = sessionIds.length;
  for (const sessionId of sessionIds) {
    const start = sessionStartTimes[sessionId];
    const end = sessionEndTimes[sessionId];
    const durationMs = end.getTime() - start.getTime();
    const durationMinutes = Math.round(durationMs / (1000 * 60));
    totalDurationMs += durationMs;
    if (durationMinutes > longestWorkDuration) {
      longestWorkDuration = durationMinutes;
      longestWorkDate = this.formatDate(start);
    }
  }
  const totalHours = Math.round(totalDurationMs / (1000 * 60 * 60));
  // Calculate latest active time.
  // BUG FIX: this was previously derived from heatmap keys, which are
  // date-only strings — `new Date('YYYY-MM-DD')` carries no time of day, so
  // latestActiveTime always rendered as midnight. Use the actual most
  // recent record timestamp instead.
  const latestActiveTime: string | null = latestRecordTime
    ? latestRecordTime.toLocaleTimeString([], {
        hour: '2-digit',
        minute: '2-digit',
      })
    : null;
  // Calculate top tools
  const topTools = Object.entries(toolUsage)
    .sort((a, b) => b[1] - a[1])
    .slice(0, 10);
  // Calculate achievements
  const achievements = this.calculateAchievements(
    activeHours,
    heatmap,
    tokenUsage,
  );
  return {
    heatmap,
    tokenUsage,
    currentStreak: streakData.currentStreak,
    longestStreak: streakData.longestStreak,
    longestWorkDate,
    longestWorkDuration,
    activeHours,
    latestActiveTime,
    achievements,
    totalSessions,
    totalMessages,
    totalHours,
    topTools,
  };
}
private async generateFacets(
allFiles: Array<{ path: string; mtime: number }>,
facetsOutputDir?: string,
): Promise<SessionFacets[]> {
// Sort files by recency (descending) and take top 50
const recentFiles = allChatFiles
const recentFiles = [...allFiles]
.sort((a, b) => b.mtime - a.mtime)
.slice(0, 50);
@ -541,58 +1012,6 @@ export class DataProcessor {
const facets = sessionFacetsWithNulls.filter(
(f): f is SessionFacets => f !== null,
);
// Calculate streak data
const streakData = this.calculateStreaks(Object.keys(heatmap));
// Calculate longest work session
let longestWorkDuration = 0;
let longestWorkDate: string | null = null;
for (const sessionId in sessionStartTimes) {
const start = sessionStartTimes[sessionId];
const end = sessionEndTimes[sessionId];
const durationMinutes = Math.round(
(end.getTime() - start.getTime()) / (1000 * 60),
);
if (durationMinutes > longestWorkDuration) {
longestWorkDuration = durationMinutes;
longestWorkDate = this.formatDate(start);
}
}
// Calculate latest active time
let latestActiveTime: string | null = null;
let latestTimestamp = new Date(0);
for (const dateStr in heatmap) {
const date = new Date(dateStr);
if (date > latestTimestamp) {
latestTimestamp = date;
latestActiveTime = date.toLocaleTimeString([], {
hour: '2-digit',
minute: '2-digit',
});
}
}
// Calculate achievements
const achievements = this.calculateAchievements(
activeHours,
heatmap,
tokenUsage,
);
return {
heatmap,
tokenUsage,
currentStreak: streakData.currentStreak,
longestStreak: streakData.longestStreak,
longestWorkDate,
longestWorkDuration,
activeHours,
latestActiveTime,
achievements,
facets,
};
return facets;
}
}