feat(insight): update static insight generator and command handling

- Refine DataProcessor and StaticInsightGenerator logic
- Update StaticInsightTypes definitions
- Enhance insight command implementation
- Update package dependencies
2026-04-30 12:40:44 +00:00 · 2026-02-05 19:59:20 +08:00 · 2026-02-05 19:59:20 +08:00 · 39d2067d14
commit 39d2067d14
parent 98735fecd7
6 changed files with 419 additions and 62 deletions
--- a/packages/cli/src/services/insight/generators/DataProcessor.ts
+++ b/packages/cli/src/services/insight/generators/DataProcessor.ts
@ -7,21 +7,201 @@
 import fs from 'fs/promises';
 import path from 'path';
 import { read as readJsonlFile } from '@qwen-code/qwen-code-core';
+import pLimit from 'p-limit';
+import type { Config, ChatRecord } from '@qwen-code/qwen-code-core';
 import type {
  InsightData,
  HeatMapData,
  TokenUsageData,
  AchievementData,
  StreakData,
+  SessionFacets,
 } from '../types/StaticInsightTypes.js';
-import type { ChatRecord } from '@qwen-code/qwen-code-core';
+
+/**
+ * Instruction text placed ahead of each session transcript in the request
+ * sent to the LLM (content taken from prompt.txt).
+ * NOTE(review): guideline 4 says "goal_category", but the schema field is
+ * `goal_categories` — confirm which spelling the model should be given.
+ */
+const ANALYSIS_PROMPT = `Analyze this Qwen Code session and extract structured facets.
+
+CRITICAL GUIDELINES:
+
+1. **goal_categories**: Count ONLY what the USER explicitly asked for.
+   - DO NOT count Qwen's autonomous codebase exploration
+   - DO NOT count work Qwen decided to do on its own
+   - ONLY count when user says "can you...", "please...", "I need...", "let's..."
+
+2. **user_satisfaction_counts**: Base ONLY on explicit user signals.
+   - "Yay!", "great!", "perfect!" → happy
+   - "thanks", "looks good", "that works" → satisfied
+   - "ok, now let's..." (continuing without complaint) → likely_satisfied
+   - "that's not right", "try again" → dissatisfied
+   - "this is broken", "I give up" → frustrated
+
+3. **friction_counts**: Be specific about what went wrong.
+   - misunderstood_request: Qwen interpreted incorrectly
+   - wrong_approach: Right goal, wrong solution method
+   - buggy_code: Code didn't work correctly
+   - user_rejected_action: User said no/stop to a tool call
+   - excessive_changes: Over-engineered or changed too much
+
+4. If very short or just warmup, use warmup_minimal for goal_category`;
+
+const INSIGHT_SCHEMA = { // JSON schema passed to generateJson; covers every SessionFacets field except session_id
+  type: 'object',
+  properties: {
+    underlying_goal: {
+      type: 'string',
+      description: 'What the user fundamentally wanted to achieve',
+    },
+    goal_categories: {
+      type: 'object',
+      additionalProperties: { type: 'number' }, // free-form category name -> numeric count
+    },
+    outcome: {
+      type: 'string',
+      enum: [
+        'fully_achieved',
+        'mostly_achieved',
+        'partially_achieved',
+        'not_achieved',
+        'unclear_from_transcript',
+      ],
+    },
+    user_satisfaction_counts: {
+      type: 'object',
+      additionalProperties: { type: 'number' }, // free-form satisfaction label -> numeric count
+    },
+    Qwen_helpfulness: { // capitalised key; SessionFacets declares the same spelling
+      type: 'string',
+      enum: [
+        'unhelpful',
+        'slightly_helpful',
+        'moderately_helpful',
+        'very_helpful',
+        'essential',
+      ],
+    },
+    session_type: {
+      type: 'string',
+      enum: [
+        'single_task',
+        'multi_task',
+        'iterative_refinement',
+        'exploration',
+        'quick_question',
+      ],
+    },
+    friction_counts: {
+      type: 'object',
+      additionalProperties: { type: 'number' }, // free-form friction label -> numeric count
+    },
+    friction_detail: {
+      type: 'string',
+      description: 'One sentence describing friction or empty',
+    },
+    primary_success: {
+      type: 'string',
+      enum: [
+        'none',
+        'fast_accurate_search',
+        'correct_code_edits',
+        'good_explanations',
+        'proactive_help',
+        'multi_file_changes',
+        'good_debugging',
+      ],
+    },
+    brief_summary: {
+      type: 'string',
+      description: 'One sentence: what user wanted and whether they got it',
+    },
+  },
+  required: [ // lists all ten properties declared above
+    'underlying_goal',
+    'goal_categories',
+    'outcome',
+    'user_satisfaction_counts',
+    'Qwen_helpfulness',
+    'session_type',
+    'friction_counts',
+    'friction_detail',
+    'primary_success',
+    'brief_summary',
+  ],
+};

 export class DataProcessor {
+  constructor(private config: Config) {} // config supplies the LLM client and model name used by analyzeSession
+
  // Formats a date as YYYY-MM-DD; toISOString() is always UTC, so this is the UTC calendar day
  private formatDate(date: Date): string {
    return date.toISOString().split('T')[0];
  }

+  /**
+   * Render one session's chat records as a plain-text transcript for LLM analysis.
+   *
+   * Output layout: a header (`Session:`, `Date:` and `Duration:` lines followed
+   * by a blank line), then one line per user message (`[User]: ...`), per
+   * non-empty assistant text part (`[Assistant]: ...`) and per assistant
+   * function call (`[Tool: name]`). Records of any other type are ignored.
+   *
+   * @param records - Records of a single session; may be empty.
+   * @returns The transcript text.
+   */
+  private formatRecordsForAnalysis(records: ChatRecord[]): string {
+    const first = records[0];
+
+    // Date#toISOString throws a RangeError on an invalid Date, so a record with
+    // a missing or malformed timestamp would otherwise abort the whole session.
+    const sessionStart = first ? new Date(first.timestamp) : new Date();
+    const dateLabel = Number.isNaN(sessionStart.getTime())
+      ? 'unknown'
+      : sessionStart.toISOString();
+
+    // Collect pieces and join once instead of growing a string with `+=`.
+    const chunks: string[] = [
+      `Session: ${first?.sessionId || 'unknown'}\n`,
+      `Date: ${dateLabel}\n`,
+      `Duration: ${records.length} turns\n\n`,
+    ];
+
+    for (const record of records) {
+      if (record.type === 'user') {
+        // A user message is emitted as one line: all text parts concatenated.
+        const text =
+          record.message?.parts
+            ?.map((p) => ('text' in p ? p.text : ''))
+            .join('') || '';
+        chunks.push(`[User]: ${text}\n`);
+        continue;
+      }
+      if (record.type !== 'assistant') continue;
+
+      for (const part of record.message?.parts ?? []) {
+        if ('text' in part && part.text) {
+          chunks.push(`[Assistant]: ${part.text}\n`);
+        } else if ('functionCall' in part) {
+          // Only the tool name is recorded; arguments are left out of the transcript.
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          const call = (part as any).functionCall;
+          if (call) {
+            chunks.push(`[Tool: ${call.name}]\n`);
+          }
+        }
+      }
+    }
+    return chunks.join('');
+  }
+
+  /**
+   * Ask the configured LLM to extract structured facets from one session.
+   *
+   * Never rejects: a formatting error, a failed or timed-out request, or a
+   * response that lacks a schema-required field is logged and reported as
+   * `null`, so callers never receive (or persist) a malformed facet.
+   *
+   * @param records - Records of a single session.
+   * @returns The facets tagged with the session id of the first record, or
+   *   `null` when the session is empty or could not be analyzed.
+   */
+  private async analyzeSession(
+    records: ChatRecord[],
+  ): Promise<SessionFacets | null> {
+    if (records.length === 0) return null;
+    const sessionId = records[0]?.sessionId;
+
+    try {
+      // Built inside the try so a malformed record cannot reject this promise.
+      const sessionText = this.formatRecordsForAnalysis(records);
+      const prompt = `${ANALYSIS_PROMPT}\n\nSESSION:\n${sessionText}`;
+
+      const result: unknown = await this.config
+        .getBaseLlmClient()
+        .generateJson({
+          // Use the configured model
+          model: this.config.getModel(),
+          contents: [{ role: 'user', parts: [{ text: prompt }] }],
+          schema: INSIGHT_SCHEMA,
+          abortSignal: AbortSignal.timeout(60000), // 1 minute timeout per session
+        });
+
+      // The response is only cast, never type-checked, so refuse anything that
+      // is not an object carrying every field the schema marks as required.
+      if (typeof result !== 'object' || result === null) {
+        console.error(
+          `Failed to analyze session ${sessionId}:`,
+          new Error('LLM response is not a JSON object'),
+        );
+        return null;
+      }
+      const missing = INSIGHT_SCHEMA.required.filter(
+        (key) => !(key in result),
+      );
+      if (missing.length > 0) {
+        console.error(
+          `Failed to analyze session ${sessionId}:`,
+          new Error(
+            `LLM response is missing required fields: ${missing.join(', ')}`,
+          ),
+        );
+        return null;
+      }
+
+      return {
+        ...(result as SessionFacets),
+        session_id: records[0].sessionId,
+      };
+    } catch (error) {
+      console.error(`Failed to analyze session ${sessionId}:`, error);
+      return null;
+    }
+  }
+
  // Calculate streaks from activity dates
  private calculateStreaks(dates: string[]): StreakData {
    if (dates.length === 0) {
@ -183,7 +363,10 @@ export class DataProcessor {
  }

  // Process chat files from all projects in the base directory and generate insights
-  async generateInsights(baseDir: string): Promise<InsightData> {
+  async generateInsights(
+    baseDir: string,
+    facetsOutputDir?: string,
+  ): Promise<InsightData> {
    // Initialize data structures
    const heatmap: HeatMapData = {};
    const tokenUsage: TokenUsageData = {};
@ -191,6 +374,9 @@ export class DataProcessor {
    const sessionStartTimes: { [sessionId: string]: Date } = {};
    const sessionEndTimes: { [sessionId: string]: Date } = {};

+    // Store all valid chat file paths for LLM analysis
+    const allChatFiles: Array<{ path: string; mtime: number }> = [];
+
    try {
      // Get all project directories in the base directory
      const projectDirs = await fs.readdir(baseDir);
@ -222,6 +408,15 @@ export class DataProcessor {
          // Process each chat file in this project
          for (const file of chatFiles) {
            const filePath = path.join(chatsDir, file);
+
+            // Get file stats for sorting by recency
+            try {
+              const fileStats = await fs.stat(filePath);
+              allChatFiles.push({ path: filePath, mtime: fileStats.mtimeMs });
+            } catch (e) {
+              console.error(`Failed to stat file ${filePath}:`, e);
+            }
+
            const records = await readJsonlFile<ChatRecord>(filePath);

            // Process each record
@ -269,6 +464,84 @@ export class DataProcessor {
      }
    }

+    // Sort files by recency (descending) and take top 50
+    const recentFiles = allChatFiles
+      .sort((a, b) => b.mtime - a.mtime)
+      .slice(0, 50);
+
+    console.log(`Analyzing ${recentFiles.length} recent sessions with LLM...`);
+
+    // Create a limit function with concurrency of 4 to avoid 429 errors
+    const limit = pLimit(4);
+
+    // Analyze sessions concurrently with limit
+    const analysisPromises = recentFiles.map((fileInfo) =>
+      limit(async () => {
+        try {
+          const records = await readJsonlFile<ChatRecord>(fileInfo.path);
+
+          // Check if we already have this session analyzed
+          if (records.length > 0 && facetsOutputDir) {
+            const sessionId = records[0].sessionId;
+            if (sessionId) {
+              const existingFacetPath = path.join(
+                facetsOutputDir,
+                `${sessionId}.json`,
+              );
+              try {
+                // Check if file exists and is readable
+                const existingData = await fs.readFile(
+                  existingFacetPath,
+                  'utf-8',
+                );
+                const existingFacet = JSON.parse(existingData);
+                return existingFacet;
+              } catch (readError) {
+                // File doesn't exist or is invalid, proceed to analyze
+                if ((readError as NodeJS.ErrnoException).code !== 'ENOENT') {
+                  console.warn(
+                    `Failed to read existing facet for ${sessionId}, regenerating:`,
+                    readError,
+                  );
+                }
+              }
+            }
+          }
+
+          const facet = await this.analyzeSession(records);
+
+          if (facet && facetsOutputDir) {
+            try {
+              const facetPath = path.join(
+                facetsOutputDir,
+                `${facet.session_id}.json`,
+              );
+              await fs.writeFile(
+                facetPath,
+                JSON.stringify(facet, null, 2),
+                'utf-8',
+              );
+            } catch (writeError) {
+              console.error(
+                `Failed to write facet file for session ${facet.session_id}:`,
+                writeError,
+              );
+            }
+          }
+
+          return facet;
+        } catch (e) {
+          console.error(`Error analyzing session file ${fileInfo.path}:`, e);
+          return null;
+        }
+      }),
+    );
+
+    const sessionFacetsWithNulls = await Promise.all(analysisPromises);
+    const facets = sessionFacetsWithNulls.filter(
+      (f): f is SessionFacets => f !== null,
+    );
+
    // Calculate streak data
    const streakData = this.calculateStreaks(Object.keys(heatmap));

@ -319,6 +592,7 @@ export class DataProcessor {
      activeHours,
      latestActiveTime,
      achievements,
+      facets,
    };
  }
 }
--- a/packages/cli/src/services/insight/generators/StaticInsightGenerator.ts
+++ b/packages/cli/src/services/insight/generators/StaticInsightGenerator.ts
@ -11,12 +11,14 @@ import { DataProcessor } from './DataProcessor.js';
 import { TemplateRenderer } from './TemplateRenderer.js';
 import type { InsightData } from '../types/StaticInsightTypes.js';

+import type { Config } from '@qwen-code/qwen-code-core';
+
 export class StaticInsightGenerator {
  private dataProcessor: DataProcessor;
  private templateRenderer: TemplateRenderer;

-  constructor() {
-    this.dataProcessor = new DataProcessor();
+  constructor(config: Config) {
+    this.dataProcessor = new DataProcessor(config); // config is forwarded so DataProcessor can reach the LLM client
    this.templateRenderer = new TemplateRenderer();
  }

@ -30,23 +32,42 @@ export class StaticInsightGenerator {
  // Generate the static insight HTML file
  async generateStaticInsight(baseDir: string): Promise<string> {
    try {
+      // Ensure output directory exists
+      const outputDir = await this.ensureOutputDirectory();
+      const facetsDir = path.join(outputDir, 'facets');
+      await fs.mkdir(facetsDir, { recursive: true });
+
      // Process data
      console.log('Processing insight data...');
-      const insights: InsightData =
-        await this.dataProcessor.generateInsights(baseDir);
+      const insights: InsightData = await this.dataProcessor.generateInsights(
+        baseDir,
+        facetsDir,
+      );

      // Render HTML
      console.log('Rendering HTML template...');
      const html = await this.templateRenderer.renderInsightHTML(insights);

-      // Ensure output directory exists
-      const outputDir = await this.ensureOutputDirectory();
      const outputPath = path.join(outputDir, 'insight.html');

      // Write the HTML file
      console.log(`Writing HTML file to: ${outputPath}`);
      await fs.writeFile(outputPath, html, 'utf-8');

+      // Write the JSON data file
+      const jsonPath = path.join(outputDir, 'insight.json');
+      console.log(`Writing JSON data to: ${jsonPath}`);
+
+      // Exclude facets from the main JSON file as they are stored individually
+      // eslint-disable-next-line @typescript-eslint/no-unused-vars
+      const { facets, ...insightsWithoutFacets } = insights;
+
+      await fs.writeFile(
+        jsonPath,
+        JSON.stringify(insightsWithoutFacets, null, 2),
+        'utf-8',
+      );
+
      console.log('Static insight generation completed successfully');
      return outputPath;
    } catch (error) {
--- a/packages/cli/src/services/insight/types/StaticInsightTypes.ts
+++ b/packages/cli/src/services/insight/types/StaticInsightTypes.ts
@ -34,6 +34,7 @@ export interface InsightData {
  activeHours: { [hour: number]: number };
  latestActiveTime: string | null;
  achievements: AchievementData[];
+  facets?: SessionFacets[];
 }

 export interface StreakData {
@ -42,6 +43,42 @@ export interface StreakData {
  dates: string[];
 }

+export interface SessionFacets { // LLM-extracted facets for one chat session
+  session_id: string; // added by DataProcessor.analyzeSession from the session's first record
+  underlying_goal: string; // what the user fundamentally wanted to achieve
+  goal_categories: Record<string, number>; // category name -> count
+  outcome:
+    | 'fully_achieved'
+    | 'mostly_achieved'
+    | 'partially_achieved'
+    | 'not_achieved'
+    | 'unclear_from_transcript';
+  user_satisfaction_counts: Record<string, number>; // satisfaction label -> count
+  Qwen_helpfulness: // capitalised to match the key used in the LLM response schema
+    | 'unhelpful'
+    | 'slightly_helpful'
+    | 'moderately_helpful'
+    | 'very_helpful'
+    | 'essential';
+  session_type:
+    | 'single_task'
+    | 'multi_task'
+    | 'iterative_refinement'
+    | 'exploration'
+    | 'quick_question';
+  friction_counts: Record<string, number>; // friction label -> count
+  friction_detail: string; // one sentence describing friction, or empty
+  primary_success:
+    | 'none'
+    | 'fast_accurate_search'
+    | 'correct_code_edits'
+    | 'good_explanations'
+    | 'proactive_help'
+    | 'multi_file_changes'
+    | 'good_debugging';
+  brief_summary: string; // one sentence: what the user wanted and whether they got it
+}
+
 export interface StaticInsightTemplateData {
  styles: string;
  content: string;