fix(insight): only analyze conversational sessions for facets

Filter sessions to only include those with both user and assistant
records when generating facets. This prevents system-only logs from
being analyzed, ensuring more accurate session insights.

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
tanzhenxin 2026-02-26 11:26:04 +08:00
parent 2421bb185b
commit c948e0c6e9
2 changed files with 131 additions and 9 deletions

View file

@ -1126,4 +1126,82 @@ describe('DataProcessor', () => {
expect(hasNonEmptyFrictionDetail).toBe(false);
});
});
describe('generateFacets', () => {
  it('should skip non-conversational sessions', async () => {
    // Small factory so each fixture record only spells out what differs.
    const makeRecord = (
      sessionId: string,
      type: 'user' | 'assistant',
      timestamp: string,
      text: string,
    ): ChatRecord => ({
      sessionId,
      timestamp,
      type,
      message: { role: type, parts: [{ text }] },
      uuid: '',
      parentUuid: null,
      cwd: '',
      version: '',
    });

    // Session with only a user turn — must be filtered out.
    const userOnlyRecords: ChatRecord[] = [
      makeRecord('user-only', 'user', '2025-01-15T10:00:00Z', 'Hello'),
    ];
    // Session with both user and assistant turns — must be analyzed.
    const conversationalRecords: ChatRecord[] = [
      makeRecord('conversational', 'user', '2025-01-15T10:00:00Z', 'Hello'),
      makeRecord('conversational', 'assistant', '2025-01-15T10:01:00Z', 'Hi'),
    ];

    // First file read resolves the user-only session, second the conversational one.
    mockedReadJsonlFile
      .mockResolvedValueOnce(userOnlyRecords)
      .mockResolvedValueOnce(conversationalRecords);

    // Minimal facet payload the mocked LLM returns for any analyzed session.
    const mockFacet = {
      underlying_goal: 'Test',
      goal_categories: {},
      outcome: 'fully_achieved',
      user_satisfaction_counts: {},
      Qwen_helpfulness: 'very_helpful',
      session_type: 'single_task',
      friction_counts: {},
      friction_detail: '',
      primary_success: 'none',
      brief_summary: 'Test',
    };
    mockGenerateJson.mockResolvedValue(mockFacet);

    // user-only.jsonl is newer (mtime 2000), so it is considered first
    // and must still be skipped.
    const sessionFiles = [
      { path: '/test/user-only.jsonl', mtime: 2000 },
      { path: '/test/conversational.jsonl', mtime: 1000 },
    ];

    // generateFacets is private; expose it through a narrow structural cast.
    const processorWithPrivates = dataProcessor as unknown as {
      generateFacets(
        files: Array<{ path: string; mtime: number }>,
        facetsOutputDir?: string,
      ): Promise<SessionFacets[]>;
    };
    const result = await processorWithPrivates.generateFacets(sessionFiles);

    // Only the conversational session should reach the LLM and the output.
    expect(mockGenerateJson).toHaveBeenCalledTimes(1);
    expect(result).toHaveLength(1);
    expect(result[0].session_id).toBe('conversational');
  });
});
});

View file

@ -82,6 +82,26 @@ export class DataProcessor {
return output;
}
/**
 * A session is "conversational" only when it contains at least one user
 * record AND at least one assistant record. System-only logs (no real
 * exchange) are excluded from facet analysis.
 */
private hasUserAndAssistantRecords(records: ChatRecord[]): boolean {
  const containsType = (wanted: ChatRecord['type']): boolean =>
    records.some((record) => record.type === wanted);
  return containsType('user') && containsType('assistant');
}
// Analyze a single session using LLM
private async analyzeSession(
records: ChatRecord[],
@ -975,25 +995,49 @@ None captured`;
facetsOutputDir?: string,
onProgress?: InsightProgressCallback,
): Promise<SessionFacets[]> {
// Sort files by recency (descending) and take top 50
const recentFiles = [...allFiles]
.sort((a, b) => b.mtime - a.mtime)
.slice(0, 50);
const MAX_ELIGIBLE_SESSIONS = 50;
logger.info(`Analyzing ${recentFiles.length} recent sessions with LLM...`);
// Sort files by recency (descending), then select up to 50 conversational
// sessions (must contain both user and assistant records).
const sortedFiles = [...allFiles].sort((a, b) => b.mtime - a.mtime);
const eligibleSessions: Array<{
fileInfo: { path: string; mtime: number };
records: ChatRecord[];
}> = [];
for (const fileInfo of sortedFiles) {
if (eligibleSessions.length >= MAX_ELIGIBLE_SESSIONS) {
break;
}
try {
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
if (!this.hasUserAndAssistantRecords(records)) {
continue;
}
eligibleSessions.push({ fileInfo, records });
} catch (e) {
logger.error(
`Error reading session file ${fileInfo.path} for facet eligibility:`,
e,
);
}
}
logger.info(
`Analyzing ${eligibleSessions.length} eligible recent sessions with LLM...`,
);
// Create a limit function with concurrency of 4 to avoid 429 errors
const limit = pLimit(CONCURRENCY_LIMIT);
let completed = 0;
const total = recentFiles.length;
const total = eligibleSessions.length;
// Analyze sessions concurrently with limit
const analysisPromises = recentFiles.map((fileInfo) =>
const analysisPromises = eligibleSessions.map(({ fileInfo, records }) =>
limit(async () => {
try {
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
// Check if we already have this session analyzed
if (records.length > 0 && facetsOutputDir) {
const sessionId = records[0].sessionId;