mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-04-30 12:40:44 +00:00
fix(insight): only analyze conversational sessions for facets
Filter sessions to only include those with both user and assistant records when generating facets. This prevents system-only logs from being analyzed, ensuring more accurate session insights.

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
2421bb185b
commit
c948e0c6e9
2 changed files with 131 additions and 9 deletions
|
|
@ -1126,4 +1126,82 @@ describe('DataProcessor', () => {
|
|||
expect(hasNonEmptyFrictionDetail).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
describe('generateFacets', () => {
  // A session file that holds only a user record must not be analyzed;
  // only the file with both a user and an assistant record should be.
  it('should skip non-conversational sessions', async () => {
    // Shape used to reach the private generateFacets method from the test.
    type FacetGenerator = {
      generateFacets(
        files: Array<{ path: string; mtime: number }>,
        facetsOutputDir?: string,
      ): Promise<SessionFacets[]>;
    };

    // Minimal record factory: only the session id, timestamp, record type
    // and text vary between fixtures; the remaining fields stay blank.
    const buildRecord = (
      sessionId: string,
      timestamp: string,
      type: 'user' | 'assistant',
      text: string,
    ): ChatRecord => ({
      sessionId,
      timestamp,
      type,
      message: { role: type, parts: [{ text }] },
      uuid: '',
      parentUuid: null,
      cwd: '',
      version: '',
    });

    const userOnlyRecords: ChatRecord[] = [
      buildRecord('user-only', '2025-01-15T10:00:00Z', 'user', 'Hello'),
    ];

    const conversationalRecords: ChatRecord[] = [
      buildRecord('conversational', '2025-01-15T10:00:00Z', 'user', 'Hello'),
      buildRecord('conversational', '2025-01-15T10:01:00Z', 'assistant', 'Hi'),
    ];

    // Reads are served in call order: the user-only fixture first, then the
    // conversational one.
    mockedReadJsonlFile.mockResolvedValueOnce(userOnlyRecords);
    mockedReadJsonlFile.mockResolvedValueOnce(conversationalRecords);

    mockGenerateJson.mockResolvedValue({
      underlying_goal: 'Test',
      goal_categories: {},
      outcome: 'fully_achieved',
      user_satisfaction_counts: {},
      Qwen_helpfulness: 'very_helpful',
      session_type: 'single_task',
      friction_counts: {},
      friction_detail: '',
      primary_success: 'none',
      brief_summary: 'Test',
    });

    // The user-only file carries the larger mtime of the two.
    const generator = dataProcessor as unknown as FacetGenerator;
    const result = await generator.generateFacets([
      { path: '/test/user-only.jsonl', mtime: 2000 },
      { path: '/test/conversational.jsonl', mtime: 1000 },
    ]);

    // Exactly one generateJson call and one facet, for the conversational session.
    expect(mockGenerateJson).toHaveBeenCalledTimes(1);
    expect(result).toHaveLength(1);
    expect(result[0].session_id).toBe('conversational');
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -82,6 +82,26 @@ export class DataProcessor {
|
|||
return output;
|
||||
}
|
||||
|
||||
// Only analyze conversational sessions for facets (skip system-only logs).
|
||||
/**
 * Reports whether the given records contain at least one record of type
 * 'user' and at least one record of type 'assistant'.
 *
 * @param records - Chat records to inspect.
 * @returns `true` when both record types are present; `false` otherwise,
 *   including for an empty list.
 */
private hasUserAndAssistantRecords(records: ChatRecord[]): boolean {
  // Guard clause: without a user record the answer is already known.
  const sawUser = records.some((entry) => entry.type === 'user');
  if (!sawUser) {
    return false;
  }

  return records.some((entry) => entry.type === 'assistant');
}
|
||||
|
||||
// Analyze a single session using LLM
|
||||
private async analyzeSession(
|
||||
records: ChatRecord[],
|
||||
|
|
@ -975,25 +995,49 @@ None captured`;
|
|||
facetsOutputDir?: string,
|
||||
onProgress?: InsightProgressCallback,
|
||||
): Promise<SessionFacets[]> {
|
||||
// Sort files by recency (descending) and take top 50
|
||||
const recentFiles = [...allFiles]
|
||||
.sort((a, b) => b.mtime - a.mtime)
|
||||
.slice(0, 50);
|
||||
const MAX_ELIGIBLE_SESSIONS = 50;
|
||||
|
||||
logger.info(`Analyzing ${recentFiles.length} recent sessions with LLM...`);
|
||||
// Sort files by recency (descending), then select up to 50 conversational
|
||||
// sessions (must contain both user and assistant records).
|
||||
const sortedFiles = [...allFiles].sort((a, b) => b.mtime - a.mtime);
|
||||
const eligibleSessions: Array<{
|
||||
fileInfo: { path: string; mtime: number };
|
||||
records: ChatRecord[];
|
||||
}> = [];
|
||||
|
||||
for (const fileInfo of sortedFiles) {
|
||||
if (eligibleSessions.length >= MAX_ELIGIBLE_SESSIONS) {
|
||||
break;
|
||||
}
|
||||
|
||||
try {
|
||||
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
|
||||
if (!this.hasUserAndAssistantRecords(records)) {
|
||||
continue;
|
||||
}
|
||||
eligibleSessions.push({ fileInfo, records });
|
||||
} catch (e) {
|
||||
logger.error(
|
||||
`Error reading session file ${fileInfo.path} for facet eligibility:`,
|
||||
e,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`Analyzing ${eligibleSessions.length} eligible recent sessions with LLM...`,
|
||||
);
|
||||
|
||||
// Create a limit function with concurrency of 4 to avoid 429 errors
|
||||
const limit = pLimit(CONCURRENCY_LIMIT);
|
||||
|
||||
let completed = 0;
|
||||
const total = recentFiles.length;
|
||||
const total = eligibleSessions.length;
|
||||
|
||||
// Analyze sessions concurrently with limit
|
||||
const analysisPromises = recentFiles.map((fileInfo) =>
|
||||
const analysisPromises = eligibleSessions.map(({ fileInfo, records }) =>
|
||||
limit(async () => {
|
||||
try {
|
||||
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
|
||||
|
||||
// Check if we already have this session analyzed
|
||||
if (records.length > 0 && facetsOutputDir) {
|
||||
const sessionId = records[0].sessionId;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue