mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-04-30 12:40:44 +00:00
fix(insight): only analyze conversational sessions for facets
Filter sessions so that only those containing both user and assistant records are included when generating facets. This prevents non-conversational logs (such as system-only sessions) from being analyzed, ensuring more accurate session insights.

Co-authored-by: Qwen-Coder <qwen-coder@alibabacloud.com>
This commit is contained in:
parent
2421bb185b
commit
c948e0c6e9
2 changed files with 131 additions and 9 deletions
|
|
@ -1126,4 +1126,82 @@ describe('DataProcessor', () => {
|
||||||
expect(hasNonEmptyFrictionDetail).toBe(false);
|
expect(hasNonEmptyFrictionDetail).toBe(false);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
describe('generateFacets', () => {
  // Shape used to reach the private `generateFacets` method through a cast.
  type FacetGenerator = {
    generateFacets(
      files: Array<{ path: string; mtime: number }>,
      facetsOutputDir?: string,
    ): Promise<SessionFacets[]>;
  };

  it('should skip non-conversational sessions', async () => {
    // Builds a minimal chat record; only the session, time, speaker and text
    // vary between the fixtures below.
    const buildRecord = (
      sessionId: string,
      timestamp: string,
      speaker: 'user' | 'assistant',
      text: string,
    ): ChatRecord => ({
      sessionId,
      timestamp,
      type: speaker,
      message: { role: speaker, parts: [{ text }] },
      uuid: '',
      parentUuid: null,
      cwd: '',
      version: '',
    });

    // A session holding a single user record and no assistant reply.
    const loneUserSession: ChatRecord[] = [
      buildRecord('user-only', '2025-01-15T10:00:00Z', 'user', 'Hello'),
    ];

    // A session holding one user record followed by one assistant record.
    const fullExchangeSession: ChatRecord[] = [
      buildRecord('conversational', '2025-01-15T10:00:00Z', 'user', 'Hello'),
      buildRecord('conversational', '2025-01-15T10:01:00Z', 'assistant', 'Hi'),
    ];

    // Reads are answered in call order: the user-only file has the larger
    // mtime below, so it is queued first and the conversational one second.
    mockedReadJsonlFile
      .mockResolvedValueOnce(loneUserSession)
      .mockResolvedValueOnce(fullExchangeSession);

    const stubbedFacet = {
      underlying_goal: 'Test',
      goal_categories: {},
      outcome: 'fully_achieved',
      user_satisfaction_counts: {},
      Qwen_helpfulness: 'very_helpful',
      session_type: 'single_task',
      friction_counts: {},
      friction_detail: '',
      primary_success: 'none',
      brief_summary: 'Test',
    };
    mockGenerateJson.mockResolvedValue(stubbedFacet);

    const sessionFiles = [
      { path: '/test/user-only.jsonl', mtime: 2000 },
      { path: '/test/conversational.jsonl', mtime: 1000 },
    ];

    const processor = dataProcessor as unknown as FacetGenerator;
    const facets = await processor.generateFacets(sessionFiles);

    // Only the conversational session should be analyzed
    expect(mockGenerateJson).toHaveBeenCalledTimes(1);
    expect(facets).toHaveLength(1);
    expect(facets[0].session_id).toBe('conversational');
  });
});
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,26 @@ export class DataProcessor {
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether a session is conversational, i.e. its records include at
 * least one entry of type 'user' and at least one entry of type 'assistant'.
 * Sessions missing either side are not eligible for facet analysis.
 *
 * @param records - The chat records read from a single session file.
 * @returns `true` when both a user and an assistant record are present;
 *   `false` otherwise, including for an empty list.
 */
private hasUserAndAssistantRecords(records: ChatRecord[]): boolean {
  const includesType = (wanted: 'user' | 'assistant'): boolean =>
    records.some((entry) => entry.type === wanted);

  return includesType('user') && includesType('assistant');
}
|
||||||
|
|
||||||
// Analyze a single session using LLM
|
// Analyze a single session using LLM
|
||||||
private async analyzeSession(
|
private async analyzeSession(
|
||||||
records: ChatRecord[],
|
records: ChatRecord[],
|
||||||
|
|
@ -975,25 +995,49 @@ None captured`;
|
||||||
facetsOutputDir?: string,
|
facetsOutputDir?: string,
|
||||||
onProgress?: InsightProgressCallback,
|
onProgress?: InsightProgressCallback,
|
||||||
): Promise<SessionFacets[]> {
|
): Promise<SessionFacets[]> {
|
||||||
// Sort files by recency (descending) and take top 50
|
const MAX_ELIGIBLE_SESSIONS = 50;
|
||||||
const recentFiles = [...allFiles]
|
|
||||||
.sort((a, b) => b.mtime - a.mtime)
|
|
||||||
.slice(0, 50);
|
|
||||||
|
|
||||||
logger.info(`Analyzing ${recentFiles.length} recent sessions with LLM...`);
|
// Sort files by recency (descending), then select up to 50 conversational
|
||||||
|
// sessions (must contain both user and assistant records).
|
||||||
|
const sortedFiles = [...allFiles].sort((a, b) => b.mtime - a.mtime);
|
||||||
|
const eligibleSessions: Array<{
|
||||||
|
fileInfo: { path: string; mtime: number };
|
||||||
|
records: ChatRecord[];
|
||||||
|
}> = [];
|
||||||
|
|
||||||
|
for (const fileInfo of sortedFiles) {
|
||||||
|
if (eligibleSessions.length >= MAX_ELIGIBLE_SESSIONS) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
|
||||||
|
if (!this.hasUserAndAssistantRecords(records)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
eligibleSessions.push({ fileInfo, records });
|
||||||
|
} catch (e) {
|
||||||
|
logger.error(
|
||||||
|
`Error reading session file ${fileInfo.path} for facet eligibility:`,
|
||||||
|
e,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.info(
|
||||||
|
`Analyzing ${eligibleSessions.length} eligible recent sessions with LLM...`,
|
||||||
|
);
|
||||||
|
|
||||||
// Create a limit function with concurrency of 4 to avoid 429 errors
|
// Create a limit function with concurrency of 4 to avoid 429 errors
|
||||||
const limit = pLimit(CONCURRENCY_LIMIT);
|
const limit = pLimit(CONCURRENCY_LIMIT);
|
||||||
|
|
||||||
let completed = 0;
|
let completed = 0;
|
||||||
const total = recentFiles.length;
|
const total = eligibleSessions.length;
|
||||||
|
|
||||||
// Analyze sessions concurrently with limit
|
// Analyze sessions concurrently with limit
|
||||||
const analysisPromises = recentFiles.map((fileInfo) =>
|
const analysisPromises = eligibleSessions.map(({ fileInfo, records }) =>
|
||||||
limit(async () => {
|
limit(async () => {
|
||||||
try {
|
try {
|
||||||
const records = await readJsonlFile<ChatRecord>(fileInfo.path);
|
|
||||||
|
|
||||||
// Check if we already have this session analyzed
|
// Check if we already have this session analyzed
|
||||||
if (records.length > 0 && facetsOutputDir) {
|
if (records.length > 0 && facetsOutputDir) {
|
||||||
const sessionId = records[0].sessionId;
|
const sessionId = records[0].sessionId;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue