mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-05 15:31:27 +00:00
feat(arena): add comparison summary for agent results (#3394)
Some checks are pending
Qwen Code CI / Lint (push) Waiting to run
Qwen Code CI / Test (push) Blocked by required conditions
Qwen Code CI / Test-1 (push) Blocked by required conditions
Qwen Code CI / Test-2 (push) Blocked by required conditions
Qwen Code CI / Test-3 (push) Blocked by required conditions
Qwen Code CI / Test-4 (push) Blocked by required conditions
Qwen Code CI / Test-5 (push) Blocked by required conditions
Qwen Code CI / Test-6 (push) Blocked by required conditions
Qwen Code CI / Test-7 (push) Blocked by required conditions
Qwen Code CI / Test-8 (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
Some checks are pending
Qwen Code CI / Lint (push) Waiting to run
Qwen Code CI / Test (push) Blocked by required conditions
Qwen Code CI / Test-1 (push) Blocked by required conditions
Qwen Code CI / Test-2 (push) Blocked by required conditions
Qwen Code CI / Test-3 (push) Blocked by required conditions
Qwen Code CI / Test-4 (push) Blocked by required conditions
Qwen Code CI / Test-5 (push) Blocked by required conditions
Qwen Code CI / Test-6 (push) Blocked by required conditions
Qwen Code CI / Test-7 (push) Blocked by required conditions
Qwen Code CI / Test-8 (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
Adds a summary view that runs after Arena agents finish, so users can compare model outputs without opening each agent's conversation first.

Summary surface:
- Agent status overview
- Files changed in common vs. unique to one agent
- Per-agent approach summary generated through that agent's own provider
- Token / runtime / line-change / file-count metrics

Selection dialog now supports:
- p — toggle preview for the highlighted agent
- d — toggle detailed diff
- Enter — select winner
- x — discard all results
- Esc — cancel

Approach summary generation:
- Each agent's summary is generated through that agent's own content generator, keeping mixed-provider Arena sessions within their respective auth boundaries
- 20s timeout + AbortController per agent, bounded prompt inputs (finalText 2K, transcript 6K, diff 6K)
- Falls back to a deterministic "Changed N files ..." summary when no per-agent generator is available or on error

Diff summary now handles binary, rename-only, and mode-only diffs; the previous heuristic required textual +/- hunks and would have dropped those.

Resolves #2559
This commit is contained in:
parent
8a0489625b
commit
d1c8dff4d2
15 changed files with 1378 additions and 143 deletions
|
|
@ -276,6 +276,9 @@ function executeArenaCommand(
|
|||
rounds: result.stats.rounds,
|
||||
error: result.error,
|
||||
diff: result.diff,
|
||||
diffSummary: result.diffSummary,
|
||||
modifiedFiles: result.modifiedFiles,
|
||||
approachSummary: result.approachSummary,
|
||||
});
|
||||
|
||||
const handleAgentComplete = (event: ArenaAgentCompleteEvent) => {
|
||||
|
|
|
|||
150
packages/cli/src/ui/components/arena/ArenaCards.test.tsx
Normal file
150
packages/cli/src/ui/components/arena/ArenaCards.test.tsx
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { AgentStatus } from '@qwen-code/qwen-code-core';
|
||||
import { renderWithProviders } from '../../../test-utils/render.js';
|
||||
import { ArenaSessionCard } from './ArenaCards.js';
|
||||
import type { ArenaAgentCardData } from '../../types.js';
|
||||
|
||||
describe('ArenaSessionCard', () => {
|
||||
it('renders the comparison summary sections from agent results', () => {
|
||||
const agents: ArenaAgentCardData[] = [
|
||||
{
|
||||
label: 'qwen-coder-plus',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 12_000,
|
||||
totalTokens: 45_000,
|
||||
inputTokens: 30_000,
|
||||
outputTokens: 15_000,
|
||||
toolCalls: 12,
|
||||
successfulToolCalls: 12,
|
||||
failedToolCalls: 0,
|
||||
rounds: 3,
|
||||
diffSummary: {
|
||||
files: [
|
||||
{ path: 'src/auth.ts', additions: 200, deletions: 80 },
|
||||
{ path: 'tests/auth.test.ts', additions: 45, deletions: 9 },
|
||||
],
|
||||
additions: 245,
|
||||
deletions: 89,
|
||||
},
|
||||
modifiedFiles: ['src/auth.ts', 'tests/auth.test.ts'],
|
||||
approachSummary: 'Refactored with JWT strategy pattern.',
|
||||
},
|
||||
{
|
||||
label: 'gpt-4o',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 10_000,
|
||||
totalTokens: 38_000,
|
||||
inputTokens: 25_000,
|
||||
outputTokens: 13_000,
|
||||
toolCalls: 8,
|
||||
successfulToolCalls: 8,
|
||||
failedToolCalls: 0,
|
||||
rounds: 2,
|
||||
diffSummary: {
|
||||
files: [
|
||||
{ path: 'src/auth.ts', additions: 120, deletions: 40 },
|
||||
{ path: 'src/middleware.ts', additions: 69, deletions: 27 },
|
||||
],
|
||||
additions: 189,
|
||||
deletions: 67,
|
||||
},
|
||||
modifiedFiles: ['src/auth.ts', 'src/middleware.ts'],
|
||||
approachSummary: 'Made inline changes with validation layer.',
|
||||
},
|
||||
];
|
||||
|
||||
const { lastFrame } = renderWithProviders(
|
||||
<ArenaSessionCard
|
||||
sessionStatus="idle"
|
||||
task="Refactor authentication"
|
||||
totalDurationMs={12_000}
|
||||
agents={agents}
|
||||
width={100}
|
||||
/>,
|
||||
);
|
||||
|
||||
const output = lastFrame();
|
||||
expect(output).toContain('Arena Comparison Summary');
|
||||
expect(output).not.toContain('Status Time Tokens Changes');
|
||||
expect(output).toContain('Status Summary:');
|
||||
expect(output).toContain('qwen-coder-plus: Idle');
|
||||
expect(output).toContain('gpt-4o: Idle');
|
||||
expect(output).toContain('Files Modified:');
|
||||
expect(output).toContain('common: src/auth.ts');
|
||||
expect(output).toContain('qwen-coder-plus-only: tests/auth.test.ts');
|
||||
expect(output).toContain('gpt-4o-only: src/middleware.ts');
|
||||
expect(output).toContain('Approach Summary:');
|
||||
expect(output).toContain('Refactored with JWT strategy pattern.');
|
||||
expect(output).toContain('Token Efficiency:');
|
||||
expect(output).toContain('45,000 tokens');
|
||||
expect(output).toContain('45,000 tokens · runtime 12.0s');
|
||||
expect(output).not.toContain('45,000 tokens · runtime 12.0s · 12 tools');
|
||||
expect(output).not.toContain('Quick Preview:');
|
||||
expect(output).not.toContain('[View Detailed Diff]');
|
||||
expect(output).not.toContain('[Select Winner →]');
|
||||
});
|
||||
|
||||
it('hides empty per-agent unique file groups', () => {
|
||||
const agents: ArenaAgentCardData[] = [
|
||||
{
|
||||
label: 'gemma4:31b',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 10_000,
|
||||
totalTokens: 10_000,
|
||||
inputTokens: 7_000,
|
||||
outputTokens: 3_000,
|
||||
toolCalls: 2,
|
||||
successfulToolCalls: 2,
|
||||
failedToolCalls: 0,
|
||||
rounds: 1,
|
||||
diffSummary: {
|
||||
files: [{ path: 'reader.py', additions: 20, deletions: 0 }],
|
||||
additions: 20,
|
||||
deletions: 0,
|
||||
},
|
||||
modifiedFiles: ['reader.py'],
|
||||
approachSummary: 'Created a reader.',
|
||||
},
|
||||
{
|
||||
label: 'qwen2.5:14b',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 8_000,
|
||||
totalTokens: 8_000,
|
||||
inputTokens: 6_000,
|
||||
outputTokens: 2_000,
|
||||
toolCalls: 2,
|
||||
successfulToolCalls: 2,
|
||||
failedToolCalls: 0,
|
||||
rounds: 1,
|
||||
diffSummary: {
|
||||
files: [{ path: 'reader.py', additions: 22, deletions: 0 }],
|
||||
additions: 22,
|
||||
deletions: 0,
|
||||
},
|
||||
modifiedFiles: ['reader.py'],
|
||||
approachSummary: 'Created a reader.',
|
||||
},
|
||||
];
|
||||
|
||||
const { lastFrame } = renderWithProviders(
|
||||
<ArenaSessionCard
|
||||
sessionStatus="idle"
|
||||
task="Create a reader"
|
||||
totalDurationMs={10_000}
|
||||
agents={agents}
|
||||
width={100}
|
||||
/>,
|
||||
);
|
||||
|
||||
const output = lastFrame();
|
||||
expect(output).toContain('common: reader.py');
|
||||
expect(output).not.toContain('only gemma4:31b: none');
|
||||
expect(output).not.toContain('only qwen2.5:14b: none');
|
||||
});
|
||||
});
|
||||
|
|
@ -10,6 +10,7 @@ import { theme } from '../../semantic-colors.js';
|
|||
import { formatDuration } from '../../utils/formatters.js';
|
||||
import { getArenaStatusLabel } from '../../utils/displayUtils.js';
|
||||
import type { ArenaAgentCardData } from '../../types.js';
|
||||
import type { ArenaDiffSummary } from '@qwen-code/qwen-code-core';
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -84,36 +85,25 @@ interface ArenaSessionCardProps {
|
|||
width?: number;
|
||||
}
|
||||
|
||||
/**
 * Pad or truncate a string to exactly `len` UTF-16 code units.
 *
 * Width is measured with `String.prototype.length`, so it matches the
 * on-screen column count only for characters that occupy one code unit and
 * one terminal cell.
 *
 * @param str - Text to fit into the column.
 * @param len - Target width in code units; negative values are treated as 0.
 * @param align - `'left'` pads on the right, `'right'` pads on the left.
 * @returns A string that is `len` code units long (or `''` when `len <= 0`).
 */
function pad(
  str: string,
  len: number,
  align: 'left' | 'right' = 'left',
): string {
  // A negative width would make `slice(0, len)` count from the end of the
  // string and return a non-empty result; clamp it to an empty column.
  const width = Math.max(0, len);

  if (str.length >= width) {
    const cut = str.slice(0, width);
    // Do not leave half of a surrogate pair at the cut point: swap the
    // dangling high surrogate for a space so the width is preserved.
    const last = cut.charCodeAt(cut.length - 1);
    const next = str.charCodeAt(cut.length);
    const splitsSurrogatePair =
      last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    return splitsSurrogatePair ? `${cut.slice(0, -1)} ` : cut;
  }

  const padding = ' '.repeat(width - str.length);
  return align === 'right' ? padding + str : str + padding;
}
|
||||
|
||||
/**
 * Truncate a string to at most `maxLen` UTF-16 code units, replacing the
 * removed tail with a single ellipsis character.
 *
 * @param str - Text to shorten.
 * @param maxLen - Maximum length of the result, ellipsis included.
 * @returns `str` unchanged when it already fits; `''` when `maxLen <= 0`;
 *   otherwise the leading part of `str` followed by `…`.
 */
function truncate(str: string, maxLen: number): string {
  if (str.length <= maxLen) return str;

  // With a non-positive budget `slice(0, maxLen - 1)` would count from the
  // end of the string and yield a result longer than the limit.
  if (maxLen <= 0) return '';

  let head = str.slice(0, maxLen - 1);

  // Avoid ending on the first half of a surrogate pair.
  const last = head.charCodeAt(head.length - 1);
  const next = str.charCodeAt(head.length);
  if (last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff) {
    head = head.slice(0, -1);
  }

  return `${head}…`;
}
|
||||
|
||||
/**
|
||||
* Calculate diff stats from a unified diff string.
|
||||
* Returns the stats string and individual counts for colored rendering.
|
||||
*/
|
||||
function getDiffStats(diff: string | undefined): {
|
||||
function getDiffStats(
|
||||
diff: string | undefined,
|
||||
diffSummary?: ArenaDiffSummary,
|
||||
): {
|
||||
text: string;
|
||||
additions: number;
|
||||
deletions: number;
|
||||
} {
|
||||
if (diffSummary) {
|
||||
return {
|
||||
text: `+${diffSummary.additions}/-${diffSummary.deletions}`,
|
||||
additions: diffSummary.additions,
|
||||
deletions: diffSummary.deletions,
|
||||
};
|
||||
}
|
||||
if (!diff) return { text: '', additions: 0, deletions: 0 };
|
||||
const lines = diff.split('\n');
|
||||
let additions = 0;
|
||||
|
|
@ -128,33 +118,71 @@ function getDiffStats(diff: string | undefined): {
|
|||
return { text: `+${additions}/-${deletions}`, additions, deletions };
|
||||
}
|
||||
|
||||
const MAX_MODEL_NAME_LENGTH = 35;
|
||||
/** Default number of paths shown before the list is collapsed to "+N more". */
const MAX_FILE_LIST_ITEMS = 4;

/**
 * Render a list of file paths as a single comma-separated line.
 *
 * @param files - Paths to show; `undefined` and `[]` both render as `'none'`.
 * @param maxItems - Maximum number of paths to print before collapsing the
 *   remainder into a `, +N more` suffix. Defaults to `MAX_FILE_LIST_ITEMS`;
 *   values below 1 are raised to 1 so the line never starts with the suffix.
 * @returns The formatted line.
 */
function formatFileList(
  files: readonly string[] | undefined,
  maxItems: number = MAX_FILE_LIST_ITEMS,
): string {
  if (!files || files.length === 0) {
    return 'none';
  }

  const limit = Number.isNaN(maxItems)
    ? MAX_FILE_LIST_ITEMS
    : Math.max(1, Math.floor(maxItems));

  const visible = files.slice(0, limit);
  const hiddenCount = files.length - visible.length;
  const suffix = hiddenCount > 0 ? `, +${hiddenCount} more` : '';
  return `${visible.join(', ')}${suffix}`;
}
|
||||
|
||||
/**
 * Resolve the list of file paths an agent touched.
 *
 * Uses `agent.modifiedFiles` when it is set; otherwise falls back to the
 * paths listed in `agent.diffSummary.files`, and finally to an empty list.
 */
function getAgentFiles(agent: ArenaAgentCardData): string[] {
  const explicit = agent.modifiedFiles;
  if (explicit !== undefined && explicit !== null) {
    return explicit;
  }

  const summary = agent.diffSummary;
  if (summary === undefined || summary === null) {
    return [];
  }

  return summary.files.map((entry) => entry.path);
}
|
||||
|
||||
/**
 * Group the files touched by the agents for the "Files Modified" section.
 *
 * The first group is always `common`: paths touched by more than one agent,
 * sorted. It is followed by one `<label>-only` group per agent that has at
 * least one path no other agent touched; agents without unique paths get no
 * group.
 *
 * @param agents - Agent card data in display order.
 * @returns The groups in render order (`common` first, then agent order).
 */
function getComparisonFileGroups(
  agents: ArenaAgentCardData[],
): Array<{ label: string; files: string[] }> {
  // De-duplicate each agent's paths once and reuse the result for both
  // passes. Without this, a path listed twice by a single agent has a count
  // of 1 yet would be emitted twice in that agent's "-only" group.
  const filesByAgent = agents.map((agent) => ({
    label: agent.label,
    files: [...new Set(getAgentFiles(agent))],
  }));

  // Number of agents that touched each path.
  const counts = new Map<string, number>();
  for (const { files } of filesByAgent) {
    for (const file of files) {
      counts.set(file, (counts.get(file) ?? 0) + 1);
    }
  }

  const common = [...counts.entries()]
    .filter(([, count]) => count > 1)
    .map(([file]) => file)
    .sort();
  const groups = [{ label: 'common', files: common }];

  for (const { label, files } of filesByAgent) {
    const unique = files.filter((file) => counts.get(file) === 1).sort();
    if (unique.length > 0) {
      groups.push({ label: `${label}-only`, files: unique });
    }
  }

  return groups;
}
|
||||
|
||||
/**
 * Pick the tree-drawing prefix for a row: the closing corner for the final
 * row of a list, the tee connector for every other row.
 */
function getTreeBranch(index: number, total: number): string {
  const isLastRow = index === total - 1;
  if (isLastRow) {
    return '└─';
  }
  return '├─';
}
|
||||
|
||||
export const ArenaSessionCard: React.FC<ArenaSessionCardProps> = ({
|
||||
sessionStatus,
|
||||
task,
|
||||
agents,
|
||||
width,
|
||||
}) => {
|
||||
// Truncate task for display
|
||||
const maxTaskLen = 60;
|
||||
const displayTask =
|
||||
task.length > maxTaskLen ? task.slice(0, maxTaskLen - 1) + '…' : task;
|
||||
|
||||
// Column widths for the agent table (unified with Arena Results)
|
||||
const colStatus = 14;
|
||||
const colTime = 8;
|
||||
const colTokens = 10;
|
||||
const colChanges = 10;
|
||||
|
||||
const titleLabel =
|
||||
sessionStatus === 'idle'
|
||||
? 'Agents Status · Idle'
|
||||
: sessionStatus === 'completed'
|
||||
? 'Arena Complete'
|
||||
: sessionStatus === 'cancelled'
|
||||
? 'Arena Cancelled'
|
||||
: 'Arena Failed';
|
||||
sessionStatus === 'idle' || sessionStatus === 'completed'
|
||||
? 'Arena Comparison Summary'
|
||||
: sessionStatus === 'cancelled'
|
||||
? 'Arena Cancelled'
|
||||
: 'Arena Failed';
|
||||
|
||||
return (
|
||||
<Box
|
||||
|
|
@ -174,96 +202,109 @@ export const ArenaSessionCard: React.FC<ArenaSessionCardProps> = ({
|
|||
|
||||
<Box height={1} />
|
||||
|
||||
{/* Task */}
|
||||
<Box>
|
||||
<Text>
|
||||
<Text color={theme.text.secondary}>Task: </Text>
|
||||
<Text color={theme.text.primary}>"{displayTask}"</Text>
|
||||
</Text>
|
||||
</Box>
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
{/* Table header - unified columns: Agent, Status, Time, Tokens, Changes */}
|
||||
<Box>
|
||||
<Box flexGrow={1}>
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Agent
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colStatus} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Status
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colTime} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Time
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colTokens} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Tokens
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colChanges} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Changes
|
||||
</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
{/* Table separator */}
|
||||
<Box>
|
||||
<Text color={theme.border.default}>
|
||||
{'─'.repeat((width ?? 60) - 8)}
|
||||
</Text>
|
||||
</Box>
|
||||
|
||||
{/* Agent rows */}
|
||||
{agents.map((agent) => {
|
||||
const { text: statusText, color } = getArenaStatusLabel(agent.status);
|
||||
const diffStats = getDiffStats(agent.diff);
|
||||
return (
|
||||
<Box key={agent.label}>
|
||||
<Box flexGrow={1}>
|
||||
<Text color={theme.text.primary}>
|
||||
{truncate(agent.label, MAX_MODEL_NAME_LENGTH)}
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colStatus} justifyContent="flex-end">
|
||||
<Text color={color}>{statusText}</Text>
|
||||
</Box>
|
||||
<Box width={colTime} justifyContent="flex-end">
|
||||
<Text color={theme.text.primary}>
|
||||
{pad(formatDuration(agent.durationMs), colTime - 1, 'right')}
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colTokens} justifyContent="flex-end">
|
||||
<Text color={theme.text.primary}>
|
||||
{pad(
|
||||
agent.totalTokens.toLocaleString(),
|
||||
colTokens - 1,
|
||||
'right',
|
||||
)}
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colChanges} justifyContent="flex-end">
|
||||
{diffStats.additions > 0 || diffStats.deletions > 0 ? (
|
||||
<Text>
|
||||
<Text color={theme.status.success}>
|
||||
+{diffStats.additions}
|
||||
{(sessionStatus === 'idle' || sessionStatus === 'completed') && (
|
||||
<>
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Status Summary:
|
||||
</Text>
|
||||
{agents.map((agent, index) => {
|
||||
const { text: statusText, color } = getArenaStatusLabel(
|
||||
agent.status,
|
||||
);
|
||||
return (
|
||||
<Box key={agent.label} marginLeft={2}>
|
||||
<Text color={theme.text.secondary}>
|
||||
{index === agents.length - 1 ? '└─' : '├─'} {agent.label}
|
||||
:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.secondary}>/</Text>
|
||||
<Text color={theme.status.error}>-{diffStats.deletions}</Text>
|
||||
</Text>
|
||||
) : (
|
||||
<Text color={theme.text.secondary}>-</Text>
|
||||
)}
|
||||
</Box>
|
||||
<Text color={color}>{statusText}</Text>
|
||||
</Box>
|
||||
);
|
||||
})}
|
||||
</Box>
|
||||
);
|
||||
})}
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Files Modified:
|
||||
</Text>
|
||||
{getComparisonFileGroups(agents).map((group, index, groups) => (
|
||||
<Box key={group.label} marginLeft={2}>
|
||||
<Text color={theme.text.secondary}>
|
||||
{getTreeBranch(index, groups.length)} {group.label}:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.primary}>
|
||||
{formatFileList(group.files)}
|
||||
</Text>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Approach Summary:
|
||||
</Text>
|
||||
{agents.map((agent, index) => {
|
||||
const diffStats = getDiffStats(agent.diff, agent.diffSummary);
|
||||
const files = getAgentFiles(agent).length;
|
||||
const branch = index === agents.length - 1 ? '└─' : '├─';
|
||||
const summary =
|
||||
agent.approachSummary ?? 'No approach summary available.';
|
||||
return (
|
||||
<Box key={agent.label} marginLeft={2}>
|
||||
<Text>
|
||||
<Text color={theme.text.secondary}>
|
||||
{branch} {agent.label}:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.primary}>{summary} </Text>
|
||||
<Text color={theme.text.secondary}>(</Text>
|
||||
<Text color={theme.text.accent}>{files}</Text>
|
||||
<Text color={theme.text.secondary}>
|
||||
{files === 1 ? ' file, ' : ' files, '}
|
||||
</Text>
|
||||
<Text color={theme.status.success}>
|
||||
+{diffStats.additions}
|
||||
</Text>
|
||||
<Text color={theme.text.secondary}> </Text>
|
||||
<Text color={theme.status.error}>
|
||||
-{diffStats.deletions}
|
||||
</Text>
|
||||
<Text color={theme.text.secondary}> lines, </Text>
|
||||
<Text color={theme.text.accent}>{agent.toolCalls}</Text>
|
||||
<Text color={theme.text.secondary}>
|
||||
{agent.toolCalls === 1 ? ' tool call)' : ' tool calls)'}
|
||||
</Text>
|
||||
</Text>
|
||||
</Box>
|
||||
);
|
||||
})}
|
||||
</Box>
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Token Efficiency:
|
||||
</Text>
|
||||
{agents.map((agent, index) => (
|
||||
<Box key={agent.label} marginLeft={2}>
|
||||
<Text color={theme.text.secondary}>
|
||||
{index === agents.length - 1 ? '└─' : '├─'} {agent.label}
|
||||
:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.primary}>
|
||||
{agent.totalTokens.toLocaleString()} tokens · runtime{' '}
|
||||
{formatDuration(agent.durationMs)}
|
||||
</Text>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
|
|
@ -271,9 +312,8 @@ export const ArenaSessionCard: React.FC<ArenaSessionCardProps> = ({
|
|||
{sessionStatus === 'idle' && (
|
||||
<Box flexDirection="column">
|
||||
<Text color={theme.text.secondary}>
|
||||
Switch to an agent tab to continue, or{' '}
|
||||
<Text color={theme.text.accent}>/arena select</Text> to pick a
|
||||
winner.
|
||||
Run <Text color={theme.text.accent}>/arena select</Text> to view
|
||||
detailed diff or pick a winner.
|
||||
</Text>
|
||||
</Box>
|
||||
)}
|
||||
|
|
|
|||
108
packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx
Normal file
108
packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { waitFor } from '@testing-library/react';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
AgentStatus,
|
||||
ArenaSessionStatus,
|
||||
type ArenaManager,
|
||||
type Config,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import { renderWithProviders } from '../../../test-utils/render.js';
|
||||
import { ArenaSelectDialog } from './ArenaSelectDialog.js';
|
||||
|
||||
describe('ArenaSelectDialog', () => {
|
||||
it('toggles quick preview and detailed diff for the highlighted agent', async () => {
|
||||
const result = {
|
||||
sessionId: 'arena-1',
|
||||
task: 'Update auth',
|
||||
status: ArenaSessionStatus.IDLE,
|
||||
agents: [
|
||||
{
|
||||
agentId: 'model-1',
|
||||
model: { modelId: 'model-1', authType: 'openai' },
|
||||
status: AgentStatus.IDLE,
|
||||
worktree: {
|
||||
id: 'w1',
|
||||
name: 'model-1',
|
||||
path: '/tmp/model-1',
|
||||
branch: 'arena/model-1',
|
||||
isActive: true,
|
||||
createdAt: 1,
|
||||
},
|
||||
stats: {
|
||||
rounds: 1,
|
||||
totalTokens: 1000,
|
||||
inputTokens: 700,
|
||||
outputTokens: 300,
|
||||
durationMs: 2000,
|
||||
toolCalls: 2,
|
||||
successfulToolCalls: 2,
|
||||
failedToolCalls: 0,
|
||||
},
|
||||
diff: `diff --git a/src/auth.ts b/src/auth.ts
|
||||
--- a/src/auth.ts
|
||||
+++ b/src/auth.ts
|
||||
@@ -1 +1 @@
|
||||
-old
|
||||
+new`,
|
||||
diffSummary: {
|
||||
files: [{ path: 'src/auth.ts', additions: 1, deletions: 1 }],
|
||||
additions: 1,
|
||||
deletions: 1,
|
||||
},
|
||||
modifiedFiles: ['src/auth.ts'],
|
||||
approachSummary: 'Updated the auth implementation inline.',
|
||||
startedAt: 1,
|
||||
},
|
||||
],
|
||||
startedAt: 1,
|
||||
wasRepoInitialized: false,
|
||||
};
|
||||
|
||||
const manager = {
|
||||
getResult: vi.fn(() => result),
|
||||
getAgentStates: vi.fn(() => [
|
||||
{
|
||||
agentId: 'model-1',
|
||||
model: { modelId: 'model-1', authType: 'openai' },
|
||||
status: AgentStatus.IDLE,
|
||||
stats: result.agents[0]!.stats,
|
||||
},
|
||||
]),
|
||||
getAgentState: vi.fn(),
|
||||
applyAgentResult: vi.fn(),
|
||||
} as unknown as ArenaManager;
|
||||
|
||||
const config = {
|
||||
getArenaManager: () => manager,
|
||||
cleanupArenaRuntime: vi.fn(),
|
||||
getChatRecordingService: () => undefined,
|
||||
} as unknown as Config;
|
||||
|
||||
const { lastFrame, stdin } = renderWithProviders(
|
||||
<ArenaSelectDialog
|
||||
manager={manager}
|
||||
config={config}
|
||||
addItem={vi.fn()}
|
||||
closeArenaDialog={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
|
||||
stdin.write('p');
|
||||
await waitFor(() => {
|
||||
expect(lastFrame()).toContain('Quick Preview · model-1');
|
||||
});
|
||||
expect(lastFrame()).toContain('Updated the auth implementation inline.');
|
||||
|
||||
stdin.write('d');
|
||||
await waitFor(() => {
|
||||
expect(lastFrame()).toContain('Detailed Diff · model-1');
|
||||
});
|
||||
expect(lastFrame()).toContain('diff --git a/src/auth.ts b/src/auth.ts');
|
||||
});
|
||||
});
|
||||
|
|
@ -5,12 +5,13 @@
|
|||
*/
|
||||
|
||||
import type React from 'react';
|
||||
import { useCallback, useMemo } from 'react';
|
||||
import { useCallback, useMemo, useState } from 'react';
|
||||
import { Box, Text } from 'ink';
|
||||
import {
|
||||
type ArenaManager,
|
||||
isSuccessStatus,
|
||||
type Config,
|
||||
type ArenaAgentResult,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import { theme } from '../../semantic-colors.js';
|
||||
import { useKeypress } from '../../hooks/useKeypress.js';
|
||||
|
|
@ -134,6 +135,17 @@ export function ArenaSelectDialog({
|
|||
|
||||
const result = manager.getResult();
|
||||
const agents = manager.getAgentStates();
|
||||
const firstSelectableAgentId = agents.find((agent) =>
|
||||
isSuccessStatus(agent.status),
|
||||
)?.agentId;
|
||||
const [selectedAgentId, setSelectedAgentId] = useState<string | undefined>(
|
||||
firstSelectableAgentId,
|
||||
);
|
||||
const [showPreview, setShowPreview] = useState(false);
|
||||
const [showDetailedDiff, setShowDetailedDiff] = useState(false);
|
||||
const selectedResult = result?.agents.find(
|
||||
(agent) => agent.agentId === selectedAgentId,
|
||||
);
|
||||
|
||||
const items: Array<DescriptiveRadioSelectItem<string>> = useMemo(
|
||||
() =>
|
||||
|
|
@ -146,11 +158,16 @@ export function ArenaSelectDialog({
|
|||
// Build diff summary from cached result if available
|
||||
let diffAdditions = 0;
|
||||
let diffDeletions = 0;
|
||||
let fileCount = 0;
|
||||
if (isSuccessStatus(agent.status) && result) {
|
||||
const agentResult = result.agents.find(
|
||||
(a) => a.agentId === agent.agentId,
|
||||
);
|
||||
if (agentResult?.diff) {
|
||||
if (agentResult?.diffSummary) {
|
||||
diffAdditions = agentResult.diffSummary.additions;
|
||||
diffDeletions = agentResult.diffSummary.deletions;
|
||||
fileCount = agentResult.diffSummary.files.length;
|
||||
} else if (agentResult?.diff) {
|
||||
const lines = agentResult.diff.split('\n');
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('+') && !line.startsWith('+++')) {
|
||||
|
|
@ -160,6 +177,7 @@ export function ArenaSelectDialog({
|
|||
}
|
||||
}
|
||||
}
|
||||
fileCount = agentResult?.modifiedFiles?.length ?? fileCount;
|
||||
}
|
||||
|
||||
// Title: full model name (not truncated)
|
||||
|
|
@ -173,6 +191,12 @@ export function ArenaSelectDialog({
|
|||
<Text color={theme.text.secondary}>{duration}</Text>
|
||||
<Text color={theme.text.secondary}> · </Text>
|
||||
<Text color={theme.text.secondary}>{tokens} tokens</Text>
|
||||
{fileCount > 0 && (
|
||||
<>
|
||||
<Text color={theme.text.secondary}> · </Text>
|
||||
<Text color={theme.text.secondary}>{fileCount} files</Text>
|
||||
</>
|
||||
)}
|
||||
{(diffAdditions > 0 || diffDeletions > 0) && (
|
||||
<>
|
||||
<Text color={theme.text.secondary}> · </Text>
|
||||
|
|
@ -201,7 +225,13 @@ export function ArenaSelectDialog({
|
|||
if (key.name === 'escape') {
|
||||
closeArenaDialog();
|
||||
}
|
||||
if (key.name === 'p' && !key.ctrl && !key.meta) {
|
||||
setShowPreview((current) => !current);
|
||||
}
|
||||
if (key.name === 'd' && !key.ctrl && !key.meta) {
|
||||
setShowDetailedDiff((current) => !current);
|
||||
}
|
||||
if (key.name === 'x' && !key.ctrl && !key.meta) {
|
||||
onDiscard();
|
||||
}
|
||||
},
|
||||
|
|
@ -245,16 +275,136 @@ export function ArenaSelectDialog({
|
|||
onSelect={(agentId: string) => {
|
||||
onSelect(agentId);
|
||||
}}
|
||||
onHighlight={(agentId: string) => {
|
||||
setSelectedAgentId(agentId);
|
||||
}}
|
||||
isFocused={true}
|
||||
showNumbers={false}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
{showPreview && selectedResult && (
|
||||
<ArenaAgentPreview result={selectedResult} />
|
||||
)}
|
||||
|
||||
{showDetailedDiff && selectedResult && (
|
||||
<ArenaAgentDetailedDiff result={selectedResult} />
|
||||
)}
|
||||
|
||||
<Box marginTop={1}>
|
||||
<Text color={theme.text.secondary}>
|
||||
Enter to select, d to discard all, Esc to cancel
|
||||
p preview, d detailed diff, Enter select winner, x discard all, Esc
|
||||
cancel
|
||||
</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Quick-preview panel for one agent result: approach summary, the files it
 * touched, and headline metrics (tokens, duration, tool calls).
 *
 * @param result - The agent result to summarise.
 */
function ArenaAgentPreview({
  result,
}: {
  result: ArenaAgentResult;
}): React.JSX.Element {
  // Prefer `modifiedFiles`, falling back to the diff summary. The selector
  // row derives its file count with the same precedence, so the preview must
  // not show "none" while the row reports N files.
  const filePaths =
    result.modifiedFiles ??
    result.diffSummary?.files.map((file) => file.path) ??
    [];
  return (
    <Box marginTop={1} flexDirection="column">
      <Text bold color={theme.text.primary}>
        Quick Preview · {result.model.modelId}
      </Text>
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>Approach: </Text>
        <Text color={theme.text.primary}>
          {result.approachSummary ?? 'No approach summary available.'}
        </Text>
      </Box>
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>Major files: </Text>
        <Text color={theme.text.primary}>{formatFileList(filePaths)}</Text>
      </Box>
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>Metrics: </Text>
        <Text color={theme.text.primary}>
          {result.stats.totalTokens.toLocaleString()} tokens ·{' '}
          {formatDuration(result.stats.durationMs)} · {result.stats.toolCalls}{' '}
          tools
        </Text>
      </Box>
    </Box>
  );
}
|
||||
|
||||
/**
 * Detailed-diff panel for one agent result. Shows the diff lines returned by
 * `getVisibleDiffLines`, each coloured via `getDiffLineColor`, or a
 * placeholder message when there are no lines to show.
 */
function ArenaAgentDetailedDiff({
  result,
}: {
  result: ArenaAgentResult;
}): React.JSX.Element {
  const visibleLines = getVisibleDiffLines(result.diff);
  const hasDiff = visibleLines.length !== 0;

  return (
    <Box marginTop={1} flexDirection="column">
      <Text bold color={theme.text.primary}>
        Detailed Diff · {result.model.modelId}
      </Text>
      {hasDiff ? (
        <Box marginLeft={2} flexDirection="column">
          {visibleLines.map((text, position) => (
            <Text key={`${position}-${text}`} color={getDiffLineColor(text)}>
              {text}
            </Text>
          ))}
        </Box>
      ) : (
        <Box marginLeft={2}>
          <Text color={theme.text.secondary}>No diff available.</Text>
        </Box>
      )}
    </Box>
  );
}
|
||||
|
||||
/**
 * Render a list of file paths as a single comma-separated line.
 *
 * @param files - Paths to show; an empty list renders as `'none'`.
 * @param maxItems - Maximum number of paths to print before the remainder is
 *   collapsed into a `, +N more` suffix. Defaults to 6; values below 1 are
 *   raised to 1 so the line never starts with the suffix.
 * @returns The formatted line.
 */
function formatFileList(files: readonly string[], maxItems: number = 6): string {
  if (files.length === 0) {
    return 'none';
  }

  const limit = Number.isNaN(maxItems) ? 6 : Math.max(1, Math.floor(maxItems));
  const visible = files.slice(0, limit);
  const hiddenCount = files.length - visible.length;
  const suffix = hiddenCount > 0 ? `, +${hiddenCount} more` : '';
  return `${visible.join(', ')}${suffix}`;
}
|
||||
|
||||
/**
 * Split a unified diff into the lines to display, capping the output.
 *
 * @param diff - Raw diff text; `undefined` or `''` yields an empty list.
 * @param maxLines - Maximum number of diff lines to return before a final
 *   `... truncated N diff lines` marker is appended. Defaults to 180.
 * @returns The diff lines, plus the truncation marker when lines were cut.
 */
function getVisibleDiffLines(
  diff: string | undefined,
  maxLines: number = 180,
): string[] {
  if (!diff) {
    return [];
  }

  // Accept both LF and CRLF so no stray carriage return reaches the output.
  const lines = diff.split(/\r?\n/);

  // A terminating newline is not an extra diff line: without this it would
  // be returned as a blank row and inflate the truncated-line count by one.
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  const limit = Number.isNaN(maxLines)
    ? 180
    : Math.max(0, Math.trunc(maxLines));
  if (lines.length <= limit) {
    return lines;
  }

  return [
    ...lines.slice(0, limit),
    `... truncated ${lines.length - limit} diff lines`,
  ];
}
|
||||
|
||||
/**
 * Choose the theme colour for one line of a unified diff.
 *
 * Header lines (`diff --git`, `@@`, `---`, `+++`) use the accent colour,
 * other `+` lines the success colour, other `-` lines the error colour, and
 * everything else the secondary text colour.
 */
function getDiffLineColor(line: string): string {
  const headerPrefixes = ['diff --git', '@@', '---', '+++'];
  const isHeader = headerPrefixes.some((prefix) => line.startsWith(prefix));
  if (isHeader) {
    return theme.text.accent;
  }

  switch (line.charAt(0)) {
    case '+':
      return theme.status.success;
    case '-':
      return theme.status.error;
    default:
      return theme.text.secondary;
  }
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import type {
|
|||
ToolConfirmationOutcome,
|
||||
ToolResultDisplay,
|
||||
AgentStatus,
|
||||
ArenaDiffSummary,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import type { PartListUnion } from '@google/genai';
|
||||
import { type ReactNode } from 'react';
|
||||
|
|
@ -354,6 +355,9 @@ export interface ArenaAgentCardData {
|
|||
rounds: number;
|
||||
error?: string;
|
||||
diff?: string;
|
||||
diffSummary?: ArenaDiffSummary;
|
||||
modifiedFiles?: string[];
|
||||
approachSummary?: string;
|
||||
}
|
||||
|
||||
export type HistoryItemArenaAgentComplete = HistoryItemBase & {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue