qwen-code/packages/cli/src/ui/components/arena/ArenaCards.test.tsx
Reid d1c8dff4d2
Some checks are pending
Qwen Code CI / Lint (push) Waiting to run
Qwen Code CI / Test (push) Blocked by required conditions
Qwen Code CI / Test-1 (push) Blocked by required conditions
Qwen Code CI / Test-2 (push) Blocked by required conditions
Qwen Code CI / Test-3 (push) Blocked by required conditions
Qwen Code CI / Test-4 (push) Blocked by required conditions
Qwen Code CI / Test-5 (push) Blocked by required conditions
Qwen Code CI / Test-6 (push) Blocked by required conditions
Qwen Code CI / Test-7 (push) Blocked by required conditions
Qwen Code CI / Test-8 (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
feat(arena): add comparison summary for agent results (#3394)
Adds a summary view that runs after Arena agents finish, so users can
compare model outputs without opening each agent's conversation first.

Summary surface:
- Agent status overview
- Files changed in common vs. unique to one agent
- Per-agent approach summary generated through that agent's own provider
- Token / runtime / line-change / file-count metrics

Selection dialog now supports:
- p — toggle preview for the highlighted agent
- d — toggle detailed diff
- Enter — select winner
- x — discard all results
- Esc — cancel

Approach summary generation:
- Each agent's summary is generated through that agent's own content
  generator, keeping mixed-provider Arena sessions within their
  respective auth boundaries
- 20s timeout + AbortController per agent, bounded prompt inputs
  (finalText 2K, transcript 6K, diff 6K)
- Falls back to a deterministic "Changed N files ..." summary when no
  per-agent generator is available or on error

Diff summary now handles binary, rename-only, and mode-only diffs;
the previous heuristic required textual +/- hunks and would have
dropped those.

Resolves #2559
2026-04-22 05:31:19 +08:00

150 lines
4.7 KiB
TypeScript

/**
* @license
* Copyright 2025 Qwen Team
* SPDX-License-Identifier: Apache-2.0
*/
import { describe, expect, it } from 'vitest';
import { AgentStatus } from '@qwen-code/qwen-code-core';
import { renderWithProviders } from '../../../test-utils/render.js';
import { ArenaSessionCard } from './ArenaCards.js';
import type { ArenaAgentCardData } from '../../types.js';
/**
 * Rendering tests for `ArenaSessionCard`.
 *
 * Each test renders the card with two hand-built agent results and inspects
 * the last rendered text frame for the comparison-summary sections.
 */
describe('ArenaSessionCard', () => {
  it('renders the comparison summary sections from agent results', () => {
    // Both agents modify src/auth.ts; each one also modifies a file the other
    // does not, so the fixture covers a "common" group and two "-only" groups.
    const agents: ArenaAgentCardData[] = [
      {
        label: 'qwen-coder-plus',
        status: AgentStatus.IDLE,
        durationMs: 12_000,
        totalTokens: 45_000,
        inputTokens: 30_000,
        outputTokens: 15_000,
        toolCalls: 12,
        successfulToolCalls: 12,
        failedToolCalls: 0,
        rounds: 3,
        diffSummary: {
          files: [
            { path: 'src/auth.ts', additions: 200, deletions: 80 },
            { path: 'tests/auth.test.ts', additions: 45, deletions: 9 },
          ],
          additions: 245,
          deletions: 89,
        },
        modifiedFiles: ['src/auth.ts', 'tests/auth.test.ts'],
        approachSummary: 'Refactored with JWT strategy pattern.',
      },
      {
        label: 'gpt-4o',
        status: AgentStatus.IDLE,
        durationMs: 10_000,
        totalTokens: 38_000,
        inputTokens: 25_000,
        outputTokens: 13_000,
        toolCalls: 8,
        successfulToolCalls: 8,
        failedToolCalls: 0,
        rounds: 2,
        diffSummary: {
          files: [
            { path: 'src/auth.ts', additions: 120, deletions: 40 },
            { path: 'src/middleware.ts', additions: 69, deletions: 27 },
          ],
          additions: 189,
          deletions: 67,
        },
        modifiedFiles: ['src/auth.ts', 'src/middleware.ts'],
        approachSummary: 'Made inline changes with validation layer.',
      },
    ];

    const { lastFrame } = renderWithProviders(
      <ArenaSessionCard
        sessionStatus="idle"
        task="Refactor authentication"
        totalDurationMs={12_000}
        agents={agents}
        width={100}
      />,
    );
    const output = lastFrame();

    // Header: the summary title is shown and the tabular column header is not.
    expect(output).toContain('Arena Comparison Summary');
    expect(output).not.toContain('Status Time Tokens Changes');

    // Per-agent status lines.
    expect(output).toContain('Status Summary:');
    expect(output).toContain('qwen-coder-plus: Idle');
    expect(output).toContain('gpt-4o: Idle');

    // Files grouped into shared vs. unique-to-one-agent.
    expect(output).toContain('Files Modified:');
    expect(output).toContain('common: src/auth.ts');
    expect(output).toContain('qwen-coder-plus-only: tests/auth.test.ts');
    expect(output).toContain('gpt-4o-only: src/middleware.ts');

    // Approach summary text is passed through.
    expect(output).toContain('Approach Summary:');
    expect(output).toContain('Refactored with JWT strategy pattern.');

    // Token line shows tokens and runtime, but no tool-call count suffix.
    expect(output).toContain('Token Efficiency:');
    expect(output).toContain('45,000 tokens');
    expect(output).toContain('45,000 tokens · runtime 12.0s');
    expect(output).not.toContain('45,000 tokens · runtime 12.0s · 12 tools');

    // Interactive affordances must not appear in this card.
    expect(output).not.toContain('Quick Preview:');
    expect(output).not.toContain('[View Detailed Diff]');
    expect(output).not.toContain('[Select Winner →]');
  });

  it('hides empty per-agent unique file groups', () => {
    // Both agents modify exactly the same single file, so neither has any
    // file that is unique to it.
    const agents: ArenaAgentCardData[] = [
      {
        label: 'gemma4:31b',
        status: AgentStatus.IDLE,
        durationMs: 10_000,
        totalTokens: 10_000,
        inputTokens: 7_000,
        outputTokens: 3_000,
        toolCalls: 2,
        successfulToolCalls: 2,
        failedToolCalls: 0,
        rounds: 1,
        diffSummary: {
          files: [{ path: 'reader.py', additions: 20, deletions: 0 }],
          additions: 20,
          deletions: 0,
        },
        modifiedFiles: ['reader.py'],
        approachSummary: 'Created a reader.',
      },
      {
        label: 'qwen2.5:14b',
        status: AgentStatus.IDLE,
        durationMs: 8_000,
        totalTokens: 8_000,
        inputTokens: 6_000,
        outputTokens: 2_000,
        toolCalls: 2,
        successfulToolCalls: 2,
        failedToolCalls: 0,
        rounds: 1,
        diffSummary: {
          files: [{ path: 'reader.py', additions: 22, deletions: 0 }],
          additions: 22,
          deletions: 0,
        },
        modifiedFiles: ['reader.py'],
        approachSummary: 'Created a reader.',
      },
    ];

    const { lastFrame } = renderWithProviders(
      <ArenaSessionCard
        sessionStatus="idle"
        task="Create a reader"
        totalDurationMs={10_000}
        agents={agents}
        width={100}
      />,
    );
    const output = lastFrame();

    expect(output).toContain('common: reader.py');

    // Unique-file groups are labelled "<label>-only: ..." (see the assertions
    // in the test above). With no unique files, no such group may be rendered
    // at all, whether empty or with a placeholder value.
    expect(output).not.toContain('gemma4:31b-only');
    expect(output).not.toContain('qwen2.5:14b-only');

    // Also guard against an "only <label>: none" placeholder phrasing.
    expect(output).not.toContain('only gemma4:31b: none');
    expect(output).not.toContain('only qwen2.5:14b: none');
  });
});