mirror of
https://github.com/QwenLM/qwen-code.git
synced 2026-05-16 19:44:31 +00:00
feat(arena): add comparison summary for agent results (#3394)
Some checks are pending
Qwen Code CI / Lint (push) Waiting to run
Qwen Code CI / Test (push) Blocked by required conditions
Qwen Code CI / Test-1 (push) Blocked by required conditions
Qwen Code CI / Test-2 (push) Blocked by required conditions
Qwen Code CI / Test-3 (push) Blocked by required conditions
Qwen Code CI / Test-4 (push) Blocked by required conditions
Qwen Code CI / Test-5 (push) Blocked by required conditions
Qwen Code CI / Test-6 (push) Blocked by required conditions
Qwen Code CI / Test-7 (push) Blocked by required conditions
Qwen Code CI / Test-8 (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
Some checks are pending
Qwen Code CI / Lint (push) Waiting to run
Qwen Code CI / Test (push) Blocked by required conditions
Qwen Code CI / Test-1 (push) Blocked by required conditions
Qwen Code CI / Test-2 (push) Blocked by required conditions
Qwen Code CI / Test-3 (push) Blocked by required conditions
Qwen Code CI / Test-4 (push) Blocked by required conditions
Qwen Code CI / Test-5 (push) Blocked by required conditions
Qwen Code CI / Test-6 (push) Blocked by required conditions
Qwen Code CI / Test-7 (push) Blocked by required conditions
Qwen Code CI / Test-8 (push) Blocked by required conditions
Qwen Code CI / Post Coverage Comment (push) Blocked by required conditions
Qwen Code CI / CodeQL (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:docker (push) Waiting to run
E2E Tests / E2E Test (Linux) - sandbox:none (push) Waiting to run
E2E Tests / E2E Test - macOS (push) Waiting to run
Adds a summary view that runs after Arena agents finish, so users can compare model outputs without opening each agent's conversation first.

Summary surface:
- Agent status overview
- Files changed in common vs. unique to one agent
- Per-agent approach summary generated through that agent's own provider
- Token / runtime / line-change / file-count metrics

Selection dialog now supports:
- p — toggle preview for the highlighted agent
- d — toggle detailed diff
- Enter — select winner
- x — discard all results
- Esc — cancel

Approach summary generation:
- Each agent's summary is generated through that agent's own content generator, keeping mixed-provider Arena sessions within their respective auth boundaries
- 20s timeout + AbortController per agent, bounded prompt inputs (finalText 2K, transcript 6K, diff 6K)
- Falls back to a deterministic "Changed N files ..." summary when no per-agent generator is available or on error

Diff summary now handles binary, rename-only, and mode-only diffs; the previous heuristic required textual +/- hunks and would have dropped those.

Resolves #2559
This commit is contained in:
parent
8a0489625b
commit
d1c8dff4d2
15 changed files with 1378 additions and 143 deletions
|
|
@ -90,8 +90,9 @@ When all agents complete, the Arena enters the result comparison phase. You'll s
|
|||
|
||||
- **Status summary**: Which agents succeeded, failed, or were cancelled
|
||||
- **Execution metrics**: Duration, rounds of reasoning, token usage, and tool call counts for each agent
|
||||
- **Arena comparison summary**: Files changed in common vs. by one agent only, line-change counts, token efficiency, and a high-level approach summary generated from each agent's diff, metrics, and conversation history
|
||||
|
||||
A selection dialog presents the successful agents. Choose one to apply its changes to your main workspace, or discard all results.
|
||||
A selection dialog presents the successful agents. Choose one to apply its changes to your main workspace, or discard all results. Press `p` to toggle a quick preview for the highlighted agent, or `d` to toggle that agent's detailed diff before selecting a winner.
|
||||
|
||||
### What happens when you select a winner
|
||||
|
||||
|
|
@ -99,7 +100,7 @@ A selection dialog presents the successful agents. Choose one to apply its chang
|
|||
2. The diff is applied to your main working directory
|
||||
3. All worktrees and temporary branches are cleaned up automatically
|
||||
|
||||
If you want to inspect results before deciding, each agent's full conversation history is available via the tab bar while the selection dialog is active.
|
||||
If you want to inspect the complete reasoning path before deciding, each agent's full conversation history is still available via the tab bar while the selection dialog is active.
|
||||
|
||||
## Configuration
|
||||
|
||||
|
|
|
|||
|
|
@ -276,6 +276,9 @@ function executeArenaCommand(
|
|||
rounds: result.stats.rounds,
|
||||
error: result.error,
|
||||
diff: result.diff,
|
||||
diffSummary: result.diffSummary,
|
||||
modifiedFiles: result.modifiedFiles,
|
||||
approachSummary: result.approachSummary,
|
||||
});
|
||||
|
||||
const handleAgentComplete = (event: ArenaAgentCompleteEvent) => {
|
||||
|
|
|
|||
150
packages/cli/src/ui/components/arena/ArenaCards.test.tsx
Normal file
150
packages/cli/src/ui/components/arena/ArenaCards.test.tsx
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { AgentStatus } from '@qwen-code/qwen-code-core';
|
||||
import { renderWithProviders } from '../../../test-utils/render.js';
|
||||
import { ArenaSessionCard } from './ArenaCards.js';
|
||||
import type { ArenaAgentCardData } from '../../types.js';
|
||||
|
||||
describe('ArenaSessionCard', () => {
|
||||
it('renders the comparison summary sections from agent results', () => {
|
||||
const agents: ArenaAgentCardData[] = [
|
||||
{
|
||||
label: 'qwen-coder-plus',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 12_000,
|
||||
totalTokens: 45_000,
|
||||
inputTokens: 30_000,
|
||||
outputTokens: 15_000,
|
||||
toolCalls: 12,
|
||||
successfulToolCalls: 12,
|
||||
failedToolCalls: 0,
|
||||
rounds: 3,
|
||||
diffSummary: {
|
||||
files: [
|
||||
{ path: 'src/auth.ts', additions: 200, deletions: 80 },
|
||||
{ path: 'tests/auth.test.ts', additions: 45, deletions: 9 },
|
||||
],
|
||||
additions: 245,
|
||||
deletions: 89,
|
||||
},
|
||||
modifiedFiles: ['src/auth.ts', 'tests/auth.test.ts'],
|
||||
approachSummary: 'Refactored with JWT strategy pattern.',
|
||||
},
|
||||
{
|
||||
label: 'gpt-4o',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 10_000,
|
||||
totalTokens: 38_000,
|
||||
inputTokens: 25_000,
|
||||
outputTokens: 13_000,
|
||||
toolCalls: 8,
|
||||
successfulToolCalls: 8,
|
||||
failedToolCalls: 0,
|
||||
rounds: 2,
|
||||
diffSummary: {
|
||||
files: [
|
||||
{ path: 'src/auth.ts', additions: 120, deletions: 40 },
|
||||
{ path: 'src/middleware.ts', additions: 69, deletions: 27 },
|
||||
],
|
||||
additions: 189,
|
||||
deletions: 67,
|
||||
},
|
||||
modifiedFiles: ['src/auth.ts', 'src/middleware.ts'],
|
||||
approachSummary: 'Made inline changes with validation layer.',
|
||||
},
|
||||
];
|
||||
|
||||
const { lastFrame } = renderWithProviders(
|
||||
<ArenaSessionCard
|
||||
sessionStatus="idle"
|
||||
task="Refactor authentication"
|
||||
totalDurationMs={12_000}
|
||||
agents={agents}
|
||||
width={100}
|
||||
/>,
|
||||
);
|
||||
|
||||
const output = lastFrame();
|
||||
expect(output).toContain('Arena Comparison Summary');
|
||||
expect(output).not.toContain('Status Time Tokens Changes');
|
||||
expect(output).toContain('Status Summary:');
|
||||
expect(output).toContain('qwen-coder-plus: Idle');
|
||||
expect(output).toContain('gpt-4o: Idle');
|
||||
expect(output).toContain('Files Modified:');
|
||||
expect(output).toContain('common: src/auth.ts');
|
||||
expect(output).toContain('qwen-coder-plus-only: tests/auth.test.ts');
|
||||
expect(output).toContain('gpt-4o-only: src/middleware.ts');
|
||||
expect(output).toContain('Approach Summary:');
|
||||
expect(output).toContain('Refactored with JWT strategy pattern.');
|
||||
expect(output).toContain('Token Efficiency:');
|
||||
expect(output).toContain('45,000 tokens');
|
||||
expect(output).toContain('45,000 tokens · runtime 12.0s');
|
||||
expect(output).not.toContain('45,000 tokens · runtime 12.0s · 12 tools');
|
||||
expect(output).not.toContain('Quick Preview:');
|
||||
expect(output).not.toContain('[View Detailed Diff]');
|
||||
expect(output).not.toContain('[Select Winner →]');
|
||||
});
|
||||
|
||||
it('hides empty per-agent unique file groups', () => {
|
||||
const agents: ArenaAgentCardData[] = [
|
||||
{
|
||||
label: 'gemma4:31b',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 10_000,
|
||||
totalTokens: 10_000,
|
||||
inputTokens: 7_000,
|
||||
outputTokens: 3_000,
|
||||
toolCalls: 2,
|
||||
successfulToolCalls: 2,
|
||||
failedToolCalls: 0,
|
||||
rounds: 1,
|
||||
diffSummary: {
|
||||
files: [{ path: 'reader.py', additions: 20, deletions: 0 }],
|
||||
additions: 20,
|
||||
deletions: 0,
|
||||
},
|
||||
modifiedFiles: ['reader.py'],
|
||||
approachSummary: 'Created a reader.',
|
||||
},
|
||||
{
|
||||
label: 'qwen2.5:14b',
|
||||
status: AgentStatus.IDLE,
|
||||
durationMs: 8_000,
|
||||
totalTokens: 8_000,
|
||||
inputTokens: 6_000,
|
||||
outputTokens: 2_000,
|
||||
toolCalls: 2,
|
||||
successfulToolCalls: 2,
|
||||
failedToolCalls: 0,
|
||||
rounds: 1,
|
||||
diffSummary: {
|
||||
files: [{ path: 'reader.py', additions: 22, deletions: 0 }],
|
||||
additions: 22,
|
||||
deletions: 0,
|
||||
},
|
||||
modifiedFiles: ['reader.py'],
|
||||
approachSummary: 'Created a reader.',
|
||||
},
|
||||
];
|
||||
|
||||
const { lastFrame } = renderWithProviders(
|
||||
<ArenaSessionCard
|
||||
sessionStatus="idle"
|
||||
task="Create a reader"
|
||||
totalDurationMs={10_000}
|
||||
agents={agents}
|
||||
width={100}
|
||||
/>,
|
||||
);
|
||||
|
||||
const output = lastFrame();
|
||||
expect(output).toContain('common: reader.py');
|
||||
expect(output).not.toContain('only gemma4:31b: none');
|
||||
expect(output).not.toContain('only qwen2.5:14b: none');
|
||||
});
|
||||
});
|
||||
|
|
@ -10,6 +10,7 @@ import { theme } from '../../semantic-colors.js';
|
|||
import { formatDuration } from '../../utils/formatters.js';
|
||||
import { getArenaStatusLabel } from '../../utils/displayUtils.js';
|
||||
import type { ArenaAgentCardData } from '../../types.js';
|
||||
import type { ArenaDiffSummary } from '@qwen-code/qwen-code-core';
|
||||
|
||||
// ─── Helpers ────────────────────────────────────────────────
|
||||
|
||||
|
|
@ -84,36 +85,25 @@ interface ArenaSessionCardProps {
|
|||
width?: number;
|
||||
}
|
||||
|
||||
/**
 * Pad or truncate a string to exactly `len` UTF-16 code units.
 *
 * Width is measured with `String.prototype.length`, not terminal cell width,
 * so characters that occupy two cells (or two code units) are not compensated
 * for.
 *
 * @param str - Text to fit into the column.
 * @param len - Target length. Zero or negative yields an empty string.
 * @param align - `'left'` pads on the right (default); `'right'` pads on the left.
 * @returns `str` cut to `len` when too long, otherwise padded with spaces.
 */
function pad(
  str: string,
  len: number,
  align: 'left' | 'right' = 'left',
): string {
  // A non-positive width can hold nothing. Without this guard a negative
  // `len` reaches `slice(0, len)`, which trims characters off the END of the
  // string instead of returning an empty cell.
  if (len <= 0) return '';
  if (str.length >= len) return str.slice(0, len);
  return align === 'right' ? str.padStart(len) : str.padEnd(len);
}
|
||||
|
||||
/**
 * Shorten a string to at most `maxLen` UTF-16 code units, replacing the cut
 * tail with a single ellipsis character.
 *
 * @param str - Text to shorten.
 * @param maxLen - Maximum length of the result, ellipsis included.
 * @returns `str` unchanged when it already fits; an empty string when
 *   `maxLen` is zero or negative; otherwise the first `maxLen - 1` code units
 *   followed by `…`.
 */
function truncate(str: string, maxLen: number): string {
  if (str.length <= maxLen) return str;
  // With no room at all, even the ellipsis would overflow the limit; the
  // unguarded `slice(0, maxLen - 1)` would also go negative and keep almost
  // the whole string.
  if (maxLen <= 0) return '';
  return str.slice(0, maxLen - 1) + '…';
}
|
||||
|
||||
/**
|
||||
* Calculate diff stats from a unified diff string.
|
||||
* Returns the stats string and individual counts for colored rendering.
|
||||
*/
|
||||
function getDiffStats(diff: string | undefined): {
|
||||
function getDiffStats(
|
||||
diff: string | undefined,
|
||||
diffSummary?: ArenaDiffSummary,
|
||||
): {
|
||||
text: string;
|
||||
additions: number;
|
||||
deletions: number;
|
||||
} {
|
||||
if (diffSummary) {
|
||||
return {
|
||||
text: `+${diffSummary.additions}/-${diffSummary.deletions}`,
|
||||
additions: diffSummary.additions,
|
||||
deletions: diffSummary.deletions,
|
||||
};
|
||||
}
|
||||
if (!diff) return { text: '', additions: 0, deletions: 0 };
|
||||
const lines = diff.split('\n');
|
||||
let additions = 0;
|
||||
|
|
@ -128,33 +118,71 @@ function getDiffStats(diff: string | undefined): {
|
|||
return { text: `+${additions}/-${deletions}`, additions, deletions };
|
||||
}
|
||||
|
||||
const MAX_MODEL_NAME_LENGTH = 35;
|
||||
/** Default number of paths listed before the remainder collapses into "+N more". */
const MAX_FILE_LIST_ITEMS = 4;

/**
 * Render a list of file paths as a single comma-separated line.
 *
 * @param files - Paths to display; `undefined` or an empty list renders as
 *   `'none'`.
 * @param maxItems - How many paths to spell out before summarising the rest
 *   as `+N more`. Defaults to {@link MAX_FILE_LIST_ITEMS}; values below 1 (or
 *   NaN) are treated as 1 so the result never starts with the separator.
 * @returns The formatted line, e.g. `a.ts, b.ts, +3 more`.
 */
function formatFileList(
  files: readonly string[] | undefined,
  maxItems: number = MAX_FILE_LIST_ITEMS,
): string {
  if (!files || files.length === 0) {
    return 'none';
  }
  const limit = maxItems >= 1 ? Math.floor(maxItems) : 1;
  const shown = files.slice(0, limit).join(', ');
  const hidden = files.length - limit;
  return hidden > 0 ? `${shown}, +${hidden} more` : shown;
}
|
||||
|
||||
/**
 * List the file paths an agent touched.
 *
 * Uses the explicit `modifiedFiles` list when it is present; otherwise falls
 * back to the paths recorded in `diffSummary.files`, and finally to an empty
 * list when neither is available.
 */
function getAgentFiles(agent: ArenaAgentCardData): string[] {
  return (
    agent.modifiedFiles ??
    agent.diffSummary?.files.map((file) => file.path) ??
    []
  );
}

/**
 * Split the files touched by a set of agents into comparison groups.
 *
 * The first group is always `common`: every path touched by more than one
 * agent (it may be empty). It is followed by one `<label>-only` group per
 * agent that touched at least one path nobody else did; agents without such
 * paths get no group. Paths inside each group are sorted.
 *
 * @param agents - Agent cards to compare, in display order.
 * @returns The `common` group followed by the non-empty per-agent groups, in
 *   the same order as `agents`.
 */
function getComparisonFileGroups(
  agents: ArenaAgentCardData[],
): Array<{ label: string; files: string[] }> {
  // Resolve each agent's files once, as a Set: a path listed twice for the
  // same agent must count as one touch and must be displayed only once.
  const filesByAgent = agents.map((agent) => ({
    label: agent.label,
    files: new Set(getAgentFiles(agent)),
  }));

  // path -> number of distinct agents that touched it
  const counts = new Map<string, number>();
  for (const { files } of filesByAgent) {
    for (const file of files) {
      counts.set(file, (counts.get(file) ?? 0) + 1);
    }
  }

  const common = [...counts]
    .filter(([, count]) => count > 1)
    .map(([file]) => file)
    .sort();
  const groups = [{ label: 'common', files: common }];

  for (const { label, files } of filesByAgent) {
    const unique = [...files].filter((file) => counts.get(file) === 1).sort();
    if (unique.length > 0) {
      groups.push({ label: `${label}-only`, files: unique });
    }
  }

  return groups;
}
|
||||
|
||||
/**
 * Choose the tree-drawing connector for a row in a list of `total` rows:
 * the closing elbow for the final row, a tee for every other row.
 */
function getTreeBranch(index: number, total: number): string {
  const lastIndex = total - 1;
  if (index !== lastIndex) {
    return '├─';
  }
  return '└─';
}
|
||||
|
||||
export const ArenaSessionCard: React.FC<ArenaSessionCardProps> = ({
|
||||
sessionStatus,
|
||||
task,
|
||||
agents,
|
||||
width,
|
||||
}) => {
|
||||
// Truncate task for display
|
||||
const maxTaskLen = 60;
|
||||
const displayTask =
|
||||
task.length > maxTaskLen ? task.slice(0, maxTaskLen - 1) + '…' : task;
|
||||
|
||||
// Column widths for the agent table (unified with Arena Results)
|
||||
const colStatus = 14;
|
||||
const colTime = 8;
|
||||
const colTokens = 10;
|
||||
const colChanges = 10;
|
||||
|
||||
const titleLabel =
|
||||
sessionStatus === 'idle'
|
||||
? 'Agents Status · Idle'
|
||||
: sessionStatus === 'completed'
|
||||
? 'Arena Complete'
|
||||
: sessionStatus === 'cancelled'
|
||||
? 'Arena Cancelled'
|
||||
: 'Arena Failed';
|
||||
sessionStatus === 'idle' || sessionStatus === 'completed'
|
||||
? 'Arena Comparison Summary'
|
||||
: sessionStatus === 'cancelled'
|
||||
? 'Arena Cancelled'
|
||||
: 'Arena Failed';
|
||||
|
||||
return (
|
||||
<Box
|
||||
|
|
@ -174,96 +202,109 @@ export const ArenaSessionCard: React.FC<ArenaSessionCardProps> = ({
|
|||
|
||||
<Box height={1} />
|
||||
|
||||
{/* Task */}
|
||||
<Box>
|
||||
<Text>
|
||||
<Text color={theme.text.secondary}>Task: </Text>
|
||||
<Text color={theme.text.primary}>"{displayTask}"</Text>
|
||||
</Text>
|
||||
</Box>
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
{/* Table header - unified columns: Agent, Status, Time, Tokens, Changes */}
|
||||
<Box>
|
||||
<Box flexGrow={1}>
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Agent
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colStatus} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Status
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colTime} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Time
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colTokens} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Tokens
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colChanges} justifyContent="flex-end">
|
||||
<Text bold color={theme.text.secondary}>
|
||||
Changes
|
||||
</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
|
||||
{/* Table separator */}
|
||||
<Box>
|
||||
<Text color={theme.border.default}>
|
||||
{'─'.repeat((width ?? 60) - 8)}
|
||||
</Text>
|
||||
</Box>
|
||||
|
||||
{/* Agent rows */}
|
||||
{agents.map((agent) => {
|
||||
const { text: statusText, color } = getArenaStatusLabel(agent.status);
|
||||
const diffStats = getDiffStats(agent.diff);
|
||||
return (
|
||||
<Box key={agent.label}>
|
||||
<Box flexGrow={1}>
|
||||
<Text color={theme.text.primary}>
|
||||
{truncate(agent.label, MAX_MODEL_NAME_LENGTH)}
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colStatus} justifyContent="flex-end">
|
||||
<Text color={color}>{statusText}</Text>
|
||||
</Box>
|
||||
<Box width={colTime} justifyContent="flex-end">
|
||||
<Text color={theme.text.primary}>
|
||||
{pad(formatDuration(agent.durationMs), colTime - 1, 'right')}
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colTokens} justifyContent="flex-end">
|
||||
<Text color={theme.text.primary}>
|
||||
{pad(
|
||||
agent.totalTokens.toLocaleString(),
|
||||
colTokens - 1,
|
||||
'right',
|
||||
)}
|
||||
</Text>
|
||||
</Box>
|
||||
<Box width={colChanges} justifyContent="flex-end">
|
||||
{diffStats.additions > 0 || diffStats.deletions > 0 ? (
|
||||
<Text>
|
||||
<Text color={theme.status.success}>
|
||||
+{diffStats.additions}
|
||||
{(sessionStatus === 'idle' || sessionStatus === 'completed') && (
|
||||
<>
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Status Summary:
|
||||
</Text>
|
||||
{agents.map((agent, index) => {
|
||||
const { text: statusText, color } = getArenaStatusLabel(
|
||||
agent.status,
|
||||
);
|
||||
return (
|
||||
<Box key={agent.label} marginLeft={2}>
|
||||
<Text color={theme.text.secondary}>
|
||||
{index === agents.length - 1 ? '└─' : '├─'} {agent.label}
|
||||
:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.secondary}>/</Text>
|
||||
<Text color={theme.status.error}>-{diffStats.deletions}</Text>
|
||||
</Text>
|
||||
) : (
|
||||
<Text color={theme.text.secondary}>-</Text>
|
||||
)}
|
||||
</Box>
|
||||
<Text color={color}>{statusText}</Text>
|
||||
</Box>
|
||||
);
|
||||
})}
|
||||
</Box>
|
||||
);
|
||||
})}
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Files Modified:
|
||||
</Text>
|
||||
{getComparisonFileGroups(agents).map((group, index, groups) => (
|
||||
<Box key={group.label} marginLeft={2}>
|
||||
<Text color={theme.text.secondary}>
|
||||
{getTreeBranch(index, groups.length)} {group.label}:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.primary}>
|
||||
{formatFileList(group.files)}
|
||||
</Text>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Approach Summary:
|
||||
</Text>
|
||||
{agents.map((agent, index) => {
|
||||
const diffStats = getDiffStats(agent.diff, agent.diffSummary);
|
||||
const files = getAgentFiles(agent).length;
|
||||
const branch = index === agents.length - 1 ? '└─' : '├─';
|
||||
const summary =
|
||||
agent.approachSummary ?? 'No approach summary available.';
|
||||
return (
|
||||
<Box key={agent.label} marginLeft={2}>
|
||||
<Text>
|
||||
<Text color={theme.text.secondary}>
|
||||
{branch} {agent.label}:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.primary}>{summary} </Text>
|
||||
<Text color={theme.text.secondary}>(</Text>
|
||||
<Text color={theme.text.accent}>{files}</Text>
|
||||
<Text color={theme.text.secondary}>
|
||||
{files === 1 ? ' file, ' : ' files, '}
|
||||
</Text>
|
||||
<Text color={theme.status.success}>
|
||||
+{diffStats.additions}
|
||||
</Text>
|
||||
<Text color={theme.text.secondary}> </Text>
|
||||
<Text color={theme.status.error}>
|
||||
-{diffStats.deletions}
|
||||
</Text>
|
||||
<Text color={theme.text.secondary}> lines, </Text>
|
||||
<Text color={theme.text.accent}>{agent.toolCalls}</Text>
|
||||
<Text color={theme.text.secondary}>
|
||||
{agent.toolCalls === 1 ? ' tool call)' : ' tool calls)'}
|
||||
</Text>
|
||||
</Text>
|
||||
</Box>
|
||||
);
|
||||
})}
|
||||
</Box>
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
<Box flexDirection="column">
|
||||
<Text bold color={theme.text.primary}>
|
||||
Token Efficiency:
|
||||
</Text>
|
||||
{agents.map((agent, index) => (
|
||||
<Box key={agent.label} marginLeft={2}>
|
||||
<Text color={theme.text.secondary}>
|
||||
{index === agents.length - 1 ? '└─' : '├─'} {agent.label}
|
||||
:{' '}
|
||||
</Text>
|
||||
<Text color={theme.text.primary}>
|
||||
{agent.totalTokens.toLocaleString()} tokens · runtime{' '}
|
||||
{formatDuration(agent.durationMs)}
|
||||
</Text>
|
||||
</Box>
|
||||
))}
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
|
||||
<Box height={1} />
|
||||
|
||||
|
|
@ -271,9 +312,8 @@ export const ArenaSessionCard: React.FC<ArenaSessionCardProps> = ({
|
|||
{sessionStatus === 'idle' && (
|
||||
<Box flexDirection="column">
|
||||
<Text color={theme.text.secondary}>
|
||||
Switch to an agent tab to continue, or{' '}
|
||||
<Text color={theme.text.accent}>/arena select</Text> to pick a
|
||||
winner.
|
||||
Run <Text color={theme.text.accent}>/arena select</Text> to view
|
||||
detailed diff or pick a winner.
|
||||
</Text>
|
||||
</Box>
|
||||
)}
|
||||
|
|
|
|||
108
packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx
Normal file
108
packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { waitFor } from '@testing-library/react';
|
||||
import { describe, expect, it, vi } from 'vitest';
|
||||
import {
|
||||
AgentStatus,
|
||||
ArenaSessionStatus,
|
||||
type ArenaManager,
|
||||
type Config,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import { renderWithProviders } from '../../../test-utils/render.js';
|
||||
import { ArenaSelectDialog } from './ArenaSelectDialog.js';
|
||||
|
||||
describe('ArenaSelectDialog', () => {
|
||||
it('toggles quick preview and detailed diff for the highlighted agent', async () => {
|
||||
const result = {
|
||||
sessionId: 'arena-1',
|
||||
task: 'Update auth',
|
||||
status: ArenaSessionStatus.IDLE,
|
||||
agents: [
|
||||
{
|
||||
agentId: 'model-1',
|
||||
model: { modelId: 'model-1', authType: 'openai' },
|
||||
status: AgentStatus.IDLE,
|
||||
worktree: {
|
||||
id: 'w1',
|
||||
name: 'model-1',
|
||||
path: '/tmp/model-1',
|
||||
branch: 'arena/model-1',
|
||||
isActive: true,
|
||||
createdAt: 1,
|
||||
},
|
||||
stats: {
|
||||
rounds: 1,
|
||||
totalTokens: 1000,
|
||||
inputTokens: 700,
|
||||
outputTokens: 300,
|
||||
durationMs: 2000,
|
||||
toolCalls: 2,
|
||||
successfulToolCalls: 2,
|
||||
failedToolCalls: 0,
|
||||
},
|
||||
diff: `diff --git a/src/auth.ts b/src/auth.ts
|
||||
--- a/src/auth.ts
|
||||
+++ b/src/auth.ts
|
||||
@@ -1 +1 @@
|
||||
-old
|
||||
+new`,
|
||||
diffSummary: {
|
||||
files: [{ path: 'src/auth.ts', additions: 1, deletions: 1 }],
|
||||
additions: 1,
|
||||
deletions: 1,
|
||||
},
|
||||
modifiedFiles: ['src/auth.ts'],
|
||||
approachSummary: 'Updated the auth implementation inline.',
|
||||
startedAt: 1,
|
||||
},
|
||||
],
|
||||
startedAt: 1,
|
||||
wasRepoInitialized: false,
|
||||
};
|
||||
|
||||
const manager = {
|
||||
getResult: vi.fn(() => result),
|
||||
getAgentStates: vi.fn(() => [
|
||||
{
|
||||
agentId: 'model-1',
|
||||
model: { modelId: 'model-1', authType: 'openai' },
|
||||
status: AgentStatus.IDLE,
|
||||
stats: result.agents[0]!.stats,
|
||||
},
|
||||
]),
|
||||
getAgentState: vi.fn(),
|
||||
applyAgentResult: vi.fn(),
|
||||
} as unknown as ArenaManager;
|
||||
|
||||
const config = {
|
||||
getArenaManager: () => manager,
|
||||
cleanupArenaRuntime: vi.fn(),
|
||||
getChatRecordingService: () => undefined,
|
||||
} as unknown as Config;
|
||||
|
||||
const { lastFrame, stdin } = renderWithProviders(
|
||||
<ArenaSelectDialog
|
||||
manager={manager}
|
||||
config={config}
|
||||
addItem={vi.fn()}
|
||||
closeArenaDialog={vi.fn()}
|
||||
/>,
|
||||
);
|
||||
|
||||
stdin.write('p');
|
||||
await waitFor(() => {
|
||||
expect(lastFrame()).toContain('Quick Preview · model-1');
|
||||
});
|
||||
expect(lastFrame()).toContain('Updated the auth implementation inline.');
|
||||
|
||||
stdin.write('d');
|
||||
await waitFor(() => {
|
||||
expect(lastFrame()).toContain('Detailed Diff · model-1');
|
||||
});
|
||||
expect(lastFrame()).toContain('diff --git a/src/auth.ts b/src/auth.ts');
|
||||
});
|
||||
});
|
||||
|
|
@ -5,12 +5,13 @@
|
|||
*/
|
||||
|
||||
import type React from 'react';
|
||||
import { useCallback, useMemo } from 'react';
|
||||
import { useCallback, useMemo, useState } from 'react';
|
||||
import { Box, Text } from 'ink';
|
||||
import {
|
||||
type ArenaManager,
|
||||
isSuccessStatus,
|
||||
type Config,
|
||||
type ArenaAgentResult,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import { theme } from '../../semantic-colors.js';
|
||||
import { useKeypress } from '../../hooks/useKeypress.js';
|
||||
|
|
@ -134,6 +135,17 @@ export function ArenaSelectDialog({
|
|||
|
||||
const result = manager.getResult();
|
||||
const agents = manager.getAgentStates();
|
||||
const firstSelectableAgentId = agents.find((agent) =>
|
||||
isSuccessStatus(agent.status),
|
||||
)?.agentId;
|
||||
const [selectedAgentId, setSelectedAgentId] = useState<string | undefined>(
|
||||
firstSelectableAgentId,
|
||||
);
|
||||
const [showPreview, setShowPreview] = useState(false);
|
||||
const [showDetailedDiff, setShowDetailedDiff] = useState(false);
|
||||
const selectedResult = result?.agents.find(
|
||||
(agent) => agent.agentId === selectedAgentId,
|
||||
);
|
||||
|
||||
const items: Array<DescriptiveRadioSelectItem<string>> = useMemo(
|
||||
() =>
|
||||
|
|
@ -146,11 +158,16 @@ export function ArenaSelectDialog({
|
|||
// Build diff summary from cached result if available
|
||||
let diffAdditions = 0;
|
||||
let diffDeletions = 0;
|
||||
let fileCount = 0;
|
||||
if (isSuccessStatus(agent.status) && result) {
|
||||
const agentResult = result.agents.find(
|
||||
(a) => a.agentId === agent.agentId,
|
||||
);
|
||||
if (agentResult?.diff) {
|
||||
if (agentResult?.diffSummary) {
|
||||
diffAdditions = agentResult.diffSummary.additions;
|
||||
diffDeletions = agentResult.diffSummary.deletions;
|
||||
fileCount = agentResult.diffSummary.files.length;
|
||||
} else if (agentResult?.diff) {
|
||||
const lines = agentResult.diff.split('\n');
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('+') && !line.startsWith('+++')) {
|
||||
|
|
@ -160,6 +177,7 @@ export function ArenaSelectDialog({
|
|||
}
|
||||
}
|
||||
}
|
||||
fileCount = agentResult?.modifiedFiles?.length ?? fileCount;
|
||||
}
|
||||
|
||||
// Title: full model name (not truncated)
|
||||
|
|
@ -173,6 +191,12 @@ export function ArenaSelectDialog({
|
|||
<Text color={theme.text.secondary}>{duration}</Text>
|
||||
<Text color={theme.text.secondary}> · </Text>
|
||||
<Text color={theme.text.secondary}>{tokens} tokens</Text>
|
||||
{fileCount > 0 && (
|
||||
<>
|
||||
<Text color={theme.text.secondary}> · </Text>
|
||||
<Text color={theme.text.secondary}>{fileCount} files</Text>
|
||||
</>
|
||||
)}
|
||||
{(diffAdditions > 0 || diffDeletions > 0) && (
|
||||
<>
|
||||
<Text color={theme.text.secondary}> · </Text>
|
||||
|
|
@ -201,7 +225,13 @@ export function ArenaSelectDialog({
|
|||
if (key.name === 'escape') {
|
||||
closeArenaDialog();
|
||||
}
|
||||
if (key.name === 'p' && !key.ctrl && !key.meta) {
|
||||
setShowPreview((current) => !current);
|
||||
}
|
||||
if (key.name === 'd' && !key.ctrl && !key.meta) {
|
||||
setShowDetailedDiff((current) => !current);
|
||||
}
|
||||
if (key.name === 'x' && !key.ctrl && !key.meta) {
|
||||
onDiscard();
|
||||
}
|
||||
},
|
||||
|
|
@ -245,16 +275,136 @@ export function ArenaSelectDialog({
|
|||
onSelect={(agentId: string) => {
|
||||
onSelect(agentId);
|
||||
}}
|
||||
onHighlight={(agentId: string) => {
|
||||
setSelectedAgentId(agentId);
|
||||
}}
|
||||
isFocused={true}
|
||||
showNumbers={false}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
{showPreview && selectedResult && (
|
||||
<ArenaAgentPreview result={selectedResult} />
|
||||
)}
|
||||
|
||||
{showDetailedDiff && selectedResult && (
|
||||
<ArenaAgentDetailedDiff result={selectedResult} />
|
||||
)}
|
||||
|
||||
<Box marginTop={1}>
|
||||
<Text color={theme.text.secondary}>
|
||||
Enter to select, d to discard all, Esc to cancel
|
||||
p preview, d detailed diff, Enter select winner, x discard all, Esc
|
||||
cancel
|
||||
</Text>
|
||||
</Box>
|
||||
</Box>
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Compact preview panel for one agent's result: its approach summary, the
 * files it touched, and its token / duration / tool-call metrics.
 *
 * File paths come from `diffSummary.files` when a diff summary exists and
 * otherwise from `modifiedFiles`, so the panel does not report "none" for a
 * result that only carries the plain file list.
 */
function ArenaAgentPreview({
  result,
}: {
  result: ArenaAgentResult;
}): React.JSX.Element {
  const filePaths =
    result.diffSummary?.files.map((file) => file.path) ??
    result.modifiedFiles ??
    [];
  return (
    <Box marginTop={1} flexDirection="column">
      <Text bold color={theme.text.primary}>
        Quick Preview · {result.model.modelId}
      </Text>
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>Approach: </Text>
        <Text color={theme.text.primary}>
          {result.approachSummary ?? 'No approach summary available.'}
        </Text>
      </Box>
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>Major files: </Text>
        <Text color={theme.text.primary}>{formatFileList(filePaths)}</Text>
      </Box>
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>Metrics: </Text>
        <Text color={theme.text.primary}>
          {result.stats.totalTokens.toLocaleString()} tokens ·{' '}
          {formatDuration(result.stats.durationMs)} · {result.stats.toolCalls}{' '}
          tools
        </Text>
      </Box>
    </Box>
  );
}
|
||||
|
||||
/**
 * Panel showing one agent's diff, one coloured row per line, under a
 * "Detailed Diff" heading. Shows a placeholder row when there are no lines
 * to display.
 */
function ArenaAgentDetailedDiff({
  result,
}: {
  result: ArenaAgentResult;
}): React.JSX.Element {
  const visibleLines = getVisibleDiffLines(result.diff);

  const body =
    visibleLines.length !== 0 ? (
      <Box marginLeft={2} flexDirection="column">
        {visibleLines.map((text, position) => (
          <Text key={`${position}-${text}`} color={getDiffLineColor(text)}>
            {text}
          </Text>
        ))}
      </Box>
    ) : (
      <Box marginLeft={2}>
        <Text color={theme.text.secondary}>No diff available.</Text>
      </Box>
    );

  return (
    <Box marginTop={1} flexDirection="column">
      <Text bold color={theme.text.primary}>
        Detailed Diff · {result.model.modelId}
      </Text>
      {body}
    </Box>
  );
}
|
||||
|
||||
/**
 * Join file paths into one comma-separated line, spelling out at most six of
 * them and summarising the remainder as `+N more`. An empty list renders as
 * `none`.
 */
function formatFileList(files: string[]): string {
  const total = files.length;
  if (total === 0) {
    return 'none';
  }

  const shown = files.slice(0, 6);
  const hiddenCount = total - shown.length;

  let text = shown.join(', ');
  if (hiddenCount > 0) {
    text += `, +${hiddenCount} more`;
  }
  return text;
}
|
||||
|
||||
/**
 * Split a diff into the lines to display, capped at 180 lines.
 *
 * @param diff - Diff text; `undefined`, empty, or a lone line break yields an
 *   empty list.
 * @returns The diff's lines. When there are more than 180, only the first 180
 *   are returned, followed by one marker line stating how many were dropped.
 */
function getVisibleDiffLines(diff: string | undefined): string[] {
  if (!diff) {
    return [];
  }
  // Strip the single terminating line break. Left in place, `split` produces
  // a phantom empty last entry that renders as a blank row, counts against
  // the cap, and inflates the "truncated N" figure by one.
  const text = diff.replace(/\r?\n$/, '');
  if (text === '') {
    return [];
  }
  const lines = text.split('\n');
  const maxLines = 180;
  if (lines.length <= maxLines) {
    return lines;
  }
  return [
    ...lines.slice(0, maxLines),
    `... truncated ${lines.length - maxLines} diff lines`,
  ];
}
|
||||
|
||||
/**
 * Picks the theme color for one line of a unified diff.
 *
 * - added lines (`+`, but not `+++`) use the success color
 * - removed lines (`-`, but not `---`) use the error color
 * - `diff --git`, `@@`, `---` and `+++` lines use the accent color
 * - everything else uses the secondary text color
 *
 * @param line A single diff line.
 * @returns The color value taken from `theme`.
 */
function getDiffLineColor(line: string): string {
  const isFileMarker = line.startsWith('---') || line.startsWith('+++');

  if (!isFileMarker) {
    if (line.startsWith('+')) {
      return theme.status.success;
    }
    if (line.startsWith('-')) {
      return theme.status.error;
    }
  }

  const isStructural =
    isFileMarker || line.startsWith('diff --git') || line.startsWith('@@');
  return isStructural ? theme.text.accent : theme.text.secondary;
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ import type {
|
|||
ToolConfirmationOutcome,
|
||||
ToolResultDisplay,
|
||||
AgentStatus,
|
||||
ArenaDiffSummary,
|
||||
} from '@qwen-code/qwen-code-core';
|
||||
import type { PartListUnion } from '@google/genai';
|
||||
import { type ReactNode } from 'react';
|
||||
|
|
@ -354,6 +355,9 @@ export interface ArenaAgentCardData {
|
|||
rounds: number;
|
||||
error?: string;
|
||||
diff?: string;
|
||||
diffSummary?: ArenaDiffSummary;
|
||||
modifiedFiles?: string[];
|
||||
approachSummary?: string;
|
||||
}
|
||||
|
||||
export type HistoryItemArenaAgentComplete = HistoryItemBase & {
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import * as os from 'node:os';
|
|||
import { ArenaManager } from './ArenaManager.js';
|
||||
import { ArenaEventType } from './arena-events.js';
|
||||
import { ArenaSessionStatus, ARENA_MAX_AGENTS } from './types.js';
|
||||
import { AgentStatus } from '../runtime/agent-types.js';
|
||||
|
||||
const hoistedMockSetupWorktrees = vi.hoisted(() => vi.fn());
|
||||
const hoistedMockCleanupSession = vi.hoisted(() => vi.fn());
|
||||
|
|
@ -374,6 +375,156 @@ describe('ArenaManager', () => {
|
|||
});
|
||||
|
||||
describe('active session lifecycle', () => {
|
||||
it('collects diff summaries and fallback approach summaries', async () => {
|
||||
const manager = new ArenaManager(mockConfig as never);
|
||||
mockBackend.setAutoExit(false);
|
||||
hoistedMockGetWorktreeDiff.mockResolvedValue(`diff --git a/src/auth.ts b/src/auth.ts
|
||||
index 111..222 100644
|
||||
--- a/src/auth.ts
|
||||
+++ b/src/auth.ts
|
||||
@@ -1 +1,2 @@
|
||||
-old
|
||||
+new
|
||||
+extra`);
|
||||
|
||||
const startPromise = manager.start(createValidStartOptions());
|
||||
await waitForCondition(
|
||||
() => mockBackend.spawnAgent.mock.calls.length >= 2,
|
||||
);
|
||||
|
||||
const agentsDir = path.join(
|
||||
os.tmpdir(),
|
||||
'arena-mock',
|
||||
'testsess',
|
||||
'agents',
|
||||
);
|
||||
await fs.mkdir(agentsDir, { recursive: true });
|
||||
for (const modelId of ['model-1', 'model-2']) {
|
||||
await fs.writeFile(
|
||||
path.join(agentsDir, `${modelId}.json`),
|
||||
JSON.stringify({
|
||||
agentId: modelId,
|
||||
status: AgentStatus.COMPLETED,
|
||||
updatedAt: Date.now(),
|
||||
rounds: 1,
|
||||
stats: {
|
||||
rounds: 1,
|
||||
totalTokens: 0,
|
||||
inputTokens: 0,
|
||||
outputTokens: 0,
|
||||
durationMs: 0,
|
||||
toolCalls: 0,
|
||||
successfulToolCalls: 0,
|
||||
failedToolCalls: 0,
|
||||
},
|
||||
finalSummary: null,
|
||||
error: null,
|
||||
}),
|
||||
'utf-8',
|
||||
);
|
||||
}
|
||||
|
||||
const result = await startPromise;
|
||||
|
||||
expect(result.agents).toHaveLength(2);
|
||||
expect(result.agents[0]?.modifiedFiles).toEqual(['src/auth.ts']);
|
||||
expect(result.agents[0]?.diffSummary).toEqual({
|
||||
files: [{ path: 'src/auth.ts', additions: 2, deletions: 1 }],
|
||||
additions: 2,
|
||||
deletions: 1,
|
||||
});
|
||||
expect(result.agents[0]?.approachSummary).toBe(
|
||||
'Changed 1 file with 0 tool calls (+2/-1).',
|
||||
);
|
||||
});
|
||||
|
||||
it('uses each in-process agent generator for semantic approach summaries', async () => {
|
||||
const mainGenerateContent = vi.fn();
|
||||
const model1GenerateContent = vi.fn().mockResolvedValue({
|
||||
candidates: [
|
||||
{
|
||||
content: {
|
||||
parts: [
|
||||
{
|
||||
text: JSON.stringify({
|
||||
summary: 'Model 1 used a strategy pattern.',
|
||||
}),
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
const model2GenerateContent = vi.fn().mockResolvedValue({
|
||||
candidates: [
|
||||
{
|
||||
content: {
|
||||
parts: [
|
||||
{
|
||||
text: JSON.stringify({
|
||||
summary: 'Model 2 made inline edits.',
|
||||
}),
|
||||
},
|
||||
],
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
const config = {
|
||||
...mockConfig,
|
||||
getContentGenerator: () => ({
|
||||
generateContent: mainGenerateContent,
|
||||
}),
|
||||
};
|
||||
mockBackend.type = 'in-process';
|
||||
mockBackend.setAutoExit(false);
|
||||
const agentInteractives = new Map<
|
||||
string,
|
||||
ReturnType<typeof createMockInteractive>
|
||||
>();
|
||||
mockBackend.getAgent.mockImplementation((agentId: string) =>
|
||||
agentInteractives.get(agentId),
|
||||
);
|
||||
mockBackend.getAgentContentGenerator.mockImplementation(
|
||||
(agentId: string) =>
|
||||
agentId === 'model-1'
|
||||
? { generateContent: model1GenerateContent }
|
||||
: { generateContent: model2GenerateContent },
|
||||
);
|
||||
mockBackend.spawnAgent.mockImplementation(
|
||||
async (config: { agentId: string }) => {
|
||||
agentInteractives.set(
|
||||
config.agentId,
|
||||
createMockInteractive(config.agentId),
|
||||
);
|
||||
},
|
||||
);
|
||||
const manager = new ArenaManager(config as never);
|
||||
|
||||
const result = await manager.start(createValidStartOptions());
|
||||
|
||||
expect(mainGenerateContent).not.toHaveBeenCalled();
|
||||
expect(model1GenerateContent).toHaveBeenCalledTimes(1);
|
||||
expect(model2GenerateContent).toHaveBeenCalledTimes(1);
|
||||
expect(model1GenerateContent.mock.calls[0]?.[0].model).toBe('model-1');
|
||||
expect(model2GenerateContent.mock.calls[0]?.[0].model).toBe('model-2');
|
||||
|
||||
const model1Prompt = model1GenerateContent.mock.calls[0]?.[0].contents[0]
|
||||
.parts[0].text as string;
|
||||
const model2Prompt = model2GenerateContent.mock.calls[0]?.[0].contents[0]
|
||||
.parts[0].text as string;
|
||||
expect(model1Prompt).toContain('"agentId": "model-1"');
|
||||
expect(model1Prompt).not.toContain('"agentId": "model-2"');
|
||||
expect(model2Prompt).toContain('"agentId": "model-2"');
|
||||
expect(model2Prompt).not.toContain('"agentId": "model-1"');
|
||||
expect(result.agents[0]?.approachSummary).toBe(
|
||||
'Model 1 used a strategy pattern.',
|
||||
);
|
||||
expect(result.agents[1]?.approachSummary).toBe(
|
||||
'Model 2 made inline edits.',
|
||||
);
|
||||
});
|
||||
|
||||
it('cancel should stop backend and move session to CANCELLED', async () => {
|
||||
const manager = new ArenaManager(mockConfig as never);
|
||||
|
||||
|
|
@ -434,7 +585,7 @@ function createMockBackend() {
|
|||
let autoExit = true;
|
||||
|
||||
const backend = {
|
||||
type: 'tmux' as const,
|
||||
type: 'tmux' as 'tmux' | 'in-process',
|
||||
init: vi.fn().mockResolvedValue(undefined),
|
||||
spawnAgent: vi.fn(async (config: { agentId: string }) => {
|
||||
// By default, simulate immediate agent termination so tests
|
||||
|
|
@ -461,6 +612,8 @@ function createMockBackend() {
|
|||
writeToAgent: vi.fn().mockReturnValue(false),
|
||||
resizeAll: vi.fn(),
|
||||
getAttachHint: vi.fn().mockReturnValue(null),
|
||||
getAgent: vi.fn().mockReturnValue(undefined),
|
||||
getAgentContentGenerator: vi.fn().mockReturnValue(undefined),
|
||||
/** Disable automatic agent exit for tests that need to control timing. */
|
||||
setAutoExit(value: boolean) {
|
||||
autoExit = value;
|
||||
|
|
@ -469,6 +622,36 @@ function createMockBackend() {
|
|||
return backend;
|
||||
}
|
||||
|
||||
/**
 * Builds a stub of an in-process agent's interactive handle for tests.
 *
 * The stub reports a single assistant message (`"<agentId> final response"`),
 * an IDLE status, mostly-zero stats, no errors, and an event emitter whose
 * `on`/`off` are plain mocks.
 */
function createMockInteractive(agentId: string) {
  const messages = [
    {
      role: 'assistant',
      content: `${agentId} final response`,
      timestamp: Date.now(),
    },
  ];

  const stats = {
    rounds: 1,
    totalTokens: 0,
    inputTokens: 0,
    outputTokens: 0,
    totalToolCalls: 0,
    successfulToolCalls: 0,
    failedToolCalls: 0,
    totalDurationMs: 1,
  };

  const eventEmitter = {
    on: vi.fn(),
    off: vi.fn(),
  };

  return {
    getMessages: vi.fn().mockReturnValue(messages),
    getStatus: vi.fn().mockReturnValue(AgentStatus.IDLE),
    getStats: vi.fn().mockReturnValue(stats),
    getLastRoundError: vi.fn().mockReturnValue(undefined),
    getError: vi.fn().mockReturnValue(undefined),
    getEventEmitter: vi.fn().mockReturnValue(eventEmitter),
  };
}
|
||||
|
||||
function createValidStartOptions() {
|
||||
return {
|
||||
models: [
|
||||
|
|
|
|||
|
|
@ -9,11 +9,13 @@ import * as path from 'node:path';
|
|||
import { GitWorktreeService } from '../../services/gitWorktreeService.js';
|
||||
import { Storage } from '../../config/storage.js';
|
||||
import type { Config } from '../../config/config.js';
|
||||
import type { ContentGenerator } from '../../core/contentGenerator.js';
|
||||
import { getCoreSystemPrompt } from '../../core/prompts.js';
|
||||
import { createDebugLogger } from '../../utils/debugLogger.js';
|
||||
import { isNodeError } from '../../utils/errors.js';
|
||||
import { atomicWriteJSON } from '../../utils/atomicFileWrite.js';
|
||||
import type { AnsiOutput } from '../../utils/terminalSerializer.js';
|
||||
import { getResponseText } from '../../utils/partUtils.js';
|
||||
import { ArenaEventEmitter, ArenaEventType } from './arena-events.js';
|
||||
import type { AgentSpawnConfig, Backend, DisplayMode } from '../index.js';
|
||||
import { detectBackend, DISPLAY_MODE } from '../index.js';
|
||||
|
|
@ -51,10 +53,30 @@ import {
|
|||
makeArenaSessionEndedEvent,
|
||||
} from '../../telemetry/index.js';
|
||||
import type { ArenaSessionEndedStatus } from '../../telemetry/index.js';
|
||||
import {
|
||||
buildFallbackApproachSummary,
|
||||
summarizeUnifiedDiff,
|
||||
} from './diff-summary.js';
|
||||
|
||||
const debugLogger = createDebugLogger('ARENA');
|
||||
|
||||
const ARENA_POLL_INTERVAL_MS = 500;
|
||||
const ARENA_SUMMARY_TIMEOUT_MS = 20_000;
|
||||
const ARENA_SUMMARY_MAX_DIFF_CHARS = 6_000;
|
||||
const ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS = 6_000;
|
||||
|
||||
/**
 * One message of an agent's conversation, as copied from the in-process
 * agent by `getAgentTranscript` and consumed when building summary prompts.
 */
interface ArenaTranscriptEntry {
  /** Kind of message; only `'assistant'` entries are considered for final text. */
  role: 'user' | 'assistant' | 'tool_call' | 'tool_result' | 'info';
  /** Message text. */
  content: string;
  /** When truthy, the entry is skipped by transcript formatting and final-text lookup. */
  thought?: boolean;
  /** Free-form extras; `formatTranscript` reads a string `toolName` and a boolean `success`. */
  metadata?: Record<string, unknown>;
  /** Timestamp copied from the source message (unit not visible here — presumably epoch ms). */
  timestamp: number;
}
|
||||
|
||||
/**
 * Everything needed to produce one agent's approach summary.
 */
interface ArenaSummaryInput {
  /** The agent result; its `approachSummary` is filled in by `addApproachSummaries`. */
  result: ArenaAgentResult;
  /** Conversation transcript, when one is available for the agent. */
  transcript?: ArenaTranscriptEntry[];
}
|
||||
|
||||
/**
|
||||
* ArenaManager orchestrates multi-model competitive execution.
|
||||
|
|
@ -1438,6 +1460,9 @@ export class ArenaManager {
|
|||
...agent.stats,
|
||||
...statusFile.stats,
|
||||
};
|
||||
if (statusFile.finalSummary) {
|
||||
agent.accumulatedText = statusFile.finalSummary;
|
||||
}
|
||||
|
||||
// Detect state transitions from the sideband status file
|
||||
const resolved = this.resolveTransition(
|
||||
|
|
@ -1605,15 +1630,163 @@ export class ArenaManager {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Returns a copy of the agent's conversation messages, reduced to the
 * transcript fields.
 *
 * Only the in-process backend exposes messages; for any other backend, an
 * unknown agent, or an empty conversation the result is `undefined`.
 */
private getAgentTranscript(
  agentId: string,
): ArenaTranscriptEntry[] | undefined {
  const backend = this.backend;
  if (backend?.type !== DISPLAY_MODE.IN_PROCESS) {
    return undefined;
  }

  const messages = (backend as InProcessBackend)
    .getAgent(agentId)
    ?.getMessages();
  if (!messages?.length) {
    return undefined;
  }

  return messages.map(({ role, content, thought, metadata, timestamp }) => ({
    role,
    content,
    thought,
    metadata,
    timestamp,
  }));
}
|
||||
|
||||
/**
 * Finds the agent's last visible answer: the trimmed content of the most
 * recent assistant entry that is not a thought and is not blank.
 *
 * @returns The trimmed text, or `undefined` when there is no transcript or no
 *   qualifying entry.
 */
private getFinalTextFromTranscript(
  transcript: ArenaTranscriptEntry[] | undefined,
): string | undefined {
  if (!transcript) {
    return undefined;
  }

  // Walk backwards so the newest qualifying entry wins.
  for (let index = transcript.length - 1; index >= 0; index -= 1) {
    const entry = transcript[index]!;
    if (entry.role !== 'assistant' || entry.thought) {
      continue;
    }
    const text = entry.content.trim();
    if (text) {
      return text;
    }
  }

  return undefined;
}
|
||||
|
||||
/**
 * Generates an approach summary for every input concurrently and stores each
 * one on the corresponding `result.approachSummary`.
 */
private async addApproachSummaries(
  summaryInputs: ArenaSummaryInput[],
): Promise<void> {
  const attachSummary = async (input: ArenaSummaryInput): Promise<void> => {
    const summary = await this.generateAgentApproachSummary(input);
    input.result.approachSummary = summary;
  };

  await Promise.all(summaryInputs.map((input) => attachSummary(input)));
}
|
||||
|
||||
/**
 * Looks up the content generator to use for an agent's approach summary.
 *
 * Only the in-process backend can provide one; every other backend yields
 * `undefined`, as does an agent for which the backend has no generator.
 */
private getAgentSummaryGenerator(
  agentId: string,
): ContentGenerator | undefined {
  const isInProcess = this.backend?.type === DISPLAY_MODE.IN_PROCESS;
  return isInProcess
    ? (this.backend as InProcessBackend).getAgentContentGenerator(agentId)
    : undefined;
}
|
||||
|
||||
/**
 * Produces the one-sentence approach summary for a single agent.
 *
 * The summary is requested from the agent's own content generator. The
 * deterministic fallback summary is returned when:
 * - no generator is available for the agent,
 * - the request fails or exceeds `ARENA_SUMMARY_TIMEOUT_MS`, or
 * - the response does not contain a usable `summary` string.
 *
 * The timeout is enforced here as well as through `abortSignal`: the request
 * is raced against the abort event, so a generator that ignores the signal
 * cannot stall result collection indefinitely.
 *
 * @param summaryInput Agent result plus optional transcript.
 * @returns The generated or fallback summary; this method does not reject.
 */
private async generateAgentApproachSummary(
  summaryInput: ArenaSummaryInput,
): Promise<string> {
  const { result } = summaryInput;
  const generator = this.getAgentSummaryGenerator(result.agentId);
  if (!generator) {
    return buildFallbackApproachSummary(result);
  }

  const abortController = new AbortController();
  const timeout = setTimeout(
    () => abortController.abort(),
    ARENA_SUMMARY_TIMEOUT_MS,
  );
  // Rejects only when the timeout fires; otherwise stays pending and is
  // garbage-collected together with the controller.
  const timedOut = new Promise<never>((_resolve, reject) => {
    abortController.signal.addEventListener(
      'abort',
      () =>
        reject(
          new Error(
            `Arena approach summary timed out after ${ARENA_SUMMARY_TIMEOUT_MS}ms`,
          ),
        ),
      { once: true },
    );
  });

  try {
    const response = await Promise.race([
      generator.generateContent(
        {
          model: result.model.modelId,
          contents: [
            {
              role: 'user',
              parts: [
                {
                  text: this.buildAgentApproachSummaryPrompt(summaryInput),
                },
              ],
            },
          ],
          config: {
            abortSignal: abortController.signal,
            thinkingConfig: { includeThoughts: false },
          },
        },
        'arena_approach_summary',
      ),
      timedOut,
    ]);

    return (
      parseApproachSummaryResponse(getResponseText(response) ?? '')?.trim() ||
      buildFallbackApproachSummary(result)
    );
  } catch (error) {
    debugLogger.error(
      `Failed to generate Arena approach summary for ${result.agentId}:`,
      error,
    );
    return buildFallbackApproachSummary(result);
  } finally {
    clearTimeout(timeout);
  }
}
|
||||
|
||||
/**
 * Serializes the summary request for one agent as pretty-printed JSON.
 *
 * The payload carries the Arena task, the fixed summarization instruction,
 * and the agent's metrics, changed files, final text, transcript and diff.
 * Free-text inputs are bounded: final text to 2,000 characters, transcript to
 * `ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS`, diff to `ARENA_SUMMARY_MAX_DIFF_CHARS`.
 */
private buildAgentApproachSummaryPrompt({
  result: agent,
  transcript,
}: ArenaSummaryInput): string {
  const maxFinalTextChars = 2_000;
  const diffSummary = agent.diffSummary;

  const payload = {
    task: this.arenaConfig?.task ?? '',
    instruction:
      'Summarize this Arena agent approach for user comparison. Use git diff as the source of truth for what changed. Use transcript/finalText only to infer intent and architectural decisions. Do not pick a winner. Return only compact JSON: {"summary":"one sentence summary"}.',
    agent: {
      agentId: agent.agentId,
      model: agent.model.modelId,
      status: agent.status,
      metrics: {
        files: diffSummary?.files.length ?? 0,
        additions: diffSummary?.additions ?? 0,
        deletions: diffSummary?.deletions ?? 0,
        tokens: agent.stats.totalTokens,
        durationMs: agent.stats.durationMs,
        toolCalls: agent.stats.toolCalls,
      },
      files: diffSummary?.files ?? [],
      finalText: truncateForPrompt(agent.finalText ?? '', maxFinalTextChars),
      // Same limit formatTranscript applies, via the shared constant so the
      // two cannot drift apart.
      transcript: truncateForPrompt(
        formatTranscript(transcript),
        ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS,
      ),
      diff: truncateForPrompt(agent.diff ?? '', ARENA_SUMMARY_MAX_DIFF_CHARS),
    },
  };

  return JSON.stringify(payload, null, 2);
}
|
||||
|
||||
private async collectResults(): Promise<ArenaSessionResult> {
|
||||
if (!this.arenaConfig) {
|
||||
throw new Error('Arena config not initialized');
|
||||
}
|
||||
|
||||
const agents: ArenaAgentResult[] = [];
|
||||
const summaryInputs: ArenaSummaryInput[] = [];
|
||||
|
||||
for (const agent of this.agents.values()) {
|
||||
const result = this.buildAgentResult(agent);
|
||||
const transcript = this.getAgentTranscript(agent.agentId);
|
||||
result.finalText =
|
||||
result.finalText ?? this.getFinalTextFromTranscript(transcript);
|
||||
|
||||
// Get diff for agents that finished their task (IDLE or COMPLETED)
|
||||
if (isSuccessStatus(agent.status)) {
|
||||
|
|
@ -1621,6 +1794,10 @@ export class ArenaManager {
|
|||
result.diff = await this.worktreeService.getWorktreeDiff(
|
||||
agent.worktree.path,
|
||||
);
|
||||
result.diffSummary = summarizeUnifiedDiff(result.diff);
|
||||
result.modifiedFiles = result.diffSummary.files.map(
|
||||
(file) => file.path,
|
||||
);
|
||||
} catch (error) {
|
||||
debugLogger.error(
|
||||
`Failed to get diff for agent ${agent.agentId}:`,
|
||||
|
|
@ -1628,10 +1805,17 @@ export class ArenaManager {
|
|||
);
|
||||
}
|
||||
}
|
||||
result.diffSummary ??= summarizeUnifiedDiff(result.diff);
|
||||
result.modifiedFiles ??= result.diffSummary.files.map(
|
||||
(file) => file.path,
|
||||
);
|
||||
|
||||
agents.push(result);
|
||||
summaryInputs.push({ result, transcript });
|
||||
}
|
||||
|
||||
await this.addApproachSummaries(summaryInputs);
|
||||
|
||||
const endedAt = Date.now();
|
||||
|
||||
return {
|
||||
|
|
@ -1646,3 +1830,73 @@ export class ArenaManager {
|
|||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Caps `text` at `maxChars` UTF-16 code units for inclusion in a prompt.
 *
 * Text within the limit is returned unchanged. Longer text is cut and a
 * `\n...[truncated]` marker is appended (the marker is not counted against
 * the limit). The cut never lands between the two halves of a surrogate
 * pair: if it would, the whole character is dropped so the result contains
 * no lone surrogate.
 *
 * @param text Text to bound.
 * @param maxChars Maximum number of code units to keep from `text`.
 * @returns The original or truncated text.
 */
function truncateForPrompt(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;

  let end = maxChars;
  if (end > 0) {
    const lastKept = text.charCodeAt(end - 1);
    const firstDropped = text.charCodeAt(end);
    const splitsSurrogatePair =
      lastKept >= 0xd800 &&
      lastKept <= 0xdbff &&
      firstDropped >= 0xdc00 &&
      firstDropped <= 0xdfff;
    if (splitsSurrogatePair) {
      end -= 1;
    }
  }

  return `${text.slice(0, end)}\n...[truncated]`;
}
|
||||
|
||||
/**
 * Renders a transcript as plain text, one entry per line, bounded to
 * `ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS`.
 *
 * Thought entries are omitted. Each line has the form
 * `role[:toolName][ (ok|failed)]: content`, where `toolName` and the outcome
 * come from the entry's metadata when present with the expected types.
 *
 * @returns The formatted text, or an empty string for a missing/empty transcript.
 */
function formatTranscript(
  transcript: ArenaTranscriptEntry[] | undefined,
): string {
  if (!transcript?.length) {
    return '';
  }

  const rendered = transcript
    .filter((entry) => !entry.thought)
    .map((entry) => {
      const toolName = entry.metadata?.['toolName'];
      const success = entry.metadata?.['success'];

      const label =
        typeof toolName === 'string' && toolName
          ? `${entry.role}:${toolName}`
          : entry.role;
      const outcome =
        typeof success === 'boolean' ? ` (${success ? 'ok' : 'failed'})` : '';

      return `${label}${outcome}: ${entry.content}`;
    });

  return truncateForPrompt(
    rendered.join('\n'),
    ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS,
  );
}
|
||||
|
||||
/**
 * Extracts the `summary` string from a model response expected to contain a
 * JSON object such as `{"summary":"..."}`.
 *
 * @param text Raw response text; may include content around the JSON object.
 * @returns The summary, or `undefined` when no JSON object is found, it does
 *   not parse, it is not a plain object, or `summary` is not a string.
 */
function parseApproachSummaryResponse(text: string): string | undefined {
  const candidate = extractJsonObject(text);
  if (!candidate) {
    return undefined;
  }

  let decoded: unknown;
  try {
    decoded = JSON.parse(candidate);
  } catch {
    return undefined;
  }

  if (!isRecord(decoded)) {
    return undefined;
  }
  const summary = decoded['summary'];
  return typeof summary === 'string' ? summary : undefined;
}
|
||||
|
||||
/**
 * Returns the substring spanning the first `{` through the last `}` of
 * `text`, or `null` when there is no such ordered pair.
 *
 * The result is only a candidate; it is not validated as JSON here.
 */
function extractJsonObject(text: string): string | null {
  const start = text.indexOf('{');
  if (start === -1) {
    return null;
  }
  const end = text.lastIndexOf('}');
  return end > start ? text.slice(start, end + 1) : null;
}
|
||||
|
||||
/**
 * Type guard for a non-null, non-array object.
 */
function isRecord(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
|
||||
|
|
|
|||
138
packages/core/src/agents/arena/diff-summary.test.ts
Normal file
138
packages/core/src/agents/arena/diff-summary.test.ts
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import { describe, expect, it } from 'vitest';
|
||||
import { AgentStatus } from '../runtime/agent-types.js';
|
||||
import {
|
||||
buildFallbackApproachSummary,
|
||||
summarizeUnifiedDiff,
|
||||
} from './diff-summary.js';
|
||||
import type { ArenaAgentResult } from './types.js';
|
||||
|
||||
describe('summarizeUnifiedDiff', () => {
|
||||
it('parses file and line counts from a unified diff', () => {
|
||||
const summary = summarizeUnifiedDiff(`diff --git a/src/auth.ts b/src/auth.ts
|
||||
index 111..222 100644
|
||||
--- a/src/auth.ts
|
||||
+++ b/src/auth.ts
|
||||
@@ -1,3 +1,4 @@
|
||||
const a = 1;
|
||||
-const b = 2;
|
||||
+const b = 3;
|
||||
+const c = 4;
|
||||
diff --git a/tests/auth.test.ts b/tests/auth.test.ts
|
||||
index 333..444 100644
|
||||
--- a/tests/auth.test.ts
|
||||
+++ b/tests/auth.test.ts
|
||||
@@ -10,2 +10,2 @@
|
||||
-old
|
||||
+new`);
|
||||
|
||||
expect(summary).toEqual({
|
||||
files: [
|
||||
{ path: 'src/auth.ts', additions: 2, deletions: 1 },
|
||||
{ path: 'tests/auth.test.ts', additions: 1, deletions: 1 },
|
||||
],
|
||||
additions: 3,
|
||||
deletions: 2,
|
||||
});
|
||||
});
|
||||
|
||||
it('returns zero counts for an empty diff', () => {
|
||||
expect(summarizeUnifiedDiff('')).toEqual({
|
||||
files: [],
|
||||
additions: 0,
|
||||
deletions: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('parses repeated diff header text without relying on regex backtracking', () => {
|
||||
const repeated = Array.from({ length: 200 }, () => 'a b/a').join('');
|
||||
const path = `${repeated}.ts`;
|
||||
const summary = summarizeUnifiedDiff(`diff --git a/${path} b/${path}
|
||||
--- a/${path}
|
||||
+++ b/${path}
|
||||
@@ -1 +1 @@
|
||||
-old
|
||||
+new`);
|
||||
|
||||
expect(summary).toEqual({
|
||||
files: [{ path, additions: 1, deletions: 1 }],
|
||||
additions: 1,
|
||||
deletions: 1,
|
||||
});
|
||||
});
|
||||
|
||||
it('includes binary diffs without textual line changes', () => {
|
||||
const summary =
|
||||
summarizeUnifiedDiff(`diff --git a/assets/logo.png b/assets/logo.png
|
||||
new file mode 100644
|
||||
index 0000000..abc1234
|
||||
Binary files /dev/null and b/assets/logo.png differ`);
|
||||
|
||||
expect(summary).toEqual({
|
||||
files: [{ path: 'assets/logo.png', additions: 0, deletions: 0 }],
|
||||
additions: 0,
|
||||
deletions: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('includes rename-only diffs without textual line changes', () => {
|
||||
const summary = summarizeUnifiedDiff(`diff --git a/src/old.ts b/src/new.ts
|
||||
similarity index 100%
|
||||
rename from src/old.ts
|
||||
rename to src/new.ts`);
|
||||
|
||||
expect(summary).toEqual({
|
||||
files: [{ path: 'src/new.ts', additions: 0, deletions: 0 }],
|
||||
additions: 0,
|
||||
deletions: 0,
|
||||
});
|
||||
});
|
||||
|
||||
it('includes mode-only diffs without textual line changes', () => {
|
||||
const summary =
|
||||
summarizeUnifiedDiff(`diff --git a/scripts/run.sh b/scripts/run.sh
|
||||
old mode 100644
|
||||
new mode 100755`);
|
||||
|
||||
expect(summary).toEqual({
|
||||
files: [{ path: 'scripts/run.sh', additions: 0, deletions: 0 }],
|
||||
additions: 0,
|
||||
deletions: 0,
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('buildFallbackApproachSummary', () => {
|
||||
it('summarizes changed files and tool usage', () => {
|
||||
const result = {
|
||||
status: AgentStatus.IDLE,
|
||||
stats: { toolCalls: 3 },
|
||||
diffSummary: {
|
||||
files: [{ path: 'src/auth.ts', additions: 2, deletions: 1 }],
|
||||
additions: 2,
|
||||
deletions: 1,
|
||||
},
|
||||
} as unknown as ArenaAgentResult;
|
||||
|
||||
expect(buildFallbackApproachSummary(result)).toBe(
|
||||
'Changed 1 file with 3 tool calls (+2/-1).',
|
||||
);
|
||||
});
|
||||
|
||||
it('reports no changes when the diff is empty', () => {
|
||||
const result = {
|
||||
status: AgentStatus.IDLE,
|
||||
stats: { toolCalls: 0 },
|
||||
diffSummary: { files: [], additions: 0, deletions: 0 },
|
||||
} as unknown as ArenaAgentResult;
|
||||
|
||||
expect(buildFallbackApproachSummary(result)).toBe(
|
||||
'No code changes detected.',
|
||||
);
|
||||
});
|
||||
});
|
||||
141
packages/core/src/agents/arena/diff-summary.ts
Normal file
141
packages/core/src/agents/arena/diff-summary.ts
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
/**
|
||||
* @license
|
||||
* Copyright 2025 Qwen Team
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
import type {
|
||||
ArenaAgentResult,
|
||||
ArenaDiffSummary,
|
||||
ArenaFileChangeSummary,
|
||||
} from './types.js';
|
||||
import { isSuccessStatus } from '../runtime/agent-types.js';
|
||||
|
||||
/**
 * Parse a unified git diff into file-level and aggregate line-change stats.
 *
 * A file entry is opened by each `diff --git` header (so binary, rename-only
 * and mode-only changes are listed with zero line counts) and its path is
 * refined by the `+++` header when one follows. Added and removed lines are
 * counted per file and summed.
 *
 * Hunk bodies are tracked using the line counts in the `@@ -a,b +c,d @@`
 * header. While inside a hunk, lines are classified only by their first
 * character, so changed content that happens to look like a header (for
 * example a removed `-- comment` line, which appears as `--- comment`, or an
 * added `++ x` line, which appears as `+++ x`) is counted as a change instead
 * of being mistaken for a file header.
 *
 * @param diff Unified diff text; `undefined` or empty yields an empty summary.
 * @returns Per-file counts in order of appearance plus totals.
 */
export function summarizeUnifiedDiff(
  diff: string | undefined,
): ArenaDiffSummary {
  if (!diff) {
    return { files: [], additions: 0, deletions: 0 };
  }

  // Counts default to 1 when the ",count" part is omitted.
  const hunkHeader = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;

  const files: ArenaFileChangeSummary[] = [];
  let current: ArenaFileChangeSummary | undefined;
  // Lines of the old/new file still expected in the hunk being read.
  let oldLinesLeft = 0;
  let newLinesLeft = 0;

  const startFile = (path: string): ArenaFileChangeSummary => {
    const file: ArenaFileChangeSummary = { path, additions: 0, deletions: 0 };
    files.push(file);
    return file;
  };

  for (const line of diff.split('\n')) {
    if (oldLinesLeft > 0 || newLinesLeft > 0) {
      const marker = line.charAt(0);
      if (marker === '+') {
        if (current) current.additions++;
        newLinesLeft--;
        continue;
      }
      if (marker === '-') {
        if (current) current.deletions++;
        oldLinesLeft--;
        continue;
      }
      if (marker === '\\') {
        // "\ No newline at end of file" belongs to neither side.
        continue;
      }
      if (marker === ' ' || marker === '') {
        // Context line (possibly with its leading space stripped).
        oldLinesLeft--;
        newLinesLeft--;
        continue;
      }
      // Not valid hunk content: the header overstated the hunk size.
      // Resynchronize and treat this line as a header candidate.
      oldLinesLeft = 0;
      newLinesLeft = 0;
    }

    const gitPath = parseDiffGitPath(line);
    if (gitPath) {
      current = startFile(gitPath);
      continue;
    }

    if (line.startsWith('+++ ')) {
      const path = normalizeDiffPath(line.slice(4));
      if (path !== '/dev/null') {
        if (current) {
          current.path = path;
        } else {
          current = startFile(path);
        }
      }
      continue;
    }

    if (line.startsWith('--- ')) {
      const path = normalizeDiffPath(line.slice(4));
      if (!current && path !== '/dev/null') {
        current = startFile(path);
      }
      continue;
    }

    const hunk = hunkHeader.exec(line);
    if (hunk) {
      oldLinesLeft = hunk[1] === undefined ? 1 : Number(hunk[1]);
      newLinesLeft = hunk[2] === undefined ? 1 : Number(hunk[2]);
      continue;
    }

    if (!current) continue;

    // Lenient fallback for change lines that appear outside a recognized
    // hunk (e.g. unparseable hunk header).
    if (line.startsWith('+')) {
      current.additions++;
    } else if (line.startsWith('-')) {
      current.deletions++;
    }
  }

  return {
    files,
    additions: files.reduce((sum, file) => sum + file.additions, 0),
    deletions: files.reduce((sum, file) => sum + file.deletions, 0),
  };
}
|
||||
|
||||
/**
 * Deterministic stand-in for the LLM-generated approach summary.
 *
 * - Non-success status: "Did not produce an applicable result", with the
 *   error text appended when there is one.
 * - No changed files: "No code changes detected."
 * - Otherwise: file count, tool-call count and line stats.
 *
 * Uses `result.diffSummary` when present, otherwise parses `result.diff`.
 */
export function buildFallbackApproachSummary(result: ArenaAgentResult): string {
  if (!isSuccessStatus(result.status)) {
    const suffix = result.error ? `: ${result.error}` : '';
    return `Did not produce an applicable result${suffix}.`;
  }

  const { files, additions, deletions } =
    result.diffSummary ?? summarizeUnifiedDiff(result.diff ?? '');
  const fileCount = files.length;
  if (fileCount === 0) {
    return 'No code changes detected.';
  }

  const { toolCalls } = result.stats;
  const fileWord = fileCount === 1 ? 'file' : 'files';
  const toolWord = toolCalls === 1 ? 'tool call' : 'tool calls';
  const lineStats = formatLineStats(additions, deletions);
  return `Changed ${fileCount} ${fileWord} with ${toolCalls} ${toolWord} (${lineStats}).`;
}
|
||||
|
||||
/**
 * Formats added/removed line counts as `+A/-D`, or `no line changes` when
 * both counts are zero.
 */
export function formatLineStats(additions: number, deletions: number): string {
  const unchanged = additions === 0 && deletions === 0;
  return unchanged ? 'no line changes' : `+${additions}/-${deletions}`;
}
|
||||
|
||||
/**
 * Cleans a path taken from a `---` / `+++` diff header: trims whitespace and
 * removes a single leading `a/` or `b/` prefix. `/dev/null` is returned as is.
 */
function normalizeDiffPath(path: string): string {
  const cleaned = path.trim();
  return cleaned === '/dev/null' ? cleaned : cleaned.replace(/^[ab]\//, '');
}
|
||||
|
||||
/**
 * Extracts the new-side path from a `diff --git a/<old> b/<new>` header.
 *
 * Uses plain string operations only (no regular expression). For the common
 * case where both sides name the same file, the header is the symmetric form
 * `a/P b/P`, which lets `P` be recovered exactly even when the path itself
 * contains the text ` b/`. When the two sides differ (a rename), the path
 * after the last ` b/` is returned.
 *
 * @param line One line of diff text.
 * @returns The new-side path, or `undefined` if `line` is not such a header
 *   or has no path after ` b/`.
 */
function parseDiffGitPath(line: string): string | undefined {
  const prefix = 'diff --git a/';
  const separator = ' b/';
  if (!line.startsWith(prefix)) {
    return undefined;
  }

  // Everything after "diff --git a/": "<old> b/<new>".
  const body = line.slice(prefix.length);

  // Same path on both sides: body is exactly P + " b/" + P.
  const symmetricLength = (body.length - separator.length) / 2;
  if (Number.isInteger(symmetricLength) && symmetricLength > 0) {
    const left = body.slice(0, symmetricLength);
    if (body.slice(symmetricLength) === `${separator}${left}`) {
      return left;
    }
  }

  // Different paths (rename): take what follows the last separator.
  const separatorIndex = body.lastIndexOf(separator);
  if (separatorIndex < 0) {
    return undefined;
  }

  const pathStart = separatorIndex + separator.length;
  if (pathStart >= body.length) {
    return undefined;
  }

  return body.slice(pathStart);
}
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
// Arena-specific exports
|
||||
export * from './types.js';
|
||||
export * from './arena-events.js';
|
||||
export * from './diff-summary.js';
|
||||
export * from './ArenaManager.js';
|
||||
export * from './ArenaAgentClient.js';
|
||||
|
||||
|
|
|
|||
|
|
@ -92,6 +92,30 @@ export interface ArenaAgentStats {
|
|||
failedToolCalls: number;
|
||||
}
|
||||
|
||||
/**
 * Line-change counts for one file of an agent's unified diff.
 */
export interface ArenaFileChangeSummary {
  /** Path of the file, relative to the repository root. */
  path: string;
  /** Number of lines added to the file. */
  additions: number;
  /** Number of lines removed from the file. */
  deletions: number;
}
|
||||
|
||||
/**
 * Whole-diff change counts for one agent: the per-file breakdown plus totals.
 */
export interface ArenaDiffSummary {
  /** One entry per file the agent changed. */
  files: ArenaFileChangeSummary[];
  /** Lines added across all files. */
  additions: number;
  /** Lines removed across all files. */
  deletions: number;
}
|
||||
|
||||
/**
|
||||
* Result from a single Arena agent.
|
||||
*/
|
||||
|
|
@ -112,8 +136,12 @@ export interface ArenaAgentResult {
|
|||
stats: ArenaAgentStats;
|
||||
/** Git diff of changes made */
|
||||
diff?: string;
|
||||
/** Parsed summary of the git diff */
|
||||
diffSummary?: ArenaDiffSummary;
|
||||
/** Files modified by this agent */
|
||||
modifiedFiles?: string[];
|
||||
/** High-level implementation approach summary */
|
||||
approachSummary?: string;
|
||||
/** Start timestamp */
|
||||
startedAt: number;
|
||||
/** End timestamp */
|
||||
|
|
|
|||
|
|
@ -488,9 +488,10 @@ describe('InProcessBackend', () => {
|
|||
|
||||
expect(agentContext.getContentGenerator()).toBe(agentGenerator);
|
||||
expect(agentContext.getAuthType()).toBe('anthropic');
|
||||
expect(backend.getAgentContentGenerator('agent-1')).toBe(agentGenerator);
|
||||
});
|
||||
|
||||
it('should not create per-agent ContentGenerator without authOverrides', async () => {
|
||||
it('should expose inherited ContentGenerator without authOverrides', async () => {
|
||||
const mockCreate = createContentGenerator as ReturnType<typeof vi.fn>;
|
||||
mockCreate.mockClear();
|
||||
|
||||
|
|
@ -498,6 +499,9 @@ describe('InProcessBackend', () => {
|
|||
await backend.spawnAgent(createSpawnConfig('agent-1'));
|
||||
|
||||
expect(mockCreate).not.toHaveBeenCalled();
|
||||
expect(backend.getAgentContentGenerator('agent-1')).toBe(
|
||||
mockContentGenerator,
|
||||
);
|
||||
});
|
||||
|
||||
it('should fall back to parent ContentGenerator if per-agent creation fails', async () => {
|
||||
|
|
@ -523,6 +527,7 @@ describe('InProcessBackend', () => {
|
|||
|
||||
// Falls back to parent's content generator
|
||||
expect(agentContext.getContentGenerator()).toBe(mockContentGenerator);
|
||||
expect(backend.getAgentContentGenerator('agent-1')).toBeUndefined();
|
||||
});
|
||||
|
||||
it('should give different agents different ContentGenerators', async () => {
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ export class InProcessBackend implements Backend {
|
|||
|
||||
private readonly runtimeContext: Config;
|
||||
private readonly agents = new Map<string, AgentInteractive>();
|
||||
private readonly agentContentGenerators = new Map<string, ContentGenerator>();
|
||||
private readonly agentRegistries: ToolRegistry[] = [];
|
||||
private readonly agentOrder: string[] = [];
|
||||
private activeAgentId: string | null = null;
|
||||
|
|
@ -88,12 +89,19 @@ export class InProcessBackend implements Backend {
|
|||
// Build a per-agent runtime context with isolated working directory,
|
||||
// target directory, workspace context, tool registry, and (optionally)
|
||||
// a dedicated ContentGenerator for per-agent auth isolation.
|
||||
const agentContext = await createPerAgentConfig(
|
||||
const perAgent = await createPerAgentConfig(
|
||||
this.runtimeContext,
|
||||
config.cwd,
|
||||
inProcessConfig.runtimeConfig.modelConfig.model,
|
||||
inProcessConfig.authOverrides,
|
||||
);
|
||||
const agentContext = perAgent.config;
|
||||
if (perAgent.contentGenerator) {
|
||||
this.agentContentGenerators.set(
|
||||
config.agentId,
|
||||
perAgent.contentGenerator,
|
||||
);
|
||||
}
|
||||
|
||||
this.agentRegistries.push(agentContext.getToolRegistry());
|
||||
|
||||
|
|
@ -200,6 +208,7 @@ export class InProcessBackend implements Backend {
|
|||
this.agentRegistries.length = 0;
|
||||
|
||||
this.agents.clear();
|
||||
this.agentContentGenerators.clear();
|
||||
this.agentOrder.length = 0;
|
||||
this.activeAgentId = null;
|
||||
debugLogger.info('InProcessBackend cleaned up');
|
||||
|
|
@ -309,6 +318,18 @@ export class InProcessBackend implements Backend {
|
|||
return this.agents.get(agentId);
|
||||
}
|
||||
|
||||
/**
 * Look up the ContentGenerator recorded for an agent, intended for
 * summary generation.
 *
 * - When auth overrides produced an isolated generator for the agent,
 *   that isolated generator is returned.
 * - When no override was requested, the inherited generator the agent
 *   already runs with is returned.
 * - When override creation failed (or nothing was recorded for this
 *   id), the result is `undefined`, letting callers avoid routing agent
 *   data through a fallback provider.
 *
 * @param agentId - Identifier of the agent to look up.
 * @returns The recorded generator, or `undefined` if there is none.
 */
getAgentContentGenerator(agentId: string): ContentGenerator | undefined {
  const recordedGenerator = this.agentContentGenerators.get(agentId);
  return recordedGenerator;
}
|
||||
|
||||
// ─── Private ───────────────────────────────────────────────
|
||||
|
||||
private navigate(direction: 1 | -1): string | null {
|
||||
|
|
@ -336,15 +357,17 @@ export class InProcessBackend implements Backend {
|
|||
* the agent Config
|
||||
* - `getContentGenerator()` / `getContentGeneratorConfig()` / `getAuthType()`
|
||||
* → per-agent ContentGenerator when `authOverrides` is provided
|
||||
* - returned `contentGenerator` → the generator safe to use for summaries
|
||||
*/
|
||||
async function createPerAgentConfig(
|
||||
base: Config,
|
||||
cwd: string,
|
||||
modelId?: string,
|
||||
authOverrides?: InProcessSpawnConfig['authOverrides'],
|
||||
): Promise<Config> {
|
||||
): Promise<{ config: Config; contentGenerator?: ContentGenerator }> {
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const override = Object.create(base) as any;
|
||||
let dedicatedContentGenerator: ContentGenerator | undefined;
|
||||
|
||||
override.getWorkingDir = () => cwd;
|
||||
override.getTargetDir = () => cwd;
|
||||
|
|
@ -374,6 +397,7 @@ async function createPerAgentConfig(
|
|||
agentGeneratorConfig,
|
||||
override as Config,
|
||||
);
|
||||
dedicatedContentGenerator = agentGenerator;
|
||||
override.getContentGenerator = (): ContentGenerator => agentGenerator;
|
||||
override.getContentGeneratorConfig = (): ContentGeneratorConfig =>
|
||||
agentGeneratorConfig;
|
||||
|
|
@ -392,5 +416,10 @@ async function createPerAgentConfig(
|
|||
}
|
||||
}
|
||||
|
||||
return override as Config;
|
||||
return {
|
||||
config: override as Config,
|
||||
contentGenerator:
|
||||
dedicatedContentGenerator ??
|
||||
(authOverrides?.authType ? undefined : base.getContentGenerator()),
|
||||
};
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue