diff --git a/docs/users/features/arena.md b/docs/users/features/arena.md index 7b53238c7..67c879f9a 100644 --- a/docs/users/features/arena.md +++ b/docs/users/features/arena.md @@ -90,8 +90,9 @@ When all agents complete, the Arena enters the result comparison phase. You'll s - **Status summary**: Which agents succeeded, failed, or were cancelled - **Execution metrics**: Duration, rounds of reasoning, token usage, and tool call counts for each agent +- **Arena comparison summary**: Files changed in common vs. by one agent only, line-change counts, token efficiency, and a high-level approach summary generated from each agent's diff, metrics, and conversation history -A selection dialog presents the successful agents. Choose one to apply its changes to your main workspace, or discard all results. +A selection dialog presents the successful agents. Choose one to apply its changes to your main workspace, or press `x` to discard all results. Press `p` to toggle a quick preview for the highlighted agent, or `d` to toggle that agent's detailed diff before selecting a winner. ### What happens when you select a winner @@ -99,7 +100,7 @@ A selection dialog presents the successful agents. Choose one to apply its chang 2. The diff is applied to your main working directory 3. All worktrees and temporary branches are cleaned up automatically -If you want to inspect results before deciding, each agent's full conversation history is available via the tab bar while the selection dialog is active. +If you want to inspect the complete reasoning path before deciding, each agent's full conversation history is still available via the tab bar while the selection dialog is active. 
## Configuration diff --git a/packages/cli/src/ui/commands/arenaCommand.ts b/packages/cli/src/ui/commands/arenaCommand.ts index f5fbb04c4..4a50ec5b0 100644 --- a/packages/cli/src/ui/commands/arenaCommand.ts +++ b/packages/cli/src/ui/commands/arenaCommand.ts @@ -276,6 +276,9 @@ function executeArenaCommand( rounds: result.stats.rounds, error: result.error, diff: result.diff, + diffSummary: result.diffSummary, + modifiedFiles: result.modifiedFiles, + approachSummary: result.approachSummary, }); const handleAgentComplete = (event: ArenaAgentCompleteEvent) => { diff --git a/packages/cli/src/ui/components/arena/ArenaCards.test.tsx b/packages/cli/src/ui/components/arena/ArenaCards.test.tsx new file mode 100644 index 000000000..d566b8314 --- /dev/null +++ b/packages/cli/src/ui/components/arena/ArenaCards.test.tsx @@ -0,0 +1,150 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, expect, it } from 'vitest'; +import { AgentStatus } from '@qwen-code/qwen-code-core'; +import { renderWithProviders } from '../../../test-utils/render.js'; +import { ArenaSessionCard } from './ArenaCards.js'; +import type { ArenaAgentCardData } from '../../types.js'; + +describe('ArenaSessionCard', () => { + it('renders the comparison summary sections from agent results', () => { + const agents: ArenaAgentCardData[] = [ + { + label: 'qwen-coder-plus', + status: AgentStatus.IDLE, + durationMs: 12_000, + totalTokens: 45_000, + inputTokens: 30_000, + outputTokens: 15_000, + toolCalls: 12, + successfulToolCalls: 12, + failedToolCalls: 0, + rounds: 3, + diffSummary: { + files: [ + { path: 'src/auth.ts', additions: 200, deletions: 80 }, + { path: 'tests/auth.test.ts', additions: 45, deletions: 9 }, + ], + additions: 245, + deletions: 89, + }, + modifiedFiles: ['src/auth.ts', 'tests/auth.test.ts'], + approachSummary: 'Refactored with JWT strategy pattern.', + }, + { + label: 'gpt-4o', + status: AgentStatus.IDLE, + durationMs: 10_000, + 
totalTokens: 38_000, + inputTokens: 25_000, + outputTokens: 13_000, + toolCalls: 8, + successfulToolCalls: 8, + failedToolCalls: 0, + rounds: 2, + diffSummary: { + files: [ + { path: 'src/auth.ts', additions: 120, deletions: 40 }, + { path: 'src/middleware.ts', additions: 69, deletions: 27 }, + ], + additions: 189, + deletions: 67, + }, + modifiedFiles: ['src/auth.ts', 'src/middleware.ts'], + approachSummary: 'Made inline changes with validation layer.', + }, + ]; + + const { lastFrame } = renderWithProviders( + , + ); + + const output = lastFrame(); + expect(output).toContain('Arena Comparison Summary'); + expect(output).not.toContain('Status Time Tokens Changes'); + expect(output).toContain('Status Summary:'); + expect(output).toContain('qwen-coder-plus: Idle'); + expect(output).toContain('gpt-4o: Idle'); + expect(output).toContain('Files Modified:'); + expect(output).toContain('common: src/auth.ts'); + expect(output).toContain('qwen-coder-plus-only: tests/auth.test.ts'); + expect(output).toContain('gpt-4o-only: src/middleware.ts'); + expect(output).toContain('Approach Summary:'); + expect(output).toContain('Refactored with JWT strategy pattern.'); + expect(output).toContain('Token Efficiency:'); + expect(output).toContain('45,000 tokens'); + expect(output).toContain('45,000 tokens · runtime 12.0s'); + expect(output).not.toContain('45,000 tokens · runtime 12.0s · 12 tools'); + expect(output).not.toContain('Quick Preview:'); + expect(output).not.toContain('[View Detailed Diff]'); + expect(output).not.toContain('[Select Winner →]'); + }); + + it('hides empty per-agent unique file groups', () => { + const agents: ArenaAgentCardData[] = [ + { + label: 'gemma4:31b', + status: AgentStatus.IDLE, + durationMs: 10_000, + totalTokens: 10_000, + inputTokens: 7_000, + outputTokens: 3_000, + toolCalls: 2, + successfulToolCalls: 2, + failedToolCalls: 0, + rounds: 1, + diffSummary: { + files: [{ path: 'reader.py', additions: 20, deletions: 0 }], + additions: 20, + deletions: 0, 
+ }, + modifiedFiles: ['reader.py'], + approachSummary: 'Created a reader.', + }, + { + label: 'qwen2.5:14b', + status: AgentStatus.IDLE, + durationMs: 8_000, + totalTokens: 8_000, + inputTokens: 6_000, + outputTokens: 2_000, + toolCalls: 2, + successfulToolCalls: 2, + failedToolCalls: 0, + rounds: 1, + diffSummary: { + files: [{ path: 'reader.py', additions: 22, deletions: 0 }], + additions: 22, + deletions: 0, + }, + modifiedFiles: ['reader.py'], + approachSummary: 'Created a reader.', + }, + ]; + + const { lastFrame } = renderWithProviders( + , + ); + + const output = lastFrame(); + expect(output).toContain('common: reader.py'); + expect(output).not.toContain('only gemma4:31b: none'); + expect(output).not.toContain('only qwen2.5:14b: none'); + }); +}); diff --git a/packages/cli/src/ui/components/arena/ArenaCards.tsx b/packages/cli/src/ui/components/arena/ArenaCards.tsx index 1ad7d8e2a..b39933a3e 100644 --- a/packages/cli/src/ui/components/arena/ArenaCards.tsx +++ b/packages/cli/src/ui/components/arena/ArenaCards.tsx @@ -10,6 +10,7 @@ import { theme } from '../../semantic-colors.js'; import { formatDuration } from '../../utils/formatters.js'; import { getArenaStatusLabel } from '../../utils/displayUtils.js'; import type { ArenaAgentCardData } from '../../types.js'; +import type { ArenaDiffSummary } from '@qwen-code/qwen-code-core'; // ─── Helpers ──────────────────────────────────────────────── @@ -84,36 +85,25 @@ interface ArenaSessionCardProps { width?: number; } -/** - * Pad or truncate a string to a fixed visual width. - */ -function pad( - str: string, - len: number, - align: 'left' | 'right' = 'left', -): string { - if (str.length >= len) return str.slice(0, len); - const padding = ' '.repeat(len - str.length); - return align === 'right' ? padding + str : str + padding; -} - -/** - * Truncate a string to a maximum length, adding ellipsis if truncated. 
/**
 * Lists the file paths an agent touched.
 *
 * Uses `modifiedFiles` when it is set; otherwise falls back to the paths
 * recorded in `diffSummary.files`, and finally to an empty list.
 *
 * @param agent - Card data for a single Arena agent.
 * @returns The agent's file paths, in the order they were recorded.
 */
function getAgentFiles(agent: ArenaAgentCardData): string[] {
  return (
    agent.modifiedFiles ??
    agent.diffSummary?.files.map((file) => file.path) ??
    []
  );
}

/**
 * Splits the files touched by the agents into comparison groups.
 *
 * The first group is always `common`: every path touched by more than one
 * agent (it may be empty). It is followed by one `<label>-only` group per
 * agent that touched at least one path nobody else touched; agents without
 * such paths get no group. Paths inside each group are sorted.
 *
 * @param agents - Card data for every agent in the session.
 * @returns The groups in display order: `common` first, then agents in
 *   input order.
 */
function getComparisonFileGroups(
  agents: ArenaAgentCardData[],
): Array<{ label: string; files: string[] }> {
  // De-duplicate each agent's list once and reuse it for both passes. The
  // counting pass must not count a repeated path twice for the same agent,
  // and the "-only" pass must not list that repeated path twice either.
  const perAgent = agents.map((agent) => ({
    label: agent.label,
    files: [...new Set(getAgentFiles(agent))],
  }));

  // Number of distinct agents that touched each path.
  const counts = new Map<string, number>();
  for (const { files } of perAgent) {
    for (const file of files) {
      counts.set(file, (counts.get(file) ?? 0) + 1);
    }
  }

  const common = [...counts.entries()]
    .filter(([, count]) => count > 1)
    .map(([file]) => file)
    .sort();
  const groups = [{ label: 'common', files: common }];

  for (const { label, files } of perAgent) {
    // `filter` returns a fresh array, so the in-place sort cannot disturb
    // the shared per-agent list.
    const unique = files.filter((file) => counts.get(file) === 1).sort();
    if (unique.length > 0) {
      groups.push({ label: `${label}-only`, files: unique });
    }
  }

  return groups;
}
60) - 8)} - - - - {/* Agent rows */} - {agents.map((agent) => { - const { text: statusText, color } = getArenaStatusLabel(agent.status); - const diffStats = getDiffStats(agent.diff); - return ( - - - - {truncate(agent.label, MAX_MODEL_NAME_LENGTH)} - - - - {statusText} - - - - {pad(formatDuration(agent.durationMs), colTime - 1, 'right')} - - - - - {pad( - agent.totalTokens.toLocaleString(), - colTokens - 1, - 'right', - )} - - - - {diffStats.additions > 0 || diffStats.deletions > 0 ? ( - - - +{diffStats.additions} + {(sessionStatus === 'idle' || sessionStatus === 'completed') && ( + <> + + + Status Summary: + + {agents.map((agent, index) => { + const { text: statusText, color } = getArenaStatusLabel( + agent.status, + ); + return ( + + + {index === agents.length - 1 ? '└─' : '├─'} {agent.label} + :{' '} - / - -{diffStats.deletions} - - ) : ( - - - )} - + {statusText} + + ); + })} - ); - })} + + + + + + Files Modified: + + {getComparisonFileGroups(agents).map((group, index, groups) => ( + + + {getTreeBranch(index, groups.length)} {group.label}:{' '} + + + {formatFileList(group.files)} + + + ))} + + + + + + + Approach Summary: + + {agents.map((agent, index) => { + const diffStats = getDiffStats(agent.diff, agent.diffSummary); + const files = getAgentFiles(agent).length; + const branch = index === agents.length - 1 ? '└─' : '├─'; + const summary = + agent.approachSummary ?? 'No approach summary available.'; + return ( + + + + {branch} {agent.label}:{' '} + + {summary} + ( + {files} + + {files === 1 ? ' file, ' : ' files, '} + + + +{diffStats.additions} + + + + -{diffStats.deletions} + + lines, + {agent.toolCalls} + + {agent.toolCalls === 1 ? ' tool call)' : ' tool calls)'} + + + + ); + })} + + + + + + + Token Efficiency: + + {agents.map((agent, index) => ( + + + {index === agents.length - 1 ? 
'└─' : '├─'} {agent.label} + :{' '} + + + {agent.totalTokens.toLocaleString()} tokens · runtime{' '} + {formatDuration(agent.durationMs)} + + + ))} + + + )} @@ -271,9 +312,8 @@ export const ArenaSessionCard: React.FC = ({ {sessionStatus === 'idle' && ( - Switch to an agent tab to continue, or{' '} - /arena select to pick a - winner. + Run /arena select to view + detailed diff or pick a winner. )} diff --git a/packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx b/packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx new file mode 100644 index 000000000..efcfc9165 --- /dev/null +++ b/packages/cli/src/ui/components/arena/ArenaSelectDialog.test.tsx @@ -0,0 +1,108 @@ +/** + * @license + * Copyright 2025 Qwen Team + * SPDX-License-Identifier: Apache-2.0 + */ + +import { waitFor } from '@testing-library/react'; +import { describe, expect, it, vi } from 'vitest'; +import { + AgentStatus, + ArenaSessionStatus, + type ArenaManager, + type Config, +} from '@qwen-code/qwen-code-core'; +import { renderWithProviders } from '../../../test-utils/render.js'; +import { ArenaSelectDialog } from './ArenaSelectDialog.js'; + +describe('ArenaSelectDialog', () => { + it('toggles quick preview and detailed diff for the highlighted agent', async () => { + const result = { + sessionId: 'arena-1', + task: 'Update auth', + status: ArenaSessionStatus.IDLE, + agents: [ + { + agentId: 'model-1', + model: { modelId: 'model-1', authType: 'openai' }, + status: AgentStatus.IDLE, + worktree: { + id: 'w1', + name: 'model-1', + path: '/tmp/model-1', + branch: 'arena/model-1', + isActive: true, + createdAt: 1, + }, + stats: { + rounds: 1, + totalTokens: 1000, + inputTokens: 700, + outputTokens: 300, + durationMs: 2000, + toolCalls: 2, + successfulToolCalls: 2, + failedToolCalls: 0, + }, + diff: `diff --git a/src/auth.ts b/src/auth.ts +--- a/src/auth.ts ++++ b/src/auth.ts +@@ -1 +1 @@ +-old ++new`, + diffSummary: { + files: [{ path: 'src/auth.ts', additions: 1, deletions: 1 }], + 
additions: 1, + deletions: 1, + }, + modifiedFiles: ['src/auth.ts'], + approachSummary: 'Updated the auth implementation inline.', + startedAt: 1, + }, + ], + startedAt: 1, + wasRepoInitialized: false, + }; + + const manager = { + getResult: vi.fn(() => result), + getAgentStates: vi.fn(() => [ + { + agentId: 'model-1', + model: { modelId: 'model-1', authType: 'openai' }, + status: AgentStatus.IDLE, + stats: result.agents[0]!.stats, + }, + ]), + getAgentState: vi.fn(), + applyAgentResult: vi.fn(), + } as unknown as ArenaManager; + + const config = { + getArenaManager: () => manager, + cleanupArenaRuntime: vi.fn(), + getChatRecordingService: () => undefined, + } as unknown as Config; + + const { lastFrame, stdin } = renderWithProviders( + , + ); + + stdin.write('p'); + await waitFor(() => { + expect(lastFrame()).toContain('Quick Preview · model-1'); + }); + expect(lastFrame()).toContain('Updated the auth implementation inline.'); + + stdin.write('d'); + await waitFor(() => { + expect(lastFrame()).toContain('Detailed Diff · model-1'); + }); + expect(lastFrame()).toContain('diff --git a/src/auth.ts b/src/auth.ts'); + }); +}); diff --git a/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx b/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx index 88fe5a507..7ec69499f 100644 --- a/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx +++ b/packages/cli/src/ui/components/arena/ArenaSelectDialog.tsx @@ -5,12 +5,13 @@ */ import type React from 'react'; -import { useCallback, useMemo } from 'react'; +import { useCallback, useMemo, useState } from 'react'; import { Box, Text } from 'ink'; import { type ArenaManager, isSuccessStatus, type Config, + type ArenaAgentResult, } from '@qwen-code/qwen-code-core'; import { theme } from '../../semantic-colors.js'; import { useKeypress } from '../../hooks/useKeypress.js'; @@ -134,6 +135,17 @@ export function ArenaSelectDialog({ const result = manager.getResult(); const agents = manager.getAgentStates(); + const 
firstSelectableAgentId = agents.find((agent) => + isSuccessStatus(agent.status), + )?.agentId; + const [selectedAgentId, setSelectedAgentId] = useState( + firstSelectableAgentId, + ); + const [showPreview, setShowPreview] = useState(false); + const [showDetailedDiff, setShowDetailedDiff] = useState(false); + const selectedResult = result?.agents.find( + (agent) => agent.agentId === selectedAgentId, + ); const items: Array> = useMemo( () => @@ -146,11 +158,16 @@ export function ArenaSelectDialog({ // Build diff summary from cached result if available let diffAdditions = 0; let diffDeletions = 0; + let fileCount = 0; if (isSuccessStatus(agent.status) && result) { const agentResult = result.agents.find( (a) => a.agentId === agent.agentId, ); - if (agentResult?.diff) { + if (agentResult?.diffSummary) { + diffAdditions = agentResult.diffSummary.additions; + diffDeletions = agentResult.diffSummary.deletions; + fileCount = agentResult.diffSummary.files.length; + } else if (agentResult?.diff) { const lines = agentResult.diff.split('\n'); for (const line of lines) { if (line.startsWith('+') && !line.startsWith('+++')) { @@ -160,6 +177,7 @@ export function ArenaSelectDialog({ } } } + fileCount = agentResult?.modifiedFiles?.length ?? 
fileCount; } // Title: full model name (not truncated) @@ -173,6 +191,12 @@ export function ArenaSelectDialog({ {duration} · {tokens} tokens + {fileCount > 0 && ( + <> + · + {fileCount} files + + )} {(diffAdditions > 0 || diffDeletions > 0) && ( <> · @@ -201,7 +225,13 @@ export function ArenaSelectDialog({ if (key.name === 'escape') { closeArenaDialog(); } + if (key.name === 'p' && !key.ctrl && !key.meta) { + setShowPreview((current) => !current); + } if (key.name === 'd' && !key.ctrl && !key.meta) { + setShowDetailedDiff((current) => !current); + } + if (key.name === 'x' && !key.ctrl && !key.meta) { onDiscard(); } }, @@ -245,16 +275,136 @@ export function ArenaSelectDialog({ onSelect={(agentId: string) => { onSelect(agentId); }} + onHighlight={(agentId: string) => { + setSelectedAgentId(agentId); + }} isFocused={true} showNumbers={false} /> + {showPreview && selectedResult && ( + + )} + + {showDetailedDiff && selectedResult && ( + + )} + - Enter to select, d to discard all, Esc to cancel + p preview, d detailed diff, Enter select winner, x discard all, Esc + cancel ); } + +function ArenaAgentPreview({ + result, +}: { + result: ArenaAgentResult; +}): React.JSX.Element { + const fileSummary = result.diffSummary?.files ?? []; + return ( + + + Quick Preview · {result.model.modelId} + + + Approach: + + {result.approachSummary ?? 'No approach summary available.'} + + + + Major files: + + {formatFileList(fileSummary.map((file) => file.path))} + + + + Metrics: + + {result.stats.totalTokens.toLocaleString()} tokens ·{' '} + {formatDuration(result.stats.durationMs)} · {result.stats.toolCalls}{' '} + tools + + + + ); +} + +function ArenaAgentDetailedDiff({ + result, +}: { + result: ArenaAgentResult; +}): React.JSX.Element { + const diffLines = getVisibleDiffLines(result.diff); + return ( + + + Detailed Diff · {result.model.modelId} + + {diffLines.length === 0 ? ( + + No diff available. 
/** Default cap on the number of diff lines rendered in the detailed view. */
const DEFAULT_MAX_VISIBLE_DIFF_LINES = 180;

/**
 * Splits a unified diff into the lines to render, capping the output.
 *
 * - A missing or empty diff yields an empty list.
 * - Both `\n` and `\r\n` are treated as line breaks, so diffs of CRLF files
 *   do not leave a stray carriage return on each line (which could garble
 *   terminal rendering).
 * - The empty element produced by a terminating newline is dropped, so it is
 *   neither rendered as a blank row nor counted towards the cap. Blank lines
 *   inside the diff are kept.
 * - When the diff is longer than `maxLines`, only the first `maxLines` lines
 *   are kept and a final marker line reports how many were omitted.
 *
 * @param diff - Unified diff text, if any.
 * @param maxLines - Maximum number of diff lines to keep; negative values
 *   are treated as 0. Defaults to 180.
 * @returns The lines to display, plus a truncation marker when applicable.
 */
function getVisibleDiffLines(
  diff: string | undefined,
  maxLines: number = DEFAULT_MAX_VISIBLE_DIFF_LINES,
): string[] {
  if (!diff) {
    return [];
  }

  const lines = diff.split(/\r?\n/);
  // A diff that ends with a newline splits into a trailing '' element that
  // is not a real diff line.
  if (lines[lines.length - 1] === '') {
    lines.pop();
  }

  const limit = Math.max(0, Math.floor(maxLines));
  if (lines.length <= limit) {
    return lines;
  }

  return [
    ...lines.slice(0, limit),
    `... truncated ${lines.length - limit} diff lines`,
  ];
}
a/packages/core/src/agents/arena/ArenaManager.test.ts +++ b/packages/core/src/agents/arena/ArenaManager.test.ts @@ -11,6 +11,7 @@ import * as os from 'node:os'; import { ArenaManager } from './ArenaManager.js'; import { ArenaEventType } from './arena-events.js'; import { ArenaSessionStatus, ARENA_MAX_AGENTS } from './types.js'; +import { AgentStatus } from '../runtime/agent-types.js'; const hoistedMockSetupWorktrees = vi.hoisted(() => vi.fn()); const hoistedMockCleanupSession = vi.hoisted(() => vi.fn()); @@ -374,6 +375,156 @@ describe('ArenaManager', () => { }); describe('active session lifecycle', () => { + it('collects diff summaries and fallback approach summaries', async () => { + const manager = new ArenaManager(mockConfig as never); + mockBackend.setAutoExit(false); + hoistedMockGetWorktreeDiff.mockResolvedValue(`diff --git a/src/auth.ts b/src/auth.ts +index 111..222 100644 +--- a/src/auth.ts ++++ b/src/auth.ts +@@ -1 +1,2 @@ +-old ++new ++extra`); + + const startPromise = manager.start(createValidStartOptions()); + await waitForCondition( + () => mockBackend.spawnAgent.mock.calls.length >= 2, + ); + + const agentsDir = path.join( + os.tmpdir(), + 'arena-mock', + 'testsess', + 'agents', + ); + await fs.mkdir(agentsDir, { recursive: true }); + for (const modelId of ['model-1', 'model-2']) { + await fs.writeFile( + path.join(agentsDir, `${modelId}.json`), + JSON.stringify({ + agentId: modelId, + status: AgentStatus.COMPLETED, + updatedAt: Date.now(), + rounds: 1, + stats: { + rounds: 1, + totalTokens: 0, + inputTokens: 0, + outputTokens: 0, + durationMs: 0, + toolCalls: 0, + successfulToolCalls: 0, + failedToolCalls: 0, + }, + finalSummary: null, + error: null, + }), + 'utf-8', + ); + } + + const result = await startPromise; + + expect(result.agents).toHaveLength(2); + expect(result.agents[0]?.modifiedFiles).toEqual(['src/auth.ts']); + expect(result.agents[0]?.diffSummary).toEqual({ + files: [{ path: 'src/auth.ts', additions: 2, deletions: 1 }], + additions: 2, 
+ deletions: 1, + }); + expect(result.agents[0]?.approachSummary).toBe( + 'Changed 1 file with 0 tool calls (+2/-1).', + ); + }); + + it('uses each in-process agent generator for semantic approach summaries', async () => { + const mainGenerateContent = vi.fn(); + const model1GenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [ + { + text: JSON.stringify({ + summary: 'Model 1 used a strategy pattern.', + }), + }, + ], + }, + }, + ], + }); + const model2GenerateContent = vi.fn().mockResolvedValue({ + candidates: [ + { + content: { + parts: [ + { + text: JSON.stringify({ + summary: 'Model 2 made inline edits.', + }), + }, + ], + }, + }, + ], + }); + const config = { + ...mockConfig, + getContentGenerator: () => ({ + generateContent: mainGenerateContent, + }), + }; + mockBackend.type = 'in-process'; + mockBackend.setAutoExit(false); + const agentInteractives = new Map< + string, + ReturnType + >(); + mockBackend.getAgent.mockImplementation((agentId: string) => + agentInteractives.get(agentId), + ); + mockBackend.getAgentContentGenerator.mockImplementation( + (agentId: string) => + agentId === 'model-1' + ? 
{ generateContent: model1GenerateContent } + : { generateContent: model2GenerateContent }, + ); + mockBackend.spawnAgent.mockImplementation( + async (config: { agentId: string }) => { + agentInteractives.set( + config.agentId, + createMockInteractive(config.agentId), + ); + }, + ); + const manager = new ArenaManager(config as never); + + const result = await manager.start(createValidStartOptions()); + + expect(mainGenerateContent).not.toHaveBeenCalled(); + expect(model1GenerateContent).toHaveBeenCalledTimes(1); + expect(model2GenerateContent).toHaveBeenCalledTimes(1); + expect(model1GenerateContent.mock.calls[0]?.[0].model).toBe('model-1'); + expect(model2GenerateContent.mock.calls[0]?.[0].model).toBe('model-2'); + + const model1Prompt = model1GenerateContent.mock.calls[0]?.[0].contents[0] + .parts[0].text as string; + const model2Prompt = model2GenerateContent.mock.calls[0]?.[0].contents[0] + .parts[0].text as string; + expect(model1Prompt).toContain('"agentId": "model-1"'); + expect(model1Prompt).not.toContain('"agentId": "model-2"'); + expect(model2Prompt).toContain('"agentId": "model-2"'); + expect(model2Prompt).not.toContain('"agentId": "model-1"'); + expect(result.agents[0]?.approachSummary).toBe( + 'Model 1 used a strategy pattern.', + ); + expect(result.agents[1]?.approachSummary).toBe( + 'Model 2 made inline edits.', + ); + }); + it('cancel should stop backend and move session to CANCELLED', async () => { const manager = new ArenaManager(mockConfig as never); @@ -434,7 +585,7 @@ function createMockBackend() { let autoExit = true; const backend = { - type: 'tmux' as const, + type: 'tmux' as 'tmux' | 'in-process', init: vi.fn().mockResolvedValue(undefined), spawnAgent: vi.fn(async (config: { agentId: string }) => { // By default, simulate immediate agent termination so tests @@ -461,6 +612,8 @@ function createMockBackend() { writeToAgent: vi.fn().mockReturnValue(false), resizeAll: vi.fn(), getAttachHint: vi.fn().mockReturnValue(null), + getAgent: 
vi.fn().mockReturnValue(undefined), + getAgentContentGenerator: vi.fn().mockReturnValue(undefined), /** Disable automatic agent exit for tests that need to control timing. */ setAutoExit(value: boolean) { autoExit = value; @@ -469,6 +622,36 @@ function createMockBackend() { return backend; } +function createMockInteractive(agentId: string) { + const emitter = { + on: vi.fn(), + off: vi.fn(), + }; + return { + getMessages: vi.fn().mockReturnValue([ + { + role: 'assistant', + content: `${agentId} final response`, + timestamp: Date.now(), + }, + ]), + getStatus: vi.fn().mockReturnValue(AgentStatus.IDLE), + getStats: vi.fn().mockReturnValue({ + rounds: 1, + totalTokens: 0, + inputTokens: 0, + outputTokens: 0, + totalToolCalls: 0, + successfulToolCalls: 0, + failedToolCalls: 0, + totalDurationMs: 1, + }), + getLastRoundError: vi.fn().mockReturnValue(undefined), + getError: vi.fn().mockReturnValue(undefined), + getEventEmitter: vi.fn().mockReturnValue(emitter), + }; +} + function createValidStartOptions() { return { models: [ diff --git a/packages/core/src/agents/arena/ArenaManager.ts b/packages/core/src/agents/arena/ArenaManager.ts index 6a386158f..41a788249 100644 --- a/packages/core/src/agents/arena/ArenaManager.ts +++ b/packages/core/src/agents/arena/ArenaManager.ts @@ -9,11 +9,13 @@ import * as path from 'node:path'; import { GitWorktreeService } from '../../services/gitWorktreeService.js'; import { Storage } from '../../config/storage.js'; import type { Config } from '../../config/config.js'; +import type { ContentGenerator } from '../../core/contentGenerator.js'; import { getCoreSystemPrompt } from '../../core/prompts.js'; import { createDebugLogger } from '../../utils/debugLogger.js'; import { isNodeError } from '../../utils/errors.js'; import { atomicWriteJSON } from '../../utils/atomicFileWrite.js'; import type { AnsiOutput } from '../../utils/terminalSerializer.js'; +import { getResponseText } from '../../utils/partUtils.js'; import { ArenaEventEmitter, 
ArenaEventType } from './arena-events.js'; import type { AgentSpawnConfig, Backend, DisplayMode } from '../index.js'; import { detectBackend, DISPLAY_MODE } from '../index.js'; @@ -51,10 +53,30 @@ import { makeArenaSessionEndedEvent, } from '../../telemetry/index.js'; import type { ArenaSessionEndedStatus } from '../../telemetry/index.js'; +import { + buildFallbackApproachSummary, + summarizeUnifiedDiff, +} from './diff-summary.js'; const debugLogger = createDebugLogger('ARENA'); const ARENA_POLL_INTERVAL_MS = 500; +const ARENA_SUMMARY_TIMEOUT_MS = 20_000; +const ARENA_SUMMARY_MAX_DIFF_CHARS = 6_000; +const ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS = 6_000; + +interface ArenaTranscriptEntry { + role: 'user' | 'assistant' | 'tool_call' | 'tool_result' | 'info'; + content: string; + thought?: boolean; + metadata?: Record; + timestamp: number; +} + +interface ArenaSummaryInput { + result: ArenaAgentResult; + transcript?: ArenaTranscriptEntry[]; +} /** * ArenaManager orchestrates multi-model competitive execution. 
@@ -1438,6 +1460,9 @@ export class ArenaManager { ...agent.stats, ...statusFile.stats, }; + if (statusFile.finalSummary) { + agent.accumulatedText = statusFile.finalSummary; + } // Detect state transitions from the sideband status file const resolved = this.resolveTransition( @@ -1605,15 +1630,163 @@ export class ArenaManager { } } + private getAgentTranscript( + agentId: string, + ): ArenaTranscriptEntry[] | undefined { + if (this.backend?.type !== DISPLAY_MODE.IN_PROCESS) { + return undefined; + } + + const interactive = (this.backend as InProcessBackend).getAgent(agentId); + const messages = interactive?.getMessages(); + if (!messages || messages.length === 0) { + return undefined; + } + + return messages.map((message) => ({ + role: message.role, + content: message.content, + thought: message.thought, + metadata: message.metadata, + timestamp: message.timestamp, + })); + } + + private getFinalTextFromTranscript( + transcript: ArenaTranscriptEntry[] | undefined, + ): string | undefined { + if (!transcript) return undefined; + + for (let i = transcript.length - 1; i >= 0; i--) { + const message = transcript[i]!; + if ( + message.role === 'assistant' && + !message.thought && + message.content.trim() + ) { + return message.content.trim(); + } + } + + return undefined; + } + + private async addApproachSummaries( + summaryInputs: ArenaSummaryInput[], + ): Promise { + await Promise.all( + summaryInputs.map(async (summaryInput) => { + summaryInput.result.approachSummary = + await this.generateAgentApproachSummary(summaryInput); + }), + ); + } + + private getAgentSummaryGenerator( + agentId: string, + ): ContentGenerator | undefined { + if (this.backend?.type !== DISPLAY_MODE.IN_PROCESS) { + return undefined; + } + + return (this.backend as InProcessBackend).getAgentContentGenerator(agentId); + } + + private async generateAgentApproachSummary( + summaryInput: ArenaSummaryInput, + ): Promise { + const { result } = summaryInput; + const generator = 
this.getAgentSummaryGenerator(result.agentId); + if (!generator) { + return buildFallbackApproachSummary(result); + } + + const abortController = new AbortController(); + const timeout = setTimeout( + () => abortController.abort(), + ARENA_SUMMARY_TIMEOUT_MS, + ); + + try { + const response = await generator.generateContent( + { + model: result.model.modelId, + contents: [ + { + role: 'user', + parts: [ + { + text: this.buildAgentApproachSummaryPrompt(summaryInput), + }, + ], + }, + ], + config: { + abortSignal: abortController.signal, + thinkingConfig: { includeThoughts: false }, + }, + }, + 'arena_approach_summary', + ); + + return ( + parseApproachSummaryResponse(getResponseText(response) ?? '')?.trim() || + buildFallbackApproachSummary(result) + ); + } catch (error) { + debugLogger.error( + `Failed to generate Arena approach summary for ${result.agentId}:`, + error, + ); + return buildFallbackApproachSummary(result); + } finally { + clearTimeout(timeout); + } + } + + private buildAgentApproachSummaryPrompt({ + result: agent, + transcript, + }: ArenaSummaryInput): string { + const payload = { + task: this.arenaConfig?.task ?? '', + instruction: + 'Summarize this Arena agent approach for user comparison. Use git diff as the source of truth for what changed. Use transcript/finalText only to infer intent and architectural decisions. Do not pick a winner. Return only compact JSON: {"summary":"one sentence summary"}.', + agent: { + agentId: agent.agentId, + model: agent.model.modelId, + status: agent.status, + metrics: { + files: agent.diffSummary?.files.length ?? 0, + additions: agent.diffSummary?.additions ?? 0, + deletions: agent.diffSummary?.deletions ?? 0, + tokens: agent.stats.totalTokens, + durationMs: agent.stats.durationMs, + toolCalls: agent.stats.toolCalls, + }, + files: agent.diffSummary?.files ?? [], + finalText: truncateForPrompt(agent.finalText ?? 
/**
 * Cap `text` at `maxChars` characters. Longer text is cut and a
 * `"\n...[truncated]"` marker is appended (so the result may exceed
 * `maxChars` by the marker length).
 */
function truncateForPrompt(text: string, maxChars: number): string {
  if (text.length <= maxChars) return text;
  return `${text.slice(0, maxChars)}\n...[truncated]`;
}

/**
 * Render a transcript as one line per entry: `role[:toolName][ (ok|failed)]: content`.
 *
 * Thought entries are skipped. The tool name and ok/failed suffix come from
 * `metadata.toolName` (string) and `metadata.success` (boolean) when present.
 * The joined text is capped at `ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS`.
 *
 * @returns The formatted transcript, or `''` for a missing/empty transcript.
 */
function formatTranscript(
  transcript: ArenaTranscriptEntry[] | undefined,
): string {
  if (!transcript || transcript.length === 0) {
    return '';
  }

  const lines: string[] = [];
  for (const entry of transcript) {
    if (entry.thought) continue;
    const metadata = entry.metadata ?? {};
    const toolName =
      typeof metadata['toolName'] === 'string'
        ? metadata['toolName']
        : undefined;
    const success =
      typeof metadata['success'] === 'boolean'
        ? metadata['success']
        : undefined;
    const label = toolName ? `${entry.role}:${toolName}` : entry.role;
    const suffix =
      success === undefined ? '' : ` (${success ? 'ok' : 'failed'})`;
    lines.push(`${label}${suffix}: ${entry.content}`);
  }

  return truncateForPrompt(
    lines.join('\n'),
    ARENA_SUMMARY_MAX_TRANSCRIPT_CHARS,
  );
}

/**
 * Extract the `summary` string from a model response expected to contain a
 * JSON object such as `{"summary":"..."}` (possibly wrapped in prose or code
 * fences).
 *
 * Candidates run from a `{` to the last `}` in the text. If a candidate is not
 * valid JSON, the next `{` is tried, so stray braces in leading prose do not
 * discard an otherwise valid answer. The first candidate that parses decides
 * the result.
 *
 * @returns The summary string, or `undefined` when no JSON object parses or
 *   its `summary` is not a string.
 */
function parseApproachSummaryResponse(text: string): string | undefined {
  let searchFrom = 0;

  for (;;) {
    const jsonText = extractJsonObject(text, searchFrom);
    if (!jsonText) {
      return undefined;
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(jsonText);
    } catch {
      // Not JSON from this brace; retry from the next opening brace.
      searchFrom = text.indexOf('{', searchFrom) + 1;
      continue;
    }

    if (!isRecord(parsed)) {
      return undefined;
    }
    const summary = parsed['summary'];
    return typeof summary === 'string' ? summary : undefined;
  }
}

/**
 * Return the substring from the first `{` at or after `fromIndex` through the
 * last `}` in `text`, or `null` when no such span exists.
 *
 * @param text - Text to search.
 * @param fromIndex - Position to start looking for the opening brace
 *   (defaults to the start of the text).
 */
function extractJsonObject(text: string, fromIndex = 0): string | null {
  const firstBrace = text.indexOf('{', fromIndex);
  const lastBrace = text.lastIndexOf('}');
  if (firstBrace < 0 || lastBrace <= firstBrace) {
    return null;
  }
  return text.slice(firstBrace, lastBrace + 1);
}

/** Type guard: true for non-null, non-array objects. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}
/** Join diff lines with newlines and no trailing newline. */
const diffOf = (...lines: string[]): string => lines.join('\n');

describe('summarizeUnifiedDiff', () => {
  it('parses file and line counts from a unified diff', () => {
    const twoFileDiff = diffOf(
      'diff --git a/src/auth.ts b/src/auth.ts',
      'index 111..222 100644',
      '--- a/src/auth.ts',
      '+++ b/src/auth.ts',
      '@@ -1,3 +1,4 @@',
      ' const a = 1;',
      '-const b = 2;',
      '+const b = 3;',
      '+const c = 4;',
      'diff --git a/tests/auth.test.ts b/tests/auth.test.ts',
      'index 333..444 100644',
      '--- a/tests/auth.test.ts',
      '+++ b/tests/auth.test.ts',
      '@@ -10,2 +10,2 @@',
      '-old',
      '+new',
    );

    expect(summarizeUnifiedDiff(twoFileDiff)).toEqual({
      files: [
        { path: 'src/auth.ts', additions: 2, deletions: 1 },
        { path: 'tests/auth.test.ts', additions: 1, deletions: 1 },
      ],
      additions: 3,
      deletions: 2,
    });
  });

  it('returns zero counts for an empty diff', () => {
    const summary = summarizeUnifiedDiff('');

    expect(summary).toEqual({ files: [], additions: 0, deletions: 0 });
  });

  it('parses repeated diff header text without relying on regex backtracking', () => {
    // A path made of many " b/" separators stresses the header parser.
    const path = `${Array.from({ length: 200 }, () => 'a b/a').join('')}.ts`;
    const summary = summarizeUnifiedDiff(
      diffOf(
        `diff --git a/${path} b/${path}`,
        `--- a/${path}`,
        `+++ b/${path}`,
        '@@ -1 +1 @@',
        '-old',
        '+new',
      ),
    );

    expect(summary).toEqual({
      files: [{ path, additions: 1, deletions: 1 }],
      additions: 1,
      deletions: 1,
    });
  });

  // Diffs that list a file but carry no +/- content lines.
  it.each([
    {
      title: 'includes binary diffs without textual line changes',
      path: 'assets/logo.png',
      diff: diffOf(
        'diff --git a/assets/logo.png b/assets/logo.png',
        'new file mode 100644',
        'index 0000000..abc1234',
        'Binary files /dev/null and b/assets/logo.png differ',
      ),
    },
    {
      title: 'includes rename-only diffs without textual line changes',
      path: 'src/new.ts',
      diff: diffOf(
        'diff --git a/src/old.ts b/src/new.ts',
        'similarity index 100%',
        'rename from src/old.ts',
        'rename to src/new.ts',
      ),
    },
    {
      title: 'includes mode-only diffs without textual line changes',
      path: 'scripts/run.sh',
      diff: diffOf(
        'diff --git a/scripts/run.sh b/scripts/run.sh',
        'old mode 100644',
        'new mode 100755',
      ),
    },
  ])('$title', ({ path, diff }) => {
    expect(summarizeUnifiedDiff(diff)).toEqual({
      files: [{ path, additions: 0, deletions: 0 }],
      additions: 0,
      deletions: 0,
    });
  });
});

describe('buildFallbackApproachSummary', () => {
  it('summarizes changed files and tool usage', () => {
    const oneFileResult = {
      status: AgentStatus.IDLE,
      stats: { toolCalls: 3 },
      diffSummary: {
        files: [{ path: 'src/auth.ts', additions: 2, deletions: 1 }],
        additions: 2,
        deletions: 1,
      },
    } as unknown as ArenaAgentResult;

    const text = buildFallbackApproachSummary(oneFileResult);

    expect(text).toBe('Changed 1 file with 3 tool calls (+2/-1).');
  });

  it('reports no changes when the diff is empty', () => {
    const unchangedResult = {
      status: AgentStatus.IDLE,
      stats: { toolCalls: 0 },
      diffSummary: { files: [], additions: 0, deletions: 0 },
    } as unknown as ArenaAgentResult;

    const text = buildFallbackApproachSummary(unchangedResult);

    expect(text).toBe('No code changes detected.');
  });
});
/**
 * Matches a unified-diff hunk header (`@@ -a[,b] +c[,d] @@`) and captures the
 * optional old (1) and new (2) line counts; an omitted count means 1.
 */
const HUNK_HEADER_PATTERN = /^@@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? @@/;

/**
 * Parse a unified git diff into file-level and aggregate line-change stats.
 *
 * Files are opened by `diff --git` headers (or by `---`/`+++` headers when no
 * git header precedes them) and are reported even when they have no textual
 * changes (binary, rename-only, mode-only).
 *
 * Hunk bodies are tracked with the line counts from the `@@` header, so a
 * content line that merely looks like a file header (a removed `-- comment`
 * shows up as `--- comment`, an added `++ x` as `+++ x`) is counted as a
 * change instead of being taken for a header. A hunk also ends early at the
 * first line that cannot be hunk content, which keeps truncated hunks from
 * swallowing the next file's header.
 *
 * @param diff - Unified diff text; `undefined` or empty yields an empty summary.
 * @returns Per-file counts in diff order plus summed additions and deletions.
 */
export function summarizeUnifiedDiff(
  diff: string | undefined,
): ArenaDiffSummary {
  if (!diff) {
    return { files: [], additions: 0, deletions: 0 };
  }

  const files: ArenaFileChangeSummary[] = [];
  let current: ArenaFileChangeSummary | undefined;
  // Lines still expected on the old/new side of the hunk being read.
  let oldLinesLeft = 0;
  let newLinesLeft = 0;

  const finishFile = (): void => {
    if (!current) return;
    files.push(current);
    current = undefined;
  };

  const ensureFile = (path: string): void => {
    if (!current) {
      current = { path, additions: 0, deletions: 0 };
      return;
    }
    current.path = path;
  };

  for (const line of diff.split('\n')) {
    if (oldLinesLeft > 0 || newLinesLeft > 0) {
      const marker = line.charAt(0);
      if (marker === '+') {
        if (current) current.additions++;
        newLinesLeft = Math.max(0, newLinesLeft - 1);
        continue;
      }
      if (marker === '-') {
        if (current) current.deletions++;
        oldLinesLeft = Math.max(0, oldLinesLeft - 1);
        continue;
      }
      if (marker === ' ' || marker === '') {
        // Context line (an empty line is context whose leading space was stripped).
        oldLinesLeft = Math.max(0, oldLinesLeft - 1);
        newLinesLeft = Math.max(0, newLinesLeft - 1);
        continue;
      }
      if (marker === '\\') {
        // "\ No newline at end of file" belongs to the hunk but is not a line.
        continue;
      }
      // Anything else cannot be hunk content: the hunk was cut short.
      oldLinesLeft = 0;
      newLinesLeft = 0;
    }

    const gitPath = parseDiffGitPath(line);
    if (gitPath) {
      finishFile();
      current = {
        path: gitPath,
        additions: 0,
        deletions: 0,
      };
      continue;
    }

    const hunkHeader = HUNK_HEADER_PATTERN.exec(line);
    if (hunkHeader) {
      oldLinesLeft = Number(hunkHeader[1] ?? 1);
      newLinesLeft = Number(hunkHeader[2] ?? 1);
      continue;
    }

    if (line.startsWith('+++ ')) {
      const path = normalizeDiffPath(line.slice(4));
      if (path !== '/dev/null') {
        ensureFile(path);
      }
      continue;
    }

    if (line.startsWith('--- ')) {
      const path = normalizeDiffPath(line.slice(4));
      if (!current && path !== '/dev/null') {
        ensureFile(path);
      }
      continue;
    }

    if (!current) continue;

    // Outside any tracked hunk (no or exhausted `@@` header): count by prefix.
    if (line.startsWith('+')) {
      current.additions++;
    } else if (line.startsWith('-')) {
      current.deletions++;
    }
  }

  finishFile();

  return {
    files,
    additions: files.reduce((sum, file) => sum + file.additions, 0),
    deletions: files.reduce((sum, file) => sum + file.deletions, 0),
  };
}

/**
 * Build a deterministic approach summary when semantic LLM summarization is
 * unavailable or returns unusable output.
 *
 * @param result - Agent result; its `diffSummary` is used when present,
 *   otherwise the summary is parsed from `diff`.
 * @returns A sentence for a non-success status (with the error if any), a
 *   "no changes" sentence for an empty diff, or a file/tool-call/line-count
 *   sentence.
 */
export function buildFallbackApproachSummary(result: ArenaAgentResult): string {
  if (!isSuccessStatus(result.status)) {
    const suffix = result.error ? `: ${result.error}` : '';
    return `Did not produce an applicable result${suffix}.`;
  }

  const diffSummary =
    result.diffSummary ?? summarizeUnifiedDiff(result.diff ?? '');
  if (diffSummary.files.length === 0) {
    return 'No code changes detected.';
  }

  const fileWord = diffSummary.files.length === 1 ? 'file' : 'files';
  const toolWord = result.stats.toolCalls === 1 ? 'tool call' : 'tool calls';
  return `Changed ${diffSummary.files.length} ${fileWord} with ${result.stats.toolCalls} ${toolWord} (${formatLineStats(diffSummary.additions, diffSummary.deletions)}).`;
}

/**
 * Format line counts as `+A/-D`, or `no line changes` when both are zero.
 */
export function formatLineStats(additions: number, deletions: number): string {
  if (additions === 0 && deletions === 0) {
    return 'no line changes';
  }
  return `+${additions}/-${deletions}`;
}

/**
 * Trim a `---`/`+++` header path and drop its leading `a/` or `b/` prefix;
 * `/dev/null` is returned unchanged.
 */
function normalizeDiffPath(path: string): string {
  const trimmed = path.trim();
  if (trimmed === '/dev/null') {
    return trimmed;
  }
  return trimmed.replace(/^[ab]\//, '');
}

/**
 * Extract the new-side path from a `diff --git a/<old> b/<new>` header line.
 *
 * When both sides name the same path (every non-rename change) the header is
 * exactly `<P> b/<P>` after the prefix, so the path is recovered by length
 * even if it contains ` b/` itself. Otherwise (renames) the text after the
 * last ` b/` is used.
 *
 * @returns The path, or `undefined` if the line is not a usable git header.
 */
function parseDiffGitPath(line: string): string | undefined {
  const prefix = 'diff --git a/';
  const separator = ' b/';
  if (!line.startsWith(prefix)) {
    return undefined;
  }

  // Symmetric form: "<P> b/<P>" has length 2 * |P| + |separator|.
  const rest = line.slice(prefix.length);
  const pathLength = (rest.length - separator.length) / 2;
  if (
    pathLength >= 1 &&
    Number.isInteger(pathLength) &&
    rest.startsWith(separator, pathLength) &&
    rest.slice(0, pathLength) === rest.slice(pathLength + separator.length)
  ) {
    return rest.slice(0, pathLength);
  }

  const separatorIndex = line.lastIndexOf(separator);
  if (separatorIndex < prefix.length) {
    return undefined;
  }

  const pathStart = separatorIndex + separator.length;
  if (pathStart >= line.length) {
    return undefined;
  }

  return line.slice(pathStart);
}
/**
 * Change counts for a single file, as parsed from an agent's unified diff.
 */
export interface ArenaFileChangeSummary {
  /** Repository-relative file path */
  path: string;
  /** Number of lines added in this file */
  additions: number;
  /** Number of lines removed from this file */
  deletions: number;
}

/**
 * Aggregate change counts parsed from an agent's unified diff: the per-file
 * entries plus totals across all of them.
 */
export interface ArenaDiffSummary {
  /** Files changed by this agent */
  files: ArenaFileChangeSummary[];
  /** Total added lines across all files */
  additions: number;
  /** Total removed lines across all files */
  deletions: number;
}
mockContentGenerator, + ); }); it('should fall back to parent ContentGenerator if per-agent creation fails', async () => { @@ -523,6 +527,7 @@ describe('InProcessBackend', () => { // Falls back to parent's content generator expect(agentContext.getContentGenerator()).toBe(mockContentGenerator); + expect(backend.getAgentContentGenerator('agent-1')).toBeUndefined(); }); it('should give different agents different ContentGenerators', async () => { diff --git a/packages/core/src/agents/backends/InProcessBackend.ts b/packages/core/src/agents/backends/InProcessBackend.ts index 0305ed59d..df434cf8c 100644 --- a/packages/core/src/agents/backends/InProcessBackend.ts +++ b/packages/core/src/agents/backends/InProcessBackend.ts @@ -51,6 +51,7 @@ export class InProcessBackend implements Backend { private readonly runtimeContext: Config; private readonly agents = new Map(); + private readonly agentContentGenerators = new Map(); private readonly agentRegistries: ToolRegistry[] = []; private readonly agentOrder: string[] = []; private activeAgentId: string | null = null; @@ -88,12 +89,19 @@ export class InProcessBackend implements Backend { // Build a per-agent runtime context with isolated working directory, // target directory, workspace context, tool registry, and (optionally) // a dedicated ContentGenerator for per-agent auth isolation. 
- const agentContext = await createPerAgentConfig( + const perAgent = await createPerAgentConfig( this.runtimeContext, config.cwd, inProcessConfig.runtimeConfig.modelConfig.model, inProcessConfig.authOverrides, ); + const agentContext = perAgent.config; + if (perAgent.contentGenerator) { + this.agentContentGenerators.set( + config.agentId, + perAgent.contentGenerator, + ); + } this.agentRegistries.push(agentContext.getToolRegistry()); @@ -200,6 +208,7 @@ export class InProcessBackend implements Backend { this.agentRegistries.length = 0; this.agents.clear(); + this.agentContentGenerators.clear(); this.agentOrder.length = 0; this.activeAgentId = null; debugLogger.info('InProcessBackend cleaned up'); @@ -309,6 +318,18 @@ export class InProcessBackend implements Backend { return this.agents.get(agentId); } + /** + * Get the ContentGenerator this agent can use for summary generation. + * If auth overrides created an isolated generator, this returns that + * generator. If no override was requested, this returns the inherited + * generator the agent already runs with. If override creation failed, this is + * undefined so callers can avoid sending agent data through a fallback + * provider. 
+ */ + getAgentContentGenerator(agentId: string): ContentGenerator | undefined { + return this.agentContentGenerators.get(agentId); + } + // ─── Private ─────────────────────────────────────────────── private navigate(direction: 1 | -1): string | null { @@ -336,15 +357,17 @@ export class InProcessBackend implements Backend { * the agent Config * - `getContentGenerator()` / `getContentGeneratorConfig()` / `getAuthType()` * → per-agent ContentGenerator when `authOverrides` is provided + * - returned `contentGenerator` → the generator safe to use for summaries */ async function createPerAgentConfig( base: Config, cwd: string, modelId?: string, authOverrides?: InProcessSpawnConfig['authOverrides'], -): Promise { +): Promise<{ config: Config; contentGenerator?: ContentGenerator }> { // eslint-disable-next-line @typescript-eslint/no-explicit-any const override = Object.create(base) as any; + let dedicatedContentGenerator: ContentGenerator | undefined; override.getWorkingDir = () => cwd; override.getTargetDir = () => cwd; @@ -374,6 +397,7 @@ async function createPerAgentConfig( agentGeneratorConfig, override as Config, ); + dedicatedContentGenerator = agentGenerator; override.getContentGenerator = (): ContentGenerator => agentGenerator; override.getContentGeneratorConfig = (): ContentGeneratorConfig => agentGeneratorConfig; @@ -392,5 +416,10 @@ async function createPerAgentConfig( } } - return override as Config; + return { + config: override as Config, + contentGenerator: + dedicatedContentGenerator ?? + (authOverrides?.authType ? undefined : base.getContentGenerator()), + }; }