From b27cb81bb74134f09a858e3d46641a17f84f52f2 Mon Sep 17 00:00:00 2001 From: tanzhenxin Date: Tue, 21 Apr 2026 11:44:10 +0800 Subject: [PATCH] feat(cli): attribute /stats rows to the originating subagent (#3229) * feat(cli): attribute /stats rows to the originating subagent Thread subagent identity through telemetry via an AsyncLocalStorage context so each API response knows which subagent (or main) emitted it. Aggregate a per-source breakdown alongside the existing per-model totals and render one row per (model, source) in /stats and /stats model. Main-only sessions collapse to the existing single-row display. Resolves #3215 * fix(cli): reserve `main` subagent name and stabilize /stats React keys Two latent correctness issues found during self-review of PR #3229: - A subagent named `main` would silently collide with the `MAIN_SOURCE` sentinel and be merged into the main bucket with no attribution. Add `main` to the reserved-names list so validation rejects it. - `flattenModelsBySource` used the normalized display label (with `-001` stripped) as the React key, which could collapse distinct models `foo` and `foo-001` into duplicate keys. Split `ModelSourceEntry` into `{ key, label, metrics }` with `key` built from the raw model name (plus `::source` in the split case), and update both `StatsDisplay` and `ModelStatsDisplay` to key rows/columns off it. Also surface invalid-subagent-file parse errors through the debug logger instead of swallowing them entirely, so users running with debug logging enabled can tell why a subagent failed to load. Add a dedicated unit test file for `flattenModelsBySource` covering the collapse rule, session-wide split, source order, the `foo`/`foo-001` key-collision regression, and the empty-bySource fallback. Extend the reserved-name test to include `main`. 
--- .../ui/components/ModelStatsDisplay.test.tsx | 106 +++++++++-- .../src/ui/components/ModelStatsDisplay.tsx | 37 ++-- .../components/SessionSummaryDisplay.test.tsx | 16 +- .../src/ui/components/StatsDisplay.test.tsx | 179 +++++++++++++++++- .../cli/src/ui/components/StatsDisplay.tsx | 23 ++- .../ModelStatsDisplay.test.tsx.snap | 40 ++-- .../SessionSummaryDisplay.test.tsx.snap | 6 +- .../__snapshots__/StatsDisplay.test.tsx.snap | 20 +- .../src/ui/contexts/SessionContext.test.tsx | 3 + .../cli/src/ui/contexts/SessionContext.tsx | 25 ++- .../cli/src/ui/utils/computeStats.test.ts | 16 +- packages/cli/src/ui/utils/computeStats.ts | 8 +- .../cli/src/ui/utils/modelsBySource.test.ts | 131 +++++++++++++ packages/cli/src/ui/utils/modelsBySource.ts | 127 +++++++++++++ .../src/utils/nonInteractiveHelpers.test.ts | 4 + .../src/agents/arena/ArenaAgentClient.test.ts | 3 + .../core/src/agents/runtime/agent-core.ts | 23 +++ .../loggingContentGenerator.ts | 10 +- .../core/src/output/json-formatter.test.ts | 2 + .../core/src/subagents/subagent-manager.ts | 23 ++- .../core/src/subagents/validation.test.ts | 1 + packages/core/src/subagents/validation.ts | 6 +- .../src/telemetry/qwen-logger/qwen-logger.ts | 3 + packages/core/src/telemetry/types.ts | 27 ++- .../core/src/telemetry/uiTelemetry.test.ts | 179 +++++++++++++++++- packages/core/src/telemetry/uiTelemetry.ts | 77 ++++++-- .../core/src/utils/subagentNameContext.ts | 26 +++ 27 files changed, 995 insertions(+), 126 deletions(-) create mode 100644 packages/cli/src/ui/utils/modelsBySource.test.ts create mode 100644 packages/cli/src/ui/utils/modelsBySource.ts create mode 100644 packages/core/src/utils/subagentNameContext.ts diff --git a/packages/cli/src/ui/components/ModelStatsDisplay.test.tsx b/packages/cli/src/ui/components/ModelStatsDisplay.test.tsx index 591c2a280..9e871ad3f 100644 --- a/packages/cli/src/ui/components/ModelStatsDisplay.test.tsx +++ b/packages/cli/src/ui/components/ModelStatsDisplay.test.tsx @@ -8,7 +8,17 @@ 
import { render } from 'ink-testing-library'; import { describe, it, expect, vi, beforeAll, afterAll } from 'vitest'; import { ModelStatsDisplay } from './ModelStatsDisplay.js'; import * as SessionContext from '../contexts/SessionContext.js'; -import type { SessionMetrics } from '../contexts/SessionContext.js'; +import type { + ModelMetrics, + ModelMetricsCore, + SessionMetrics, +} from '../contexts/SessionContext.js'; +import { MAIN_SOURCE } from '@qwen-code/qwen-code-core'; + +const mainOnly = (core: ModelMetricsCore): ModelMetrics => ({ + ...core, + bySource: { [MAIN_SOURCE]: core }, +}); // Mock the context to provide controlled data for testing vi.mock('../contexts/SessionContext.js', async (importOriginal) => { @@ -73,7 +83,7 @@ describe('', () => { it('should not display conditional rows if no model has data for them', () => { const { lastFrame } = renderWithMockedStats({ models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, tokens: { prompt: 10, @@ -83,7 +93,7 @@ describe('', () => { thoughts: 0, tool: 0, }, - }, + }), }, tools: { totalCalls: 0, @@ -105,7 +115,7 @@ describe('', () => { it('should display conditional rows if at least one model has data', () => { const { lastFrame } = renderWithMockedStats({ models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, tokens: { prompt: 10, @@ -115,8 +125,8 @@ describe('', () => { thoughts: 2, tool: 0, }, - }, - 'gemini-2.5-flash': { + }), + 'gemini-2.5-flash': mainOnly({ api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 50 }, tokens: { prompt: 5, @@ -126,7 +136,7 @@ describe('', () => { thoughts: 0, tool: 3, }, - }, + }), }, tools: { totalCalls: 0, @@ -148,7 +158,7 @@ describe('', () => { it('should display stats for multiple models correctly', () => { const { lastFrame } = renderWithMockedStats({ models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { 
totalRequests: 10, totalErrors: 1, totalLatencyMs: 1000 }, tokens: { prompt: 100, @@ -158,8 +168,8 @@ describe('', () => { thoughts: 10, tool: 5, }, - }, - 'gemini-2.5-flash': { + }), + 'gemini-2.5-flash': mainOnly({ api: { totalRequests: 20, totalErrors: 2, totalLatencyMs: 500 }, tokens: { prompt: 200, @@ -169,7 +179,7 @@ describe('', () => { thoughts: 20, tool: 10, }, - }, + }), }, tools: { totalCalls: 0, @@ -190,7 +200,7 @@ describe('', () => { it('should handle large values without wrapping or overlapping', () => { const { lastFrame } = renderWithMockedStats({ models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 999999999, totalErrors: 123456789, @@ -204,7 +214,7 @@ describe('', () => { thoughts: 111111111, tool: 222222222, }, - }, + }), }, tools: { totalCalls: 0, @@ -222,7 +232,7 @@ describe('', () => { it('should display a single model correctly', () => { const { lastFrame } = renderWithMockedStats({ models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, tokens: { prompt: 10, @@ -232,7 +242,7 @@ describe('', () => { thoughts: 2, tool: 1, }, - }, + }), }, tools: { totalCalls: 0, @@ -249,4 +259,70 @@ describe('', () => { expect(output).not.toContain('gemini-2.5-flash'); expect(output).toMatchSnapshot(); }); + + describe('Subagent source attribution', () => { + const baseTools: SessionMetrics['tools'] = { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }; + const baseFiles: SessionMetrics['files'] = { + totalLinesAdded: 0, + totalLinesRemoved: 0, + }; + const makeCore = (reqs: number): ModelMetricsCore => ({ + api: { totalRequests: reqs, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 10, + candidates: 20, + total: 30, + cached: 0, + thoughts: 0, + tool: 0, + }, + }); + + it('collapses the column header when only main is a source', () => { + const { 
lastFrame } = renderWithMockedStats({ + models: { 'glm-5': mainOnly(makeCore(1)) }, + tools: baseTools, + files: baseFiles, + }); + const output = lastFrame(); + expect(output).toContain('glm-5'); + expect(output).not.toContain('glm-5 (main)'); + }); + + it('renders distinct columns for main and subagent when same model has multiple sources', () => { + const mainCore = makeCore(1); + const echoerCore = makeCore(1); + const { lastFrame } = renderWithMockedStats({ + models: { + 'glm-5': { + api: { totalRequests: 2, totalErrors: 0, totalLatencyMs: 200 }, + tokens: { + prompt: 20, + candidates: 40, + total: 60, + cached: 0, + thoughts: 0, + tool: 0, + }, + bySource: { + [MAIN_SOURCE]: mainCore, + echoer: echoerCore, + }, + }, + }, + tools: baseTools, + files: baseFiles, + }); + const output = lastFrame(); + expect(output).toContain('glm-5 (main)'); + expect(output).toContain('glm-5 (echoer)'); + }); + }); }); diff --git a/packages/cli/src/ui/components/ModelStatsDisplay.tsx b/packages/cli/src/ui/components/ModelStatsDisplay.tsx index c34905981..389c4221f 100644 --- a/packages/cli/src/ui/components/ModelStatsDisplay.tsx +++ b/packages/cli/src/ui/components/ModelStatsDisplay.tsx @@ -13,12 +13,17 @@ import { calculateCacheHitRate, calculateErrorRate, } from '../utils/computeStats.js'; -import type { ModelMetrics } from '../contexts/SessionContext.js'; +import type { ModelMetricsCore } from '../contexts/SessionContext.js'; import { useSessionStats } from '../contexts/SessionContext.js'; +import { flattenModelsBySource } from '../utils/modelsBySource.js'; import { t } from '../../i18n/index.js'; const METRIC_COL_WIDTH = 28; -const MODEL_COL_WIDTH = 22; +// 28 + 2*24 = 76, fitting the 76-column panel at 80-column terminal width +// when the session has a single model split across two sources (two columns). +// Sessions with three or more (model, source) columns will exceed the panel; +// the design doc accepts that crowded layout for many-subagent cases. 
+const MODEL_COL_WIDTH = 24; interface StatRowProps { title: string; @@ -59,11 +64,9 @@ export const ModelStatsDisplay: React.FC = ({ }) => { const { stats } = useSessionStats(); const { models } = stats.metrics; - const activeModels = Object.entries(models).filter( - ([, metrics]) => metrics.api.totalRequests > 0, - ); + const entries = flattenModelsBySource(models); - if (activeModels.length === 0) { + if (entries.length === 0) { return ( = ({ ); } - const modelNames = activeModels.map(([name]) => name); - const getModelValues = ( - getter: (metrics: ModelMetrics) => string | React.ReactElement, - ) => activeModels.map(([, metrics]) => getter(metrics)); + getter: (metrics: ModelMetricsCore) => string | React.ReactElement, + ) => entries.map(({ metrics }) => getter(metrics)); - const hasThoughts = activeModels.some( - ([, metrics]) => metrics.tokens.thoughts > 0, - ); - const hasTool = activeModels.some(([, metrics]) => metrics.tokens.tool > 0); - const hasCached = activeModels.some( - ([, metrics]) => metrics.tokens.cached > 0, + const hasThoughts = entries.some( + ({ metrics }) => metrics.tokens.thoughts > 0, ); + const hasTool = entries.some(({ metrics }) => metrics.tokens.tool > 0); + const hasCached = entries.some(({ metrics }) => metrics.tokens.cached > 0); return ( = ({ {t('Metric')} - {modelNames.map((name) => ( - + {entries.map(({ key, label }) => ( + - {name} + {label} ))} diff --git a/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx b/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx index 305b50b2c..a86e64eac 100644 --- a/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx +++ b/packages/cli/src/ui/components/SessionSummaryDisplay.test.tsx @@ -8,9 +8,19 @@ import { render } from 'ink-testing-library'; import { describe, it, expect, vi } from 'vitest'; import { SessionSummaryDisplay } from './SessionSummaryDisplay.js'; import * as SessionContext from '../contexts/SessionContext.js'; -import type { SessionMetrics } from 
'../contexts/SessionContext.js'; +import type { + ModelMetrics, + ModelMetricsCore, + SessionMetrics, +} from '../contexts/SessionContext.js'; +import { MAIN_SOURCE } from '@qwen-code/qwen-code-core'; import { ConfigContext } from '../contexts/ConfigContext.js'; +const mainOnly = (core: ModelMetricsCore): ModelMetrics => ({ + ...core, + bySource: { [MAIN_SOURCE]: core }, +}); + vi.mock('../contexts/SessionContext.js', async (importOriginal) => { const actual = await importOriginal(); return { @@ -57,7 +67,7 @@ describe('', () => { it('renders the summary display with a title', () => { const metrics: SessionMetrics = { models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 10, totalErrors: 1, totalLatencyMs: 50234 }, tokens: { prompt: 1000, @@ -67,7 +77,7 @@ describe('', () => { thoughts: 300, tool: 200, }, - }, + }), }, tools: { totalCalls: 0, diff --git a/packages/cli/src/ui/components/StatsDisplay.test.tsx b/packages/cli/src/ui/components/StatsDisplay.test.tsx index 6820171cb..b75b3c2dc 100644 --- a/packages/cli/src/ui/components/StatsDisplay.test.tsx +++ b/packages/cli/src/ui/components/StatsDisplay.test.tsx @@ -8,7 +8,20 @@ import { render } from 'ink-testing-library'; import { describe, it, expect, vi } from 'vitest'; import { StatsDisplay } from './StatsDisplay.js'; import * as SessionContext from '../contexts/SessionContext.js'; -import type { SessionMetrics } from '../contexts/SessionContext.js'; +import type { + ModelMetrics, + ModelMetricsCore, + SessionMetrics, +} from '../contexts/SessionContext.js'; +import { MAIN_SOURCE } from '@qwen-code/qwen-code-core'; + +// Wraps a core metrics object as a ModelMetrics with a single `main` source +// bucket, matching the shape produced by processing an API call with no +// subagent attribution. Used to keep fixtures terse. 
+const mainOnly = (core: ModelMetricsCore): ModelMetrics => ({ + ...core, + bySource: { [MAIN_SOURCE]: core }, +}); // Mock the context to provide controlled data for testing vi.mock('../contexts/SessionContext.js', async (importOriginal) => { @@ -69,7 +82,7 @@ describe('', () => { it('renders a table with two models correctly', () => { const metrics: SessionMetrics = { models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 3, totalErrors: 0, totalLatencyMs: 15000 }, tokens: { prompt: 1000, @@ -79,8 +92,8 @@ describe('', () => { thoughts: 100, tool: 50, }, - }, - 'gemini-2.5-flash': { + }), + 'gemini-2.5-flash': mainOnly({ api: { totalRequests: 5, totalErrors: 1, totalLatencyMs: 4500 }, tokens: { prompt: 25000, @@ -90,7 +103,7 @@ describe('', () => { thoughts: 2000, tool: 1000, }, - }, + }), }, tools: { totalCalls: 0, @@ -119,7 +132,7 @@ describe('', () => { it('renders all sections when all data is present', () => { const metrics: SessionMetrics = { models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, tokens: { prompt: 100, @@ -129,7 +142,7 @@ describe('', () => { thoughts: 0, tool: 0, }, - }, + }), }, tools: { totalCalls: 2, @@ -202,7 +215,7 @@ describe('', () => { it('hides Efficiency section when cache is not used', () => { const metrics: SessionMetrics = { models: { - 'gemini-2.5-pro': { + 'gemini-2.5-pro': mainOnly({ api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, tokens: { prompt: 100, @@ -212,7 +225,7 @@ describe('', () => { thoughts: 0, tool: 0, }, - }, + }), }, tools: { totalCalls: 0, @@ -350,6 +363,154 @@ describe('', () => { }); }); + describe('Subagent source attribution', () => { + const baseTools: SessionMetrics['tools'] = { + totalCalls: 0, + totalSuccess: 0, + totalFail: 0, + totalDurationMs: 0, + totalDecisions: { accept: 0, reject: 0, modify: 0 }, + byName: {}, + }; + const baseFiles: SessionMetrics['files'] = { + 
totalLinesAdded: 0, + totalLinesRemoved: 0, + }; + const coreMetrics = (reqs: number, tokens: number): ModelMetricsCore => ({ + api: { totalRequests: reqs, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: tokens, + candidates: tokens, + total: tokens * 2, + cached: 0, + thoughts: 0, + tool: 0, + }, + }); + + it('renders a plain model name when only main is a source', () => { + const metrics: SessionMetrics = { + models: { 'glm-5': mainOnly(coreMetrics(1, 100)) }, + tools: baseTools, + files: baseFiles, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('glm-5'); + expect(output).not.toContain('glm-5 (main)'); + expect(output).not.toContain('(main)'); + }); + + it('shows main and subagent suffixes when the same model has multiple sources', () => { + const mainCore = coreMetrics(2, 200); + const echoerCore = coreMetrics(1, 40); + const metrics: SessionMetrics = { + models: { + 'glm-5': { + api: { + totalRequests: + mainCore.api.totalRequests + echoerCore.api.totalRequests, + totalErrors: 0, + totalLatencyMs: 200, + }, + tokens: { + prompt: mainCore.tokens.prompt + echoerCore.tokens.prompt, + candidates: + mainCore.tokens.candidates + echoerCore.tokens.candidates, + total: mainCore.tokens.total + echoerCore.tokens.total, + cached: 0, + thoughts: 0, + tool: 0, + }, + bySource: { + [MAIN_SOURCE]: mainCore, + echoer: echoerCore, + }, + }, + }, + tools: baseTools, + files: baseFiles, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('glm-5 (main)'); + expect(output).toContain('glm-5 (echoer)'); + }); + + it('labels main rows session-wide when a subagent uses a different model', () => { + // Session has two models: glm-5 used only by main, qwen-plus used only by + // a subagent. Even though glm-5 has a single main source, it must still + // render with `(main)` because the session-wide rule triggers on qwen-plus. 
+ const metrics: SessionMetrics = { + models: { + 'glm-5': mainOnly(coreMetrics(2, 200)), + 'qwen-plus': { + api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 40, + candidates: 40, + total: 80, + cached: 0, + thoughts: 0, + tool: 0, + }, + bySource: { + researcher: coreMetrics(1, 40), + }, + }, + }, + tools: baseTools, + files: baseFiles, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('glm-5 (main)'); + expect(output).toContain('qwen-plus (researcher)'); + // The bare `glm-5` label (not followed by a space + `(`) must not appear + // as a row label in this session. + expect(output).not.toMatch(/glm-5\s{2,}/); + }); + + it('shows distinct rows when two subagents share a model', () => { + const alphaCore = coreMetrics(1, 10); + const bravoCore = coreMetrics(1, 20); + const metrics: SessionMetrics = { + models: { + 'glm-5': { + api: { totalRequests: 2, totalErrors: 0, totalLatencyMs: 100 }, + tokens: { + prompt: 30, + candidates: 30, + total: 60, + cached: 0, + thoughts: 0, + tool: 0, + }, + bySource: { + alpha: alphaCore, + bravo: bravoCore, + }, + }, + }, + tools: baseTools, + files: baseFiles, + }; + + const { lastFrame } = renderWithMockedStats(metrics); + const output = lastFrame(); + + expect(output).toContain('glm-5 (alpha)'); + expect(output).toContain('glm-5 (bravo)'); + expect(output).not.toContain('glm-5 (main)'); + }); + }); + describe('Title Rendering', () => { const zeroMetrics: SessionMetrics = { models: {}, diff --git a/packages/cli/src/ui/components/StatsDisplay.tsx b/packages/cli/src/ui/components/StatsDisplay.tsx index 48ed58f18..2a766f698 100644 --- a/packages/cli/src/ui/components/StatsDisplay.tsx +++ b/packages/cli/src/ui/components/StatsDisplay.tsx @@ -19,6 +19,7 @@ import { USER_AGREEMENT_RATE_MEDIUM, } from '../utils/displayUtils.js'; import { computeSessionStats } from '../utils/computeStats.js'; +import { flattenModelsBySource } 
from '../utils/modelsBySource.js'; import { t } from '../../i18n/index.js'; // A more flexible and powerful StatRow component @@ -75,11 +76,17 @@ const ModelUsageTable: React.FC<{ totalCachedTokens: number; cacheEfficiency: number; }> = ({ models, totalCachedTokens, cacheEfficiency }) => { - const nameWidth = 25; + // 35 + 8 + 15 + 15 = 73, fitting within the 76-column panel allocated + // when the terminal is at the default 80-column width. Subagent labels + // longer than 35 characters will wrap — acceptable cosmetic trade-off + // given the alternative is overflowing the panel border. + const nameWidth = 35; const requestsWidth = 8; const inputTokensWidth = 15; const outputTokensWidth = 15; + const entries = flattenModelsBySource(models); + return ( {/* Header */} @@ -117,24 +124,22 @@ const ModelUsageTable: React.FC<{ > {/* Rows */} - {Object.entries(models).map(([name, modelMetrics]) => ( - + {entries.map(({ key, label, metrics }) => ( + - {name.replace('-001', '')} + {label} - - {modelMetrics.api.totalRequests} - + {metrics.api.totalRequests} - {modelMetrics.tokens.prompt.toLocaleString()} + {metrics.tokens.prompt.toLocaleString()} - {modelMetrics.tokens.candidates.toLocaleString()} + {metrics.tokens.candidates.toLocaleString()} diff --git a/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap index efc0862b5..970b5159d 100644 --- a/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/ModelStatsDisplay.test.tsx.snap @@ -28,20 +28,20 @@ exports[` > should display conditional rows if at least one │ │ │ Model Stats For Nerds │ │ │ -│ Metric gemini-2.5-pro gemini-2.5-flash │ +│ Metric gemini-2.5-pro gemini-2.5-flash │ │ ────────────────────────────────────────────────────────────────────────────────────────────── │ │ API │ -│ Requests 1 1 │ -│ Errors 0 (0.0%) 0 (0.0%) │ -│ Avg Latency 
100ms 50ms │ +│ Requests 1 1 │ +│ Errors 0 (0.0%) 0 (0.0%) │ +│ Avg Latency 100ms 50ms │ │ │ │ Tokens │ -│ Total 30 15 │ -│ ↳ Prompt 10 5 │ -│ ↳ Cached 5 (50.0%) 0 (0.0%) │ -│ ↳ Thoughts 2 0 │ -│ ↳ Tool 0 3 │ -│ ↳ Output 20 10 │ +│ Total 30 15 │ +│ ↳ Prompt 10 5 │ +│ ↳ Cached 5 (50.0%) 0 (0.0%) │ +│ ↳ Thoughts 2 0 │ +│ ↳ Tool 0 3 │ +│ ↳ Output 20 10 │ │ │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" `; @@ -51,20 +51,20 @@ exports[` > should display stats for multiple models correc │ │ │ Model Stats For Nerds │ │ │ -│ Metric gemini-2.5-pro gemini-2.5-flash │ +│ Metric gemini-2.5-pro gemini-2.5-flash │ │ ────────────────────────────────────────────────────────────────────────────────────────────── │ │ API │ -│ Requests 10 20 │ -│ Errors 1 (10.0%) 2 (10.0%) │ -│ Avg Latency 100ms 25ms │ +│ Requests 10 20 │ +│ Errors 1 (10.0%) 2 (10.0%) │ +│ Avg Latency 100ms 25ms │ │ │ │ Tokens │ -│ Total 300 600 │ -│ ↳ Prompt 100 200 │ -│ ↳ Cached 50 (50.0%) 100 (50.0%) │ -│ ↳ Thoughts 10 20 │ -│ ↳ Tool 5 10 │ -│ ↳ Output 200 400 │ +│ Total 300 600 │ +│ ↳ Prompt 100 200 │ +│ ↳ Cached 50 (50.0%) 100 (50.0%) │ +│ ↳ Thoughts 10 20 │ +│ ↳ Tool 5 10 │ +│ ↳ Output 200 400 │ │ │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" `; diff --git a/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap index dfa39ba81..bbb95094c 100644 --- a/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/SessionSummaryDisplay.test.tsx.snap @@ -18,9 +18,9 @@ exports[` > renders the summary display with a title 1` │ » Tool Time: 0s (0.0%) │ │ │ │ │ -│ Model Usage Reqs Input Tokens Output Tokens │ -│ ─────────────────────────────────────────────────────────────── │ -│ gemini-2.5-pro 10 1,000 2,000 │ +│ Model Usage 
Reqs Input Tokens Output Tokens │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 10 1,000 2,000 │ │ │ │ Savings Highlight: 500 (50.0%) of input tokens were served from the cache, reducing costs. │ │ │ diff --git a/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap b/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap index 8106d1f5d..c0cb471f9 100644 --- a/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap +++ b/packages/cli/src/ui/components/__snapshots__/StatsDisplay.test.tsx.snap @@ -118,9 +118,9 @@ exports[` > Conditional Rendering Tests > hides Efficiency secti │ » Tool Time: 0s (0.0%) │ │ │ │ │ -│ Model Usage Reqs Input Tokens Output Tokens │ -│ ─────────────────────────────────────────────────────────────── │ -│ gemini-2.5-pro 1 100 100 │ +│ Model Usage Reqs Input Tokens Output Tokens │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 1 100 100 │ │ │ ╰──────────────────────────────────────────────────────────────────────────────────────────────────╯" `; @@ -202,10 +202,10 @@ exports[` > renders a table with two models correctly 1`] = ` │ » Tool Time: 0s (0.0%) │ │ │ │ │ -│ Model Usage Reqs Input Tokens Output Tokens │ -│ ─────────────────────────────────────────────────────────────── │ -│ gemini-2.5-pro 3 1,000 2,000 │ -│ gemini-2.5-flash 5 25,000 15,000 │ +│ Model Usage Reqs Input Tokens Output Tokens │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 3 1,000 2,000 │ +│ gemini-2.5-flash 5 25,000 15,000 │ │ │ │ Savings Highlight: 10,500 (40.4%) of input tokens were served from the cache, reducing costs. 
│ │ │ @@ -232,9 +232,9 @@ exports[` > renders all sections when all data is present 1`] = │ » Tool Time: 123ms (55.2%) │ │ │ │ │ -│ Model Usage Reqs Input Tokens Output Tokens │ -│ ─────────────────────────────────────────────────────────────── │ -│ gemini-2.5-pro 1 100 100 │ +│ Model Usage Reqs Input Tokens Output Tokens │ +│ ───────────────────────────────────────────────────────────────────────── │ +│ gemini-2.5-pro 1 100 100 │ │ │ │ Savings Highlight: 50 (50.0%) of input tokens were served from the cache, reducing costs. │ │ │ diff --git a/packages/cli/src/ui/contexts/SessionContext.test.tsx b/packages/cli/src/ui/contexts/SessionContext.test.tsx index f1550e255..6145a5e03 100644 --- a/packages/cli/src/ui/contexts/SessionContext.test.tsx +++ b/packages/cli/src/ui/contexts/SessionContext.test.tsx @@ -73,6 +73,7 @@ describe('SessionStatsContext', () => { thoughts: 20, tool: 10, }, + bySource: {}, }, }, tools: { @@ -151,6 +152,7 @@ describe('SessionStatsContext', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { @@ -192,6 +194,7 @@ describe('SessionStatsContext', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, }; diff --git a/packages/cli/src/ui/contexts/SessionContext.tsx b/packages/cli/src/ui/contexts/SessionContext.tsx index 9fdb4fa25..8d5a103ed 100644 --- a/packages/cli/src/ui/contexts/SessionContext.tsx +++ b/packages/cli/src/ui/contexts/SessionContext.tsx @@ -17,6 +17,7 @@ import { import type { SessionMetrics, ModelMetrics, + ModelMetricsCore, ToolCallStats, } from '@qwen-code/qwen-code-core'; import { uiTelemetryService } from '@qwen-code/qwen-code-core'; @@ -28,7 +29,10 @@ export enum ToolCallDecision { AUTO_ACCEPT = 'auto_accept', } -function areModelMetricsEqual(a: ModelMetrics, b: ModelMetrics): boolean { +function areModelMetricsCoreEqual( + a: ModelMetricsCore, + b: ModelMetricsCore, +): boolean { if ( a.api.totalRequests !== b.api.totalRequests || a.api.totalErrors !== b.api.totalErrors || @@ -49,6 +53,23 @@ function 
areModelMetricsEqual(a: ModelMetrics, b: ModelMetrics): boolean { return true; } +function areModelMetricsEqual(a: ModelMetrics, b: ModelMetrics): boolean { + if (!areModelMetricsCoreEqual(a, b)) return false; + + const aKeys = Object.keys(a.bySource); + const bKeys = Object.keys(b.bySource); + if (aKeys.length !== bKeys.length) return false; + + for (const key of aKeys) { + const aSource = a.bySource[key]; + const bSource = b.bySource[key]; + if (!bSource || !areModelMetricsCoreEqual(aSource, bSource)) { + return false; + } + } + return true; +} + function areToolCallStatsEqual(a: ToolCallStats, b: ToolCallStats): boolean { if ( a.count !== b.count || @@ -138,7 +159,7 @@ function areMetricsEqual(a: SessionMetrics, b: SessionMetrics): boolean { return true; } -export type { SessionMetrics, ModelMetrics }; +export type { SessionMetrics, ModelMetrics, ModelMetricsCore }; export interface SessionStatsState { sessionId: string; diff --git a/packages/cli/src/ui/utils/computeStats.test.ts b/packages/cli/src/ui/utils/computeStats.test.ts index db8d4cc9b..b0199816d 100644 --- a/packages/cli/src/ui/utils/computeStats.test.ts +++ b/packages/cli/src/ui/utils/computeStats.test.ts @@ -12,13 +12,13 @@ import { computeSessionStats, } from './computeStats.js'; import type { - ModelMetrics, + ModelMetricsCore, SessionMetrics, } from '../contexts/SessionContext.js'; describe('calculateErrorRate', () => { it('should return 0 if totalRequests is 0', () => { - const metrics: ModelMetrics = { + const metrics: ModelMetricsCore = { api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, tokens: { prompt: 0, @@ -33,7 +33,7 @@ describe('calculateErrorRate', () => { }); it('should calculate the error rate correctly', () => { - const metrics: ModelMetrics = { + const metrics: ModelMetricsCore = { api: { totalRequests: 10, totalErrors: 2, totalLatencyMs: 0 }, tokens: { prompt: 0, @@ -50,7 +50,7 @@ describe('calculateErrorRate', () => { describe('calculateAverageLatency', () => { 
it('should return 0 if totalRequests is 0', () => { - const metrics: ModelMetrics = { + const metrics: ModelMetricsCore = { api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 1000 }, tokens: { prompt: 0, @@ -65,7 +65,7 @@ describe('calculateAverageLatency', () => { }); it('should calculate the average latency correctly', () => { - const metrics: ModelMetrics = { + const metrics: ModelMetricsCore = { api: { totalRequests: 10, totalErrors: 0, totalLatencyMs: 1500 }, tokens: { prompt: 0, @@ -82,7 +82,7 @@ describe('calculateAverageLatency', () => { describe('calculateCacheHitRate', () => { it('should return 0 if prompt tokens is 0', () => { - const metrics: ModelMetrics = { + const metrics: ModelMetricsCore = { api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, tokens: { prompt: 0, @@ -97,7 +97,7 @@ describe('calculateCacheHitRate', () => { }); it('should calculate the cache hit rate correctly', () => { - const metrics: ModelMetrics = { + const metrics: ModelMetricsCore = { api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, tokens: { prompt: 200, @@ -162,6 +162,7 @@ describe('computeSessionStats', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { @@ -200,6 +201,7 @@ describe('computeSessionStats', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { diff --git a/packages/cli/src/ui/utils/computeStats.ts b/packages/cli/src/ui/utils/computeStats.ts index cc0d870e7..f5bd45239 100644 --- a/packages/cli/src/ui/utils/computeStats.ts +++ b/packages/cli/src/ui/utils/computeStats.ts @@ -7,24 +7,24 @@ import type { SessionMetrics, ComputedSessionStats, - ModelMetrics, + ModelMetricsCore, } from '../contexts/SessionContext.js'; -export function calculateErrorRate(metrics: ModelMetrics): number { +export function calculateErrorRate(metrics: ModelMetricsCore): number { if (metrics.api.totalRequests === 0) { return 0; } return (metrics.api.totalErrors / metrics.api.totalRequests) * 100; } -export function 
calculateAverageLatency(metrics: ModelMetrics): number { +export function calculateAverageLatency(metrics: ModelMetricsCore): number { if (metrics.api.totalRequests === 0) { return 0; } return metrics.api.totalLatencyMs / metrics.api.totalRequests; } -export function calculateCacheHitRate(metrics: ModelMetrics): number { +export function calculateCacheHitRate(metrics: ModelMetricsCore): number { if (metrics.tokens.prompt === 0) { return 0; } diff --git a/packages/cli/src/ui/utils/modelsBySource.test.ts b/packages/cli/src/ui/utils/modelsBySource.test.ts new file mode 100644 index 000000000..e59871b86 --- /dev/null +++ b/packages/cli/src/ui/utils/modelsBySource.test.ts @@ -0,0 +1,131 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { describe, it, expect } from 'vitest'; +import { + MAIN_SOURCE, + type ModelMetrics, + type ModelMetricsCore, +} from '@qwen-code/qwen-code-core'; +import { flattenModelsBySource } from './modelsBySource.js'; + +const emptyCore = (): ModelMetricsCore => ({ + api: { totalRequests: 0, totalErrors: 0, totalLatencyMs: 0 }, + tokens: { + prompt: 0, + candidates: 0, + total: 0, + cached: 0, + thoughts: 0, + tool: 0, + }, +}); + +const coreWithRequests = (requests: number): ModelMetricsCore => ({ + ...emptyCore(), + api: { totalRequests: requests, totalErrors: 0, totalLatencyMs: 0 }, +}); + +const makeModel = ( + bySource: Record, +): ModelMetrics => { + const totalRequests = Object.values(bySource).reduce( + (sum, m) => sum + m.api.totalRequests, + 0, + ); + return { + ...emptyCore(), + api: { totalRequests, totalErrors: 0, totalLatencyMs: 0 }, + bySource, + }; +}; + +describe('flattenModelsBySource', () => { + it('omits models with zero requests', () => { + const entries = flattenModelsBySource({ + 'idle-model': makeModel({}), + }); + expect(entries).toEqual([]); + }); + + it('collapses to plain label when no non-main source exists in the session', () => { + const entries = 
flattenModelsBySource({ + 'glm-5': makeModel({ [MAIN_SOURCE]: coreWithRequests(3) }), + 'qwen-max': makeModel({ [MAIN_SOURCE]: coreWithRequests(2) }), + }); + expect(entries.map((e) => e.label)).toEqual(['glm-5', 'qwen-max']); + expect(entries.map((e) => e.key)).toEqual(['glm-5', 'qwen-max']); + }); + + it('splits every row when any model has a non-main source (session-wide rule)', () => { + const entries = flattenModelsBySource({ + 'glm-5': makeModel({ [MAIN_SOURCE]: coreWithRequests(2) }), + 'qwen-plus': makeModel({ researcher: coreWithRequests(1) }), + }); + expect(entries.map((e) => e.label)).toEqual([ + 'glm-5 (main)', + 'qwen-plus (researcher)', + ]); + }); + + it('orders sources with MAIN_SOURCE first then alphabetically', () => { + const entries = flattenModelsBySource({ + 'glm-5': makeModel({ + bravo: coreWithRequests(1), + [MAIN_SOURCE]: coreWithRequests(2), + alpha: coreWithRequests(1), + }), + }); + expect(entries.map((e) => e.label)).toEqual([ + 'glm-5 (main)', + 'glm-5 (alpha)', + 'glm-5 (bravo)', + ]); + }); + + it('produces distinct keys when two raw model names normalize to the same label', () => { + // `normalizeModelName` strips `-001`, so `foo` and `foo-001` both render + // as the label `foo`. The React key must still be unique across entries. 
+ const entries = flattenModelsBySource({ + foo: makeModel({ [MAIN_SOURCE]: coreWithRequests(1) }), + 'foo-001': makeModel({ [MAIN_SOURCE]: coreWithRequests(1) }), + }); + expect(entries).toHaveLength(2); + expect(entries.map((e) => e.label)).toEqual(['foo', 'foo']); + const keys = entries.map((e) => e.key); + expect(new Set(keys).size).toBe(keys.length); + expect(keys).toEqual(['foo', 'foo-001']); + }); + + it('produces distinct keys across (model, source) pairs in the split case', () => { + const entries = flattenModelsBySource({ + 'glm-5': makeModel({ + [MAIN_SOURCE]: coreWithRequests(1), + alpha: coreWithRequests(1), + }), + 'qwen-plus': makeModel({ + alpha: coreWithRequests(1), + }), + }); + const keys = entries.map((e) => e.key); + expect(new Set(keys).size).toBe(keys.length); + expect(keys).toEqual(['glm-5::main', 'glm-5::alpha', 'qwen-plus::alpha']); + }); + + it('falls back to the aggregate when bySource is empty (defensive)', () => { + // Callers shouldn't hit this, but the helper should still produce a + // usable row rather than dropping the model. + const entries = flattenModelsBySource({ + 'glm-5': { + ...coreWithRequests(1), + bySource: {}, + }, + }); + expect(entries).toHaveLength(1); + expect(entries[0]?.label).toBe('glm-5'); + expect(entries[0]?.key).toBe('glm-5'); + }); +}); diff --git a/packages/cli/src/ui/utils/modelsBySource.ts b/packages/cli/src/ui/utils/modelsBySource.ts new file mode 100644 index 000000000..2937024e8 --- /dev/null +++ b/packages/cli/src/ui/utils/modelsBySource.ts @@ -0,0 +1,127 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { + MAIN_SOURCE, + type ModelMetrics, + type ModelMetricsCore, +} from '@qwen-code/qwen-code-core'; + +/** + * One entry in the flattened view of the `models` metric map. Each entry + * corresponds to a single row (in `StatsDisplay`) or column (in + * `ModelStatsDisplay`). 
+ */ +export interface ModelSourceEntry { + /** + * Stable React key built from the raw model name + source. Guaranteed + * unique across the returned array, even when two raw model names + * normalize to the same display label (e.g. `foo` and `foo-001`). + */ + key: string; + /** + * Display label. Either the bare (possibly normalized) model name for + * the single-source collapse case, or `${modelName} (${source})` when + * the model has any non-main source. + */ + label: string; + /** Backing metrics — either the model aggregate or one source bucket. */ + metrics: ModelMetricsCore; +} + +/** + * Flattens `SessionMetrics.models` into a list of `(label, metrics)` entries + * suitable for rendering one per row/column. + * + * Rules (matching the design doc `3215-subagent-stats-attribution.md`): + * - Collapse is decided **session-wide**: if NO model in the entire session + * has any non-main source, every row renders with the plain model name + * (existing UX preserved). + * - If ANY model in the session has a non-main source, EVERY row across + * ALL models renders with a `${model} (${source})` label — including the + * `(main)` rows — so the user can directly compare attribution across the + * whole stats panel. This matches the issue mockup, which shows + * `qwen-max (main)` alongside `qwen-plus (researcher)`. + * - Within the split case, sources under a given model are sorted with + * `MAIN_SOURCE` first (if present), then the rest alphabetically. + * - Models with zero requests (aggregate) are omitted. + * - If `bySource` is somehow empty (defensive — callers shouldn't hit this), + * fall back to the aggregate row with the plain model name. 
+ */ +export function flattenModelsBySource( + models: Record, +): ModelSourceEntry[] { + const sessionHasNonMainSource = Object.values(models).some((modelMetrics) => + Object.keys(modelMetrics.bySource).some((source) => source !== MAIN_SOURCE), + ); + + const result: ModelSourceEntry[] = []; + + for (const [modelName, modelMetrics] of Object.entries(models)) { + if (modelMetrics.api.totalRequests <= 0) continue; + + const displayName = normalizeModelName(modelName); + const sourceNames = Object.keys(modelMetrics.bySource); + + if (sourceNames.length === 0) { + result.push({ + key: modelName, + label: displayName, + metrics: modelMetrics, + }); + continue; + } + + if (!sessionHasNonMainSource) { + // Collapse session-wide: only main sources exist, render aggregate + // with plain model names so the existing UX is preserved. + result.push({ + key: modelName, + label: displayName, + metrics: modelMetrics.bySource[MAIN_SOURCE] ?? modelMetrics, + }); + continue; + } + + const sortedSources = sortSources(sourceNames); + for (const source of sortedSources) { + result.push({ + key: `${modelName}::${source}`, + label: `${displayName} (${source})`, + metrics: modelMetrics.bySource[source], + }); + } + } + + return result; +} + +/** + * Strips the Gemini `-001` version suffix from model names for display. + * Historically the StatsDisplay summary table normalized model names this + * way; keep the behavior but apply it to the model portion only so subagent + * names that happen to contain `-001` are not mangled. + */ +function normalizeModelName(modelName: string): string { + return modelName.replace('-001', ''); +} + +/** + * `MAIN_SOURCE` first (if present), then the rest alphabetically. 
+ */ +function sortSources(sources: string[]): string[] { + const main: string[] = []; + const rest: string[] = []; + for (const source of sources) { + if (source === MAIN_SOURCE) { + main.push(source); + } else { + rest.push(source); + } + } + rest.sort((a, b) => a.localeCompare(b)); + return [...main, ...rest]; +} diff --git a/packages/cli/src/utils/nonInteractiveHelpers.test.ts b/packages/cli/src/utils/nonInteractiveHelpers.test.ts index b69056707..4e2273256 100644 --- a/packages/cli/src/utils/nonInteractiveHelpers.test.ts +++ b/packages/cli/src/utils/nonInteractiveHelpers.test.ts @@ -358,6 +358,7 @@ describe('computeUsageFromMetrics', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { @@ -400,6 +401,7 @@ describe('computeUsageFromMetrics', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, 'model-2': { api: { totalRequests: 1, totalErrors: 0, totalLatencyMs: 100 }, @@ -411,6 +413,7 @@ describe('computeUsageFromMetrics', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { @@ -453,6 +456,7 @@ describe('computeUsageFromMetrics', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { diff --git a/packages/core/src/agents/arena/ArenaAgentClient.test.ts b/packages/core/src/agents/arena/ArenaAgentClient.test.ts index 6ab61039c..5959d68cf 100644 --- a/packages/core/src/agents/arena/ArenaAgentClient.test.ts +++ b/packages/core/src/agents/arena/ArenaAgentClient.test.ts @@ -42,6 +42,7 @@ const createMockMetrics = ( thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { @@ -462,6 +463,7 @@ describe('ArenaAgentClient', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, 'model-b': { api: { @@ -477,6 +479,7 @@ describe('ArenaAgentClient', () => { thoughts: 0, tool: 0, }, + bySource: {}, }, }, tools: { diff --git a/packages/core/src/agents/runtime/agent-core.ts b/packages/core/src/agents/runtime/agent-core.ts index eba7565a4..8e88a82f7 100644 --- a/packages/core/src/agents/runtime/agent-core.ts +++ 
b/packages/core/src/agents/runtime/agent-core.ts @@ -17,6 +17,7 @@ */ import { reportError } from '../../utils/errorReporting.js'; +import { subagentNameContext } from '../../utils/subagentNameContext.js'; import type { Config } from '../../config/config.js'; import { type ToolCallRequestInfo } from '../../core/turn.js'; import { @@ -385,6 +386,28 @@ export class AgentCore { toolsList: FunctionDeclaration[], abortController: AbortController, options?: ReasoningLoopOptions, + ): Promise { + // Tag every API call emitted from this loop with the owning subagent's + // name so the `/stats` panel can attribute tokens/requests to the + // originating subagent. The store is read inside + // `LoggingContentGenerator` via `subagentNameContext.getStore()`. + return subagentNameContext.run(this.name, () => + this._runReasoningLoopInner( + chat, + initialMessages, + toolsList, + abortController, + options, + ), + ); + } + + private async _runReasoningLoopInner( + chat: GeminiChat, + initialMessages: Content[], + toolsList: FunctionDeclaration[], + abortController: AbortController, + options?: ReasoningLoopOptions, ): Promise { const startTime = options?.startTimeMs ?? 
Date.now(); let currentMessages = initialMessages; diff --git a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts index 4464f5a7e..d6e2db4b9 100644 --- a/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts +++ b/packages/core/src/core/loggingContentGenerator/loggingContentGenerator.ts @@ -32,6 +32,7 @@ import { logApiResponse, } from '../../telemetry/loggers.js'; import { isInternalPromptId } from '../../utils/internalPromptIds.js'; +import { subagentNameContext } from '../../utils/subagentNameContext.js'; import type { ContentGenerator, ContentGeneratorConfig, @@ -83,7 +84,12 @@ export class LoggingContentGenerator implements ContentGenerator { const requestText = JSON.stringify(contents); logApiRequest( this.config, - new ApiRequestEvent(model, promptId, requestText), + new ApiRequestEvent( + model, + promptId, + requestText, + subagentNameContext.getStore(), + ), ); } @@ -105,6 +111,7 @@ export class LoggingContentGenerator implements ContentGenerator { this.config.getAuthType(), usageMetadata, responseText, + subagentNameContext.getStore(), ), ); } @@ -135,6 +142,7 @@ export class LoggingContentGenerator implements ContentGenerator { errorMessage, errorType, statusCode: errorStatus, + subagentName: subagentNameContext.getStore(), }), ); } diff --git a/packages/core/src/output/json-formatter.test.ts b/packages/core/src/output/json-formatter.test.ts index 587030a98..071ff3f93 100644 --- a/packages/core/src/output/json-formatter.test.ts +++ b/packages/core/src/output/json-formatter.test.ts @@ -66,6 +66,7 @@ describe('JsonFormatter', () => { thoughts: 103, tool: 0, }, + bySource: {}, }, 'gemini-2.5-flash': { api: { @@ -81,6 +82,7 @@ describe('JsonFormatter', () => { thoughts: 138, tool: 0, }, + bySource: {}, }, }, tools: { diff --git a/packages/core/src/subagents/subagent-manager.ts b/packages/core/src/subagents/subagent-manager.ts index 
5869b3974..186222ede 100644 --- a/packages/core/src/subagents/subagent-manager.ts +++ b/packages/core/src/subagents/subagent-manager.ts @@ -914,8 +914,12 @@ export class SubagentManager { try { const config = await this.parseSubagentFile(filePath, level); subagents.push(config); - } catch (_error) { - // Ignore invalid files + } catch (error) { + // Skip invalid files but surface the reason. Before this warning + // was added, invalid subagent files failed silently — a user who + // mistyped frontmatter or used a reserved name had no way to see + // why their agent wasn't loading. + warnInvalidSubagentFile(filePath, error); continue; } } @@ -994,8 +998,8 @@ export async function loadSubagentFromDir( new SubagentValidator(), ); subagents.push(config); - } catch (_error) { - // Ignore invalid files + } catch (error) { + warnInvalidSubagentFile(filePath, error); continue; } } @@ -1138,3 +1142,14 @@ function parseSubagentContent( ); } } + +/** + * Log an invalid-subagent-file error via the debug logger. Before this was + * added, the loader swallowed these errors entirely — users running with + * debug logging enabled had no way to tell why their subagent wasn't loading. + * Kept on the debug channel so the TUI stays quiet during normal startup. + */ +function warnInvalidSubagentFile(filePath: string, error: unknown): void { + const message = error instanceof Error ? 
error.message : String(error); + debugLogger.debug(`Skipped invalid file ${filePath}: ${message}`); +} diff --git a/packages/core/src/subagents/validation.test.ts b/packages/core/src/subagents/validation.test.ts index 420483b11..9a4cc6279 100644 --- a/packages/core/src/subagents/validation.test.ts +++ b/packages/core/src/subagents/validation.test.ts @@ -106,6 +106,7 @@ describe('SubagentValidator', () => { 'tool', 'config', 'default', + 'main', ]; for (const name of reservedNames) { diff --git a/packages/core/src/subagents/validation.ts b/packages/core/src/subagents/validation.ts index a885b14c4..9f05b0d60 100644 --- a/packages/core/src/subagents/validation.ts +++ b/packages/core/src/subagents/validation.ts @@ -133,7 +133,10 @@ export class SubagentValidator { errors.push('Name cannot end with a hyphen or underscore'); } - // Check for reserved names + // Check for reserved names. `main` is the sentinel used by the /stats + // attribution pipeline to label the main (non-subagent) conversation; + // a subagent named `main` would collide with that sentinel and be + // silently merged into the main bucket. 
const reservedNames = [ 'self', 'system', @@ -142,6 +145,7 @@ export class SubagentValidator { 'tool', 'config', 'default', + 'main', ]; if (reservedNames.includes(trimmedName.toLowerCase())) { errors.push(`"${trimmedName}" is a reserved name and cannot be used`); diff --git a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts index f22582f05..a6a6c9ace 100644 --- a/packages/core/src/telemetry/qwen-logger/qwen-logger.ts +++ b/packages/core/src/telemetry/qwen-logger/qwen-logger.ts @@ -598,6 +598,7 @@ export class QwenLogger { properties: { model: event.model, prompt_id: event.prompt_id, + subagent_name: event.subagent_name, }, }); @@ -615,6 +616,7 @@ export class QwenLogger { auth_type: event.auth_type, model: event.model, prompt_id: event.prompt_id, + subagent_name: event.subagent_name, }, snapshots: JSON.stringify({ input_token_count: event.input_token_count, @@ -653,6 +655,7 @@ export class QwenLogger { auth_type: event.auth_type, model: event.model, prompt_id: event.prompt_id, + subagent_name: event.subagent_name, error_message: event.error_message, error_type: event.error_type, }, diff --git a/packages/core/src/telemetry/types.ts b/packages/core/src/telemetry/types.ts index c3666ae9a..4a9335434 100644 --- a/packages/core/src/telemetry/types.ts +++ b/packages/core/src/telemetry/types.ts @@ -240,13 +240,24 @@ export class ApiRequestEvent implements BaseTelemetryEvent { model: string; prompt_id: string; request_text?: string; + /** + * Name of the subagent that issued this request, or undefined when the + * request originates from the main conversation. 
+ */ + subagent_name?: string; - constructor(model: string, prompt_id: string, request_text?: string) { + constructor( + model: string, + prompt_id: string, + request_text?: string, + subagent_name?: string, + ) { this['event.name'] = 'api_request'; this['event.timestamp'] = new Date().toISOString(); this.model = model; this.prompt_id = prompt_id; this.request_text = request_text; + this.subagent_name = subagent_name; } } @@ -264,6 +275,11 @@ export class ApiErrorEvent implements BaseTelemetryEvent { error_type?: string; // HTTP status code from the API response (e.g. 429, 500) status_code?: number | string; + /** + * Name of the subagent that issued this request, or undefined when the + * request originates from the main conversation. + */ + subagent_name?: string; constructor(opts: { responseId?: string; @@ -274,6 +290,7 @@ export class ApiErrorEvent implements BaseTelemetryEvent { errorMessage: string; errorType?: string; statusCode?: number | string; + subagentName?: string; }) { this['event.name'] = 'api_error'; this['event.timestamp'] = new Date().toISOString(); @@ -285,6 +302,7 @@ export class ApiErrorEvent implements BaseTelemetryEvent { this.error_message = opts.errorMessage; this.error_type = opts.errorType; this.status_code = opts.statusCode; + this.subagent_name = opts.subagentName; } } @@ -320,6 +338,11 @@ export class ApiResponseEvent implements BaseTelemetryEvent { response_text?: string; prompt_id: string; auth_type?: string; + /** + * Name of the subagent that issued this request, or undefined when the + * request originates from the main conversation. 
+ */ + subagent_name?: string; constructor( response_id: string, @@ -329,6 +352,7 @@ export class ApiResponseEvent implements BaseTelemetryEvent { auth_type?: string, usage_data?: GenerateContentResponseUsageMetadata, response_text?: string, + subagent_name?: string, ) { this['event.name'] = 'api_response'; this['event.timestamp'] = new Date().toISOString(); @@ -345,6 +369,7 @@ export class ApiResponseEvent implements BaseTelemetryEvent { this.response_text = response_text; this.prompt_id = prompt_id; this.auth_type = auth_type; + this.subagent_name = subagent_name; } } diff --git a/packages/core/src/telemetry/uiTelemetry.test.ts b/packages/core/src/telemetry/uiTelemetry.test.ts index 37542273a..311aac4f9 100644 --- a/packages/core/src/telemetry/uiTelemetry.test.ts +++ b/packages/core/src/telemetry/uiTelemetry.test.ts @@ -5,7 +5,7 @@ */ import { describe, it, expect, vi, beforeEach } from 'vitest'; -import { UiTelemetryService } from './uiTelemetry.js'; +import { UiTelemetryService, MAIN_SOURCE } from './uiTelemetry.js'; import { ToolCallDecision } from './tool-call-decision.js'; import type { ApiErrorEvent, ApiResponseEvent } from './types.js'; import { ToolCallEvent } from './types.js'; @@ -187,7 +187,7 @@ describe('UiTelemetryService', () => { service.addEvent(event); const metrics = service.getMetrics(); - expect(metrics.models['gemini-2.5-pro']).toEqual({ + const modelAggregate = { api: { totalRequests: 1, totalErrors: 0, @@ -201,6 +201,12 @@ describe('UiTelemetryService', () => { thoughts: 2, tool: 3, }, + }; + expect(metrics.models['gemini-2.5-pro']).toEqual({ + ...modelAggregate, + bySource: { + [MAIN_SOURCE]: modelAggregate, + }, }); expect(service.getLastPromptTokenCount()).toBe(0); }); @@ -237,7 +243,7 @@ describe('UiTelemetryService', () => { service.addEvent(event2); const metrics = service.getMetrics(); - expect(metrics.models['gemini-2.5-pro']).toEqual({ + const modelAggregate = { api: { totalRequests: 2, totalErrors: 0, @@ -251,6 +257,12 @@ 
describe('UiTelemetryService', () => { thoughts: 6, tool: 9, }, + }; + expect(metrics.models['gemini-2.5-pro']).toEqual({ + ...modelAggregate, + bySource: { + [MAIN_SOURCE]: modelAggregate, + }, }); expect(service.getLastPromptTokenCount()).toBe(0); }); @@ -307,7 +319,7 @@ describe('UiTelemetryService', () => { service.addEvent(event); const metrics = service.getMetrics(); - expect(metrics.models['gemini-2.5-pro']).toEqual({ + const modelAggregate = { api: { totalRequests: 1, totalErrors: 1, @@ -321,6 +333,12 @@ describe('UiTelemetryService', () => { thoughts: 0, tool: 0, }, + }; + expect(metrics.models['gemini-2.5-pro']).toEqual({ + ...modelAggregate, + bySource: { + [MAIN_SOURCE]: modelAggregate, + }, }); }); @@ -349,7 +367,7 @@ describe('UiTelemetryService', () => { service.addEvent(errorEvent); const metrics = service.getMetrics(); - expect(metrics.models['gemini-2.5-pro']).toEqual({ + const modelAggregate = { api: { totalRequests: 2, totalErrors: 1, @@ -363,10 +381,161 @@ describe('UiTelemetryService', () => { thoughts: 2, tool: 3, }, + }; + expect(metrics.models['gemini-2.5-pro']).toEqual({ + ...modelAggregate, + bySource: { + [MAIN_SOURCE]: modelAggregate, + }, }); }); }); + describe('Subagent Source Attribution', () => { + it('attributes API calls without subagent_name to MAIN_SOURCE', () => { + const event = { + 'event.name': EVENT_API_RESPONSE, + model: 'glm-5', + duration_ms: 100, + input_token_count: 10, + output_token_count: 5, + total_token_count: 15, + cached_content_token_count: 0, + thoughts_token_count: 0, + tool_token_count: 0, + } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + + service.addEvent(event); + + const modelMetrics = service.getMetrics().models['glm-5']; + expect(Object.keys(modelMetrics.bySource)).toEqual([MAIN_SOURCE]); + expect(modelMetrics.bySource[MAIN_SOURCE].api.totalRequests).toBe(1); + expect(modelMetrics.api.totalRequests).toBe(1); + }); + + it('splits a single model between main and a subagent', () => 
{ + const mainEvent = { + 'event.name': EVENT_API_RESPONSE, + model: 'glm-5', + duration_ms: 200, + input_token_count: 100, + output_token_count: 50, + total_token_count: 150, + cached_content_token_count: 20, + thoughts_token_count: 0, + tool_token_count: 0, + } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + const subagentEvent = { + 'event.name': EVENT_API_RESPONSE, + model: 'glm-5', + duration_ms: 80, + input_token_count: 40, + output_token_count: 10, + total_token_count: 50, + cached_content_token_count: 0, + thoughts_token_count: 0, + tool_token_count: 0, + subagent_name: 'echoer', + } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + + service.addEvent(mainEvent); + service.addEvent(subagentEvent); + + const modelMetrics = service.getMetrics().models['glm-5']; + // Aggregate spans both main and subagent calls + expect(modelMetrics.api.totalRequests).toBe(2); + expect(modelMetrics.api.totalLatencyMs).toBe(280); + expect(modelMetrics.tokens.prompt).toBe(140); + expect(modelMetrics.tokens.total).toBe(200); + // Per-source breakdown isolates each contributor + expect(new Set(Object.keys(modelMetrics.bySource))).toEqual( + new Set([MAIN_SOURCE, 'echoer']), + ); + expect(modelMetrics.bySource[MAIN_SOURCE].api.totalRequests).toBe(1); + expect(modelMetrics.bySource[MAIN_SOURCE].tokens.prompt).toBe(100); + expect(modelMetrics.bySource['echoer'].api.totalRequests).toBe(1); + expect(modelMetrics.bySource['echoer'].tokens.prompt).toBe(40); + }); + + it('splits two subagents sharing a model into distinct source buckets', () => { + const makeEvent = ( + subagentName: string, + duration: number, + ): ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE } => + ({ + 'event.name': EVENT_API_RESPONSE, + model: 'glm-5', + duration_ms: duration, + input_token_count: 10, + output_token_count: 5, + total_token_count: 15, + cached_content_token_count: 0, + thoughts_token_count: 0, + tool_token_count: 0, + subagent_name: subagentName, 
+ }) as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + + service.addEvent(makeEvent('alpha', 50)); + service.addEvent(makeEvent('bravo', 70)); + + const modelMetrics = service.getMetrics().models['glm-5']; + expect(modelMetrics.api.totalRequests).toBe(2); + expect(Object.keys(modelMetrics.bySource).sort()).toEqual([ + 'alpha', + 'bravo', + ]); + expect(modelMetrics.bySource['alpha'].api.totalRequests).toBe(1); + expect(modelMetrics.bySource['bravo'].api.totalRequests).toBe(1); + // Main bucket should NOT be created when no main-origin event arrived + expect(modelMetrics.bySource[MAIN_SOURCE]).toBeUndefined(); + }); + + it('handles a subagent named after an Object.prototype member without crashing', () => { + // `constructor` is a valid subagent name per the naming regex. A + // plain-object `bySource` would return `Object.prototype.constructor` + // from a truthiness check, short-circuiting the bucket creation and + // crashing the aggregation path. The prototype-free map prevents this. + const event = { + 'event.name': EVENT_API_RESPONSE, + model: 'glm-5', + duration_ms: 100, + input_token_count: 10, + output_token_count: 5, + total_token_count: 15, + cached_content_token_count: 0, + thoughts_token_count: 0, + tool_token_count: 0, + subagent_name: 'constructor', + } as ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }; + + expect(() => service.addEvent(event)).not.toThrow(); + + const modelMetrics = service.getMetrics().models['glm-5']; + expect(modelMetrics.bySource['constructor']).toBeDefined(); + expect(modelMetrics.bySource['constructor'].api.totalRequests).toBe(1); + expect(modelMetrics.bySource['constructor'].tokens.prompt).toBe(10); + // Sanity: the Object prototype member was not actually mutated. 
+ expect(typeof modelMetrics.bySource['constructor']).toBe('object'); + }); + + it('attributes API errors to the subagent source bucket', () => { + const errorEvent = { + 'event.name': EVENT_API_ERROR, + model: 'glm-5', + duration_ms: 150, + error_message: 'boom', + subagent_name: 'alpha', + } as ApiErrorEvent & { 'event.name': typeof EVENT_API_ERROR }; + + service.addEvent(errorEvent); + + const modelMetrics = service.getMetrics().models['glm-5']; + expect(modelMetrics.api.totalErrors).toBe(1); + expect(modelMetrics.bySource['alpha'].api.totalErrors).toBe(1); + expect(modelMetrics.bySource[MAIN_SOURCE]).toBeUndefined(); + }); + }); + describe('Tool Call Event Processing', () => { it('should process a single successful ToolCallEvent', () => { const toolCall = createFakeCompletedToolCall( diff --git a/packages/core/src/telemetry/uiTelemetry.ts b/packages/core/src/telemetry/uiTelemetry.ts index a7361f038..d4639bec3 100644 --- a/packages/core/src/telemetry/uiTelemetry.ts +++ b/packages/core/src/telemetry/uiTelemetry.ts @@ -17,6 +17,9 @@ import type { ApiResponseEvent, ToolCallEvent, } from './types.js'; +import { MAIN_SOURCE } from '../utils/subagentNameContext.js'; + +export { MAIN_SOURCE } from '../utils/subagentNameContext.js'; export type UiEvent = | (ApiResponseEvent & { 'event.name': typeof EVENT_API_RESPONSE }) @@ -42,7 +45,12 @@ export interface ToolCallStats { }; } -export interface ModelMetrics { +/** + * Per-model counters without the nested source breakdown. Used both as the + * aggregate `ModelMetrics` shape (via extension) and as the value type of the + * `bySource` map — keeping the type non-recursive. + */ +export interface ModelMetricsCore { api: { totalRequests: number; totalErrors: number; @@ -58,6 +66,16 @@ export interface ModelMetrics { }; } +export interface ModelMetrics extends ModelMetricsCore { + /** + * Per-source breakdown. Keys are subagent names, or `MAIN_SOURCE` ("main") + * for calls originating from the main conversation. 
Every API call that + * increments an aggregate counter also increments the matching per-source + * record so the two views stay consistent. + */ + bySource: Record; +} + export interface SessionMetrics { models: Record; tools: { @@ -79,7 +97,7 @@ export interface SessionMetrics { }; } -const createInitialModelMetrics = (): ModelMetrics => ({ +const createInitialModelMetricsCore = (): ModelMetricsCore => ({ api: { totalRequests: 0, totalErrors: 0, @@ -95,6 +113,16 @@ const createInitialModelMetrics = (): ModelMetrics => ({ }, }); +// `bySource` keys are user-controlled subagent names. Using a prototype-free +// map avoids crashes when a subagent is named after an inherited Object +// member (e.g. `constructor`, `toString`, `hasOwnProperty`), which would +// otherwise short-circuit `!bySource[name]` checks and return the inherited +// prototype member as the "bucket". +const createInitialModelMetrics = (): ModelMetrics => ({ + ...createInitialModelMetricsCore(), + bySource: Object.create(null) as Record, +}); + const createInitialMetrics = (): SessionMetrics => ({ models: {}, tools: { @@ -187,25 +215,48 @@ export class UiTelemetryService extends EventEmitter { return this.#metrics.models[modelName]; } + private getOrCreateSourceMetrics( + modelMetrics: ModelMetrics, + source: string, + ): ModelMetricsCore { + if (!modelMetrics.bySource[source]) { + modelMetrics.bySource[source] = createInitialModelMetricsCore(); + } + return modelMetrics.bySource[source]; + } + private processApiResponse(event: ApiResponseEvent) { const modelMetrics = this.getOrCreateModelMetrics(event.model); + const sourceMetrics = this.getOrCreateSourceMetrics( + modelMetrics, + event.subagent_name ?? 
MAIN_SOURCE, + ); - modelMetrics.api.totalRequests++; - modelMetrics.api.totalLatencyMs += event.duration_ms; + for (const bucket of [modelMetrics, sourceMetrics]) { + bucket.api.totalRequests++; + bucket.api.totalLatencyMs += event.duration_ms; - modelMetrics.tokens.prompt += event.input_token_count; - modelMetrics.tokens.candidates += event.output_token_count; - modelMetrics.tokens.total += event.total_token_count; - modelMetrics.tokens.cached += event.cached_content_token_count; - modelMetrics.tokens.thoughts += event.thoughts_token_count; - modelMetrics.tokens.tool += event.tool_token_count; + bucket.tokens.prompt += event.input_token_count; + bucket.tokens.candidates += event.output_token_count; + bucket.tokens.total += event.total_token_count; + bucket.tokens.cached += event.cached_content_token_count; + bucket.tokens.thoughts += event.thoughts_token_count; + bucket.tokens.tool += event.tool_token_count; + } } private processApiError(event: ApiErrorEvent) { const modelMetrics = this.getOrCreateModelMetrics(event.model); - modelMetrics.api.totalRequests++; - modelMetrics.api.totalErrors++; - modelMetrics.api.totalLatencyMs += event.duration_ms; + const sourceMetrics = this.getOrCreateSourceMetrics( + modelMetrics, + event.subagent_name ?? MAIN_SOURCE, + ); + + for (const bucket of [modelMetrics, sourceMetrics]) { + bucket.api.totalRequests++; + bucket.api.totalErrors++; + bucket.api.totalLatencyMs += event.duration_ms; + } } private processToolCall(event: ToolCallEvent) { diff --git a/packages/core/src/utils/subagentNameContext.ts b/packages/core/src/utils/subagentNameContext.ts new file mode 100644 index 000000000..0571dd4bf --- /dev/null +++ b/packages/core/src/utils/subagentNameContext.ts @@ -0,0 +1,26 @@ +/** + * @license + * Copyright 2025 Qwen + * SPDX-License-Identifier: Apache-2.0 + */ + +import { AsyncLocalStorage } from 'node:async_hooks'; + +/** + * Label used for API calls that do not originate from a subagent + * (i.e. 
calls made by the main conversation). + */ +export const MAIN_SOURCE = 'main'; + +/** + * AsyncLocalStorage carrying the name of the subagent that owns the current + * execution context. When set, `LoggingContentGenerator` annotates emitted + * telemetry events with this name so the `/stats` panel can attribute API + * calls to the originating subagent. When unset, API calls are attributed + * to `MAIN_SOURCE` ("main"). + * + * AgentCore wraps its reasoning loop in `subagentNameContext.run(this.name, + * ...)`; the content generator reads the store inside its per-call logging + * helpers. + */ +export const subagentNameContext = new AsyncLocalStorage();