import { describe, it, expect } from 'vitest'

import {
  aggregateMcpCoverage,
  detectMcpToolCoverage,
  estimateMcpSchemaCost,
} from '../src/optimize.js'
import type {
  ClassifiedTurn,
  ParsedApiCall,
  ProjectSummary,
  SessionSummary,
  TaskCategory,
  TokenUsage,
} from '../src/types.js'

// ---------------------------------------------------------------------------
// Test fixtures
// ---------------------------------------------------------------------------

/** Usage record with every counter at zero; fixtures spread it and override fields. */
const ZERO_USAGE: TokenUsage = {
  inputTokens: 0,
  outputTokens: 0,
  cacheCreationInputTokens: 0,
  cacheReadInputTokens: 0,
  cachedInputTokens: 0,
  reasoningTokens: 0,
  webSearchRequests: 0,
}

/**
 * Builds `count` tool names of the form `mcp__<server>__t<i>` for i in
 * [0, count).
 */
function inventoryFor(server: string, count: number): string[] {
  return Array.from({ length: count }, (_, i) => `mcp__${server}__t${i}`)
}

/**
 * Builds a single API call fixture.
 *
 * @param opts.tools - Tool names attached to the call; those starting with
 *   `mcp__` are also copied into `mcpTools`.
 * @param opts.cacheCreation - Value for `usage.cacheCreationInputTokens` (default 0).
 * @param opts.cacheRead - Value for `usage.cacheReadInputTokens` (default 0).
 * @param opts.cost - Value for `costUSD` (default 0).
 */
function makeCall(opts: {
  tools?: string[]
  cacheCreation?: number
  cacheRead?: number
  cost?: number
} = {}): ParsedApiCall {
  const tools = opts.tools ?? []
  return {
    provider: 'claude',
    model: 'Opus 4.7',
    usage: {
      ...ZERO_USAGE,
      cacheCreationInputTokens: opts.cacheCreation ?? 0,
      cacheReadInputTokens: opts.cacheRead ?? 0,
    },
    costUSD: opts.cost ?? 0,
    tools,
    mcpTools: tools.filter(t => t.startsWith('mcp__')),
    skills: [],
    hasAgentSpawn: false,
    hasPlanMode: false,
    speed: 'standard',
    timestamp: '2026-05-04T00:00:00Z',
    bashCommands: [],
    deduplicationKey: 'k',
  }
}

/** Wraps the given calls in a turn fixture with fixed metadata. */
function makeTurn(calls: ParsedApiCall[]): ClassifiedTurn {
  return {
    userMessage: '',
    assistantCalls: calls,
    timestamp: '2026-05-04T00:00:00Z',
    sessionId: 's1',
    category: 'coding',
    retries: 0,
    hasEdits: false,
  }
}

/**
 * Builds a session fixture.
 *
 * @param opts.sessionId - Session id (default `'s1'`).
 * @param opts.inventory - When provided, stored as `mcpInventory`; when
 *   omitted, the `mcpInventory` key is left off the session entirely.
 * @param opts.turns - Turns in the session (default none); `apiCalls` is the
 *   total number of assistant calls across them.
 * @param opts.mcpBreakdown - Stored as `mcpBreakdown` (default `{}`).
 */
function makeSession(opts: {
  sessionId?: string
  inventory?: string[]
  turns?: ClassifiedTurn[]
  mcpBreakdown?: SessionSummary['mcpBreakdown']
}): SessionSummary {
  const turns = opts.turns ?? []
  const apiCalls = turns.reduce((s, t) => s + t.assistantCalls.length, 0)
  // Value type is derived from SessionSummary so the fixture cannot drift
  // from the real shape. NOTE(review): assumes categoryBreakdown is keyed by
  // TaskCategory — confirm against ../src/types.js.
  const emptyCategoryBreakdown = {} as Record<
    TaskCategory,
    SessionSummary['categoryBreakdown'][TaskCategory]
  >
  return {
    sessionId: opts.sessionId ?? 's1',
    project: 'p',
    firstTimestamp: '2026-05-04T00:00:00Z',
    lastTimestamp: '2026-05-04T00:00:00Z',
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheReadTokens: 0,
    totalCacheWriteTokens: 0,
    apiCalls,
    turns,
    modelBreakdown: {},
    toolBreakdown: {},
    mcpBreakdown: opts.mcpBreakdown ?? {},
    bashBreakdown: {},
    categoryBreakdown: emptyCategoryBreakdown,
    skillBreakdown: {},
    // Only set the key when an inventory was supplied, so "no inventory
    // observed" sessions have no mcpInventory property at all.
    ...(opts.inventory ? { mcpInventory: opts.inventory } : {}),
  }
}

/** Wraps sessions in a project fixture; `totalApiCalls` sums each session's `apiCalls`. */
function project(sessions: SessionSummary[]): ProjectSummary {
  return {
    project: 'p',
    projectPath: '/tmp/p',
    sessions,
    totalCostUSD: 0,
    totalApiCalls: sessions.reduce((s, ses) => s + ses.apiCalls, 0),
  }
}

// ---------------------------------------------------------------------------
// aggregateMcpCoverage
// ---------------------------------------------------------------------------

describe('aggregateMcpCoverage', () => {
  it('returns empty list when no session has MCP inventory', () => {
    const projects = [project([makeSession({})])]
    expect(aggregateMcpCoverage(projects)).toEqual([])
  })

  it('reports per-server tools available, invoked, and unused', () => {
    const inventory = [
      'mcp__hf__hub_repo_search',
      'mcp__hf__paper_search',
      'mcp__hf__hf_doc_search',
    ]
    const turns = [
      makeTurn([makeCall({ tools: ['mcp__hf__hub_repo_search'] })]),
    ]
    const sessions = [
      makeSession({ inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result).toHaveLength(1)
    expect(result[0]!.server).toBe('hf')
    expect(result[0]!.toolsAvailable).toBe(3)
    expect(result[0]!.toolsInvoked).toBe(1)
    expect(result[0]!.unusedTools).toEqual([
      'mcp__hf__hf_doc_search',
      'mcp__hf__paper_search',
    ])
    expect(result[0]!.coverageRatio).toBeCloseTo(1 / 3, 5)
    expect(result[0]!.invocations).toBe(1)
    expect(result[0]!.loadedSessions).toBe(1)
  })

  it('unions inventory across multiple sessions for the same server', () => {
    const sessions = [
      makeSession({ sessionId: 'a', inventory: ['mcp__x__a', 'mcp__x__b'] }),
      makeSession({ sessionId: 'b', inventory: ['mcp__x__b', 'mcp__x__c'] }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result[0]!.toolsAvailable).toBe(3)
    expect(result[0]!.loadedSessions).toBe(2)
  })

  it('separates servers with similar names', () => {
    const sessions = [
      makeSession({ inventory: ['mcp__hf__a', 'mcp__hugface__a'] }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result.map(r => r.server).sort()).toEqual(['hf', 'hugface'])
  })

  it('skips invocations without inventory (foreign server, no inventory observed)', () => {
    // A server can show up only via a call, with no inventory ever observed
    // for it. The aggregator drops such a server from the report because
    // coverage cannot be reasoned about without an inventory baseline, so
    // it can never become a candidate for the unused-coverage finding.
    const turns = [makeTurn([makeCall({ tools: ['mcp__ghost__t1'] })])]
    const sessions = [
      makeSession({ turns, mcpBreakdown: { ghost: { calls: 1 } } }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result).toEqual([])
  })
})

// ---------------------------------------------------------------------------
// estimateMcpSchemaCost — cache-aware accounting
// ---------------------------------------------------------------------------

describe('estimateMcpSchemaCost', () => {
  it('charges first cacheCreation turn at full price, subsequent turns at cache-read', () => {
    const turns = [
      makeTurn([makeCall({ cacheCreation: 50_000 })]), // first turn: write
      makeTurn([makeCall({ cacheRead: 60_000 })]), // ongoing: read
      makeTurn([makeCall({ cacheRead: 60_000 })]),
    ]
    const sessions = [makeSession({
      inventory: inventoryFor('svc', 30),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    // 30 unused tools * 400 token estimate = 12_000 schema tokens
    // cap by call cache buckets so we never overclaim
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(12_000) // 12k schema fits within the 50k creation bucket
    expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns
    // effective = write * 1.25 + read * 0.10 (cache pricing)
    expect(cost.effectiveInputTokens).toBeCloseTo(12_000 * 1.25 + 24_000 * 0.10, 5)
  })

  it('caps by available cache bucket so we never overclaim', () => {
    const turns = [makeTurn([makeCall({ cacheCreation: 1_000 })])]
    const sessions = [makeSession({
      inventory: inventoryFor('svc', 30),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    // 30*400 = 12k schema tokens, but the call only had 1k cache-creation,
    // so we should not claim more than 1k of overhead for that turn.
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(1_000)
  })

  it('returns zero when no unused tools', () => {
    const sessions = [makeSession({
      inventory: ['mcp__svc__t1'],
      turns: [makeTurn([makeCall({ cacheCreation: 5000 })])],
    })]
    const cost = estimateMcpSchemaCost(0, [project(sessions)], 'svc')
    expect(cost).toEqual({ cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 })
  })

  it('counts cache write AND cache read on the same call', () => {
    // A long session can have a cache rebuild mid-stream where one call
    // reports both buckets. The estimator must charge both, not skip the
    // read because of the write.
    const turns = [makeTurn([
      makeCall({ cacheCreation: 50_000, cacheRead: 30_000 }),
    ])]
    const sessions = [makeSession({
      inventory: inventoryFor('svc', 30),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(12_000) // 12k schema fits within the 50k creation bucket
    expect(cost.cacheReadTokens).toBe(12_000) // 12k schema fits within the 30k read bucket
  })

  it('counts every cache rebuild, not just the first one', () => {
    // Sessions that span more than 5 minutes can rebuild the cache
    // multiple times. The estimator should treat every cacheCreation
    // bucket as another write.
    const turns = [makeTurn([
      makeCall({ cacheCreation: 50_000 }),
      makeCall({ cacheCreation: 50_000 }), // rebuild after cache TTL
      makeCall({ cacheRead: 60_000 }),
    ])]
    const sessions = [makeSession({
      inventory: inventoryFor('svc', 30),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(24_000) // both rebuilds counted
    expect(cost.cacheReadTokens).toBe(12_000)
  })

  it('skips sessions where the server was never loaded', () => {
    const turns = [makeTurn([makeCall({ cacheCreation: 100_000 })])]
    const sessions = [makeSession({
      inventory: ['mcp__other__t1'],
      turns,
    })]
    const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(0)
  })

  it('requires observed inventory for the server, not just invocations', () => {
    // Session invoked the server (mcpBreakdown set, mcpTools called) but
    // never reported a deferred_tools_delta for it. Cost should be 0 to
    // stay consistent with aggregateMcpCoverage's loadedSessions rule.
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__svc__t1'], cacheCreation: 100_000 }),
    ])]
    const sessions = [makeSession({
      // No inventory at all
      turns,
      mcpBreakdown: { svc: { calls: 1 } },
    })]
    const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(0)
    expect(cost.cacheReadTokens).toBe(0)
  })

  it('caps combined unused-schema budget across multiple flagged servers', () => {
    // Two flagged servers, each with 30 unused tools (12k schema each =
    // 24k combined). One call has a 50k cache-creation bucket. The
    // combined cap means total write tokens reported is min(24k, 50k) =
    // 24k, not 24k + 24k = 48k.
    const inventory = [
      ...inventoryFor('a', 30),
      ...inventoryFor('b', 30),
    ]
    const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])]
    const sessions = [makeSession({ inventory, turns })]
    const cost = estimateMcpSchemaCost(
      { a: 30, b: 30 },
      [project(sessions)],
      ['a', 'b'],
    )
    expect(cost.cacheWriteTokens).toBe(24_000)
  })

  it('still works with the single-server signature (backward compat)', () => {
    const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])]
    const sessions = [makeSession({
      inventory: inventoryFor('svc', 30),
      turns,
    })]
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(12_000)
  })
})

// ---------------------------------------------------------------------------
// detectMcpToolCoverage — finding emission with thresholds
// ---------------------------------------------------------------------------

describe('detectMcpToolCoverage', () => {
  it('returns null when no inventory exists at all', () => {
    expect(detectMcpToolCoverage([project([makeSession({})])])).toBeNull()
  })

  it('does not flag a server with healthy coverage', () => {
    const inventory = inventoryFor('svc', 20)
    const turns = [makeTurn(
      Array.from({ length: 8 }, (_, i) => makeCall({ tools: [`mcp__svc__t${i}`] })),
    )]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns }),
      makeSession({ sessionId: 'b', inventory, turns }),
    ]
    // 8/20 = 40% coverage, above the 20% threshold -> no finding
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('does not flag a server with too few tools (signal too noisy)', () => {
    // Below MCP_COVERAGE_MIN_TOOLS=10
    const inventory = ['mcp__svc__a', 'mcp__svc__b']
    const sessions = [
      makeSession({ sessionId: 'a', inventory }),
      makeSession({ sessionId: 'b', inventory }),
    ]
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('does not flag if seen in only one session (insufficient evidence)', () => {
    const inventory = inventoryFor('svc', 20)
    const sessions = [makeSession({ inventory })]
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('flags a large server with low coverage across multiple sessions', () => {
    const inventory = inventoryFor('hf', 30)
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__hf__t0'], cacheCreation: 100_000 }),
    ])]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
      makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
    ]
    const finding = detectMcpToolCoverage([project(sessions)])
    expect(finding).not.toBeNull()
    expect(finding!.title).toContain('1 MCP server')
    expect(finding!.title).toContain('low tool coverage')
    expect(finding!.explanation).toContain('hf')
    expect(finding!.explanation).toContain('1/30')
    expect(finding!.fix.type).toBe('command')
    expect((finding!.fix as { text: string }).text).toContain("claude mcp remove 'hf'")
    expect(finding!.tokensSaved).toBeGreaterThan(0)
  })

  it('escalates impact to high when token waste crosses the threshold', () => {
    const inventory = inventoryFor('big', 60)
    // 60 tools * 400 tokens = 24k schema. With many sessions and large
    // cache-creation buckets, total effective tokens easily clear 200k.
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__big__t0'], cacheCreation: 50_000 }),
      makeCall({ cacheRead: 60_000 }),
      makeCall({ cacheRead: 60_000 }),
    ])]
    // Need enough sessions so the per-session ~28.8k effective tokens
    // (24k write + 48k read × 0.10) sum past the 200k high-impact threshold.
    const sessions = Array.from({ length: 8 }, (_, i) =>
      makeSession({ sessionId: `s${i}`, inventory, turns, mcpBreakdown: { big: { calls: 1 } } }),
    )
    const finding = detectMcpToolCoverage([project(sessions)])
    expect(finding).not.toBeNull()
    expect(finding!.impact).toBe('high')
  })

  it('does not count invocation-only sessions toward loadedSessions', () => {
    // Server `svc` has inventory in only one session, but is invoked in
    // a second session that never observed the schema. Pre-fix this
    // would have satisfied the >=2 session threshold; it must not now.
    const inventory = inventoryFor('svc', 20)
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__svc__t0'], cacheCreation: 50_000 }),
    ])]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
      // No inventory — this shouldn't be considered a "loaded" session.
      makeSession({ sessionId: 'b', turns, mcpBreakdown: { svc: { calls: 1 } } }),
    ]
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('does not let invocations of un-inventoried tools inflate coverage', () => {
    // Inventory has 20 tools, none invoked. Calls hit a 21st tool that
    // never appeared in any deferred_tools_delta (could be a renamed/
    // removed tool from an older session config). Coverage must stay 0%
    // and unusedCount must not go negative.
    // Asserted on aggregateMcpCoverage output, which exposes the coverage
    // numbers directly.
    const inventory = inventoryFor('svc', 20)
    const turns = [makeTurn([makeCall({ tools: ['mcp__svc__ghost'] })])]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
      makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result[0]!.toolsAvailable).toBe(20)
    expect(result[0]!.toolsInvoked).toBe(0)
    expect(result[0]!.coverageRatio).toBe(0)
    expect(result[0]!.unusedTools).toHaveLength(20)
  })

  it('handles multiple flagged servers and pluralises the title', () => {
    const sessions: SessionSummary[] = []
    for (const server of ['svc1', 'svc2']) {
      const inventory = inventoryFor(server, 20)
      const turns = [makeTurn([
        makeCall({ tools: [`mcp__${server}__t0`], cacheCreation: 50_000 }),
      ])]
      sessions.push(
        makeSession({ sessionId: `${server}-a`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }),
        makeSession({ sessionId: `${server}-b`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }),
      )
    }
    const finding = detectMcpToolCoverage([project(sessions)])
    expect(finding).not.toBeNull()
    expect(finding!.title).toContain('2 MCP servers')
    expect((finding!.fix as { text: string }).text.split('\n')).toHaveLength(2)
  })
})