mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-17 03:56:45 +00:00
- Use 1.25x multiplier for cache-write tokens to match Anthropic's actual pricing (was incorrectly using 1x) - Shell-quote server names in `claude mcp remove` fix text to prevent issues with unusual server names
450 lines
17 KiB
TypeScript
450 lines
17 KiB
TypeScript
import { describe, it, expect } from 'vitest'
|
||
|
||
import {
|
||
aggregateMcpCoverage,
|
||
detectMcpToolCoverage,
|
||
estimateMcpSchemaCost,
|
||
} from '../src/optimize.js'
|
||
import type {
|
||
ClassifiedTurn,
|
||
ParsedApiCall,
|
||
ProjectSummary,
|
||
SessionSummary,
|
||
TaskCategory,
|
||
TokenUsage,
|
||
} from '../src/types.js'
|
||
|
||
// ---------------------------------------------------------------------------
// Test fixtures
// ---------------------------------------------------------------------------
|
||
|
||
/**
 * Token usage with every counter set to zero. Fixtures spread this object and
 * override only the buckets a given test cares about.
 */
const ZERO_USAGE: TokenUsage = {
  inputTokens: 0,
  outputTokens: 0,
  cacheCreationInputTokens: 0,
  cacheReadInputTokens: 0,
  cachedInputTokens: 0,
  reasoningTokens: 0,
  webSearchRequests: 0,
}
|
||
|
||
/**
 * Builds a minimal `ParsedApiCall` fixture.
 *
 * Only the fields the tests vary are configurable; everything else is a fixed
 * placeholder (provider, model, timestamp, deduplication key, ...).
 *
 * @param opts.tools - Tool names recorded on the call. Names starting with
 *   `mcp__` are additionally copied into `mcpTools`. Defaults to no tools.
 * @param opts.cacheCreation - Value for `usage.cacheCreationInputTokens` (default 0).
 * @param opts.cacheRead - Value for `usage.cacheReadInputTokens` (default 0).
 * @param opts.cost - Value for `costUSD` (default 0).
 * @returns The assembled call fixture.
 */
function makeCall(opts: {
  tools?: string[]
  cacheCreation?: number
  cacheRead?: number
  cost?: number
} = {}): ParsedApiCall {
  const tools = opts.tools ?? []
  return {
    provider: 'claude',
    model: 'Opus 4.7',
    usage: {
      // Start from all-zero usage and override just the two cache buckets.
      ...ZERO_USAGE,
      cacheCreationInputTokens: opts.cacheCreation ?? 0,
      cacheReadInputTokens: opts.cacheRead ?? 0,
    },
    costUSD: opts.cost ?? 0,
    tools,
    mcpTools: tools.filter(t => t.startsWith('mcp__')),
    skills: [],
    hasAgentSpawn: false,
    hasPlanMode: false,
    speed: 'standard',
    timestamp: '2026-05-04T00:00:00Z',
    bashCommands: [],
    deduplicationKey: 'k',
  }
}
|
||
|
||
/**
 * Wraps a list of API calls in a `ClassifiedTurn` fixture.
 *
 * @param calls - Calls stored as the turn's `assistantCalls` (by reference).
 * @returns A turn in session `s1`, categorised as `coding`, with an empty user
 *   message, no retries, and no edits.
 */
function makeTurn(calls: ParsedApiCall[]): ClassifiedTurn {
  return {
    userMessage: '',
    assistantCalls: calls,
    timestamp: '2026-05-04T00:00:00Z',
    sessionId: 's1',
    category: 'coding',
    retries: 0,
    hasEdits: false,
  }
}
|
||
|
||
/**
 * Builds a `SessionSummary` fixture with zeroed totals and empty breakdowns.
 *
 * @param opts.sessionId - Session identifier (default `'s1'`).
 * @param opts.inventory - MCP tool names exposed as `mcpInventory`. When
 *   omitted, the `mcpInventory` key is left off the result entirely; an empty
 *   array is still attached.
 * @param opts.turns - Turns in the session (default none).
 * @param opts.mcpBreakdown - Per-server call counts (default `{}`).
 * @returns The session fixture; `apiCalls` is the total number of
 *   `assistantCalls` across `opts.turns`.
 */
function makeSession(opts: {
  sessionId?: string
  inventory?: string[]
  turns?: ClassifiedTurn[]
  mcpBreakdown?: Record<string, { calls: number }>
}): SessionSummary {
  const turns = opts.turns ?? []
  const apiCalls = turns.reduce((s, t) => s + t.assistantCalls.length, 0)
  // Left empty on purpose: the cast satisfies the type without populating
  // per-category entries.
  const emptyCategoryBreakdown = {} as Record<TaskCategory, { turns: number; costUSD: number; retries: number; editTurns: number; oneShotTurns: number }>
  return {
    sessionId: opts.sessionId ?? 's1',
    project: 'p',
    firstTimestamp: '2026-05-04T00:00:00Z',
    lastTimestamp: '2026-05-04T00:00:00Z',
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheReadTokens: 0,
    totalCacheWriteTokens: 0,
    apiCalls,
    turns,
    modelBreakdown: {},
    toolBreakdown: {},
    mcpBreakdown: opts.mcpBreakdown ?? {},
    bashBreakdown: {},
    categoryBreakdown: emptyCategoryBreakdown,
    skillBreakdown: {},
    // Only add the key when an inventory was supplied, so "no inventory
    // observed" is represented by the key being absent.
    ...(opts.inventory ? { mcpInventory: opts.inventory } : {}),
  }
}
|
||
|
||
/**
 * Wraps sessions in a `ProjectSummary` fixture.
 *
 * @param sessions - Sessions belonging to the project.
 * @returns A project named `p` at `/tmp/p` with zero cost; `totalApiCalls` is
 *   the sum of each session's `apiCalls`.
 */
function project(sessions: SessionSummary[]): ProjectSummary {
  return {
    project: 'p',
    projectPath: '/tmp/p',
    sessions,
    totalCostUSD: 0,
    totalApiCalls: sessions.reduce((s, ses) => s + ses.apiCalls, 0),
  }
}
|
||
|
||
// ---------------------------------------------------------------------------
// aggregateMcpCoverage
// ---------------------------------------------------------------------------
|
||
|
||
// Per-server coverage aggregation: which inventoried MCP tools were invoked,
// which were not, and in how many sessions the server's inventory was seen.
describe('aggregateMcpCoverage', () => {
  it('returns empty list when no session has MCP inventory', () => {
    const projects = [project([makeSession({})])]
    expect(aggregateMcpCoverage(projects)).toEqual([])
  })

  it('reports per-server tools available, invoked, and unused', () => {
    const inventory = [
      'mcp__hf__hub_repo_search',
      'mcp__hf__paper_search',
      'mcp__hf__hf_doc_search',
    ]
    const turns = [
      makeTurn([makeCall({ tools: ['mcp__hf__hub_repo_search'] })]),
    ]
    const sessions = [
      makeSession({ inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])

    expect(result).toHaveLength(1)
    expect(result[0]!.server).toBe('hf')
    expect(result[0]!.toolsAvailable).toBe(3)
    expect(result[0]!.toolsInvoked).toBe(1)
    // Expected in sorted order, not inventory order.
    expect(result[0]!.unusedTools).toEqual([
      'mcp__hf__hf_doc_search',
      'mcp__hf__paper_search',
    ])
    expect(result[0]!.coverageRatio).toBeCloseTo(1 / 3, 5)
    expect(result[0]!.invocations).toBe(1)
    expect(result[0]!.loadedSessions).toBe(1)
  })

  it('unions inventory across multiple sessions for the same server', () => {
    const sessions = [
      makeSession({ sessionId: 'a', inventory: ['mcp__x__a', 'mcp__x__b'] }),
      makeSession({ sessionId: 'b', inventory: ['mcp__x__b', 'mcp__x__c'] }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result[0]!.toolsAvailable).toBe(3)
    expect(result[0]!.loadedSessions).toBe(2)
  })

  it('separates servers with similar names', () => {
    const sessions = [
      makeSession({ inventory: ['mcp__hf__a', 'mcp__hugface__a'] }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result.map(r => r.server).sort()).toEqual(['hf', 'hugface'])
  })

  it('skips invocations without inventory (foreign server, no inventory observed)', () => {
    // A server can show up only via a call, with no inventory ever observed
    // for it. Without an inventory baseline we cannot reason about coverage,
    // so the aggregator drops the server from the report entirely and it is
    // never a candidate for the unused-coverage finding.
    const turns = [makeTurn([makeCall({ tools: ['mcp__ghost__t1'] })])]
    const sessions = [
      makeSession({ turns, mcpBreakdown: { ghost: { calls: 1 } } }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result).toEqual([])
  })
})
|
||
|
||
// ---------------------------------------------------------------------------
// estimateMcpSchemaCost — cache-aware accounting
// ---------------------------------------------------------------------------
|
||
|
||
// Cache-aware cost estimate for unused MCP tool schemas. The fixtures below
// assume 400 schema tokens per unused tool, so 30 unused tools = 12k tokens.
describe('estimateMcpSchemaCost', () => {
  it('charges first cacheCreation turn at full price, subsequent turns at cache-read', () => {
    const turns = [
      makeTurn([makeCall({ cacheCreation: 50_000 })]), // first turn: write
      makeTurn([makeCall({ cacheRead: 60_000 })]), // ongoing: read
      makeTurn([makeCall({ cacheRead: 60_000 })]),
    ]
    const sessions = [makeSession({
      inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    // 30 unused tools * 400 token estimate = 12_000 schema tokens
    // cap by call cache buckets so we never overclaim
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(12_000) // capped by 50k creation, 12k schema fits
    expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns
    // effective = write * 1.25 + read * 0.10 (cache pricing)
    expect(cost.effectiveInputTokens).toBeCloseTo(12_000 * 1.25 + 24_000 * 0.10, 5)
  })

  it('caps by available cache bucket so we never overclaim', () => {
    const turns = [makeTurn([makeCall({ cacheCreation: 1_000 })])]
    const sessions = [makeSession({
      inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    // 30*400 = 12k schema tokens, but the call only had 1k cache-creation,
    // so we should not claim more than 1k of overhead for that turn.
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(1_000)
  })

  it('returns zero when no unused tools', () => {
    const sessions = [makeSession({
      inventory: ['mcp__svc__t1'],
      turns: [makeTurn([makeCall({ cacheCreation: 5000 })])],
    })]
    const cost = estimateMcpSchemaCost(0, [project(sessions)], 'svc')
    expect(cost).toEqual({ cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 })
  })

  it('counts cache write AND cache read on the same call', () => {
    // A long session can have a cache rebuild mid-stream where one call
    // reports both buckets. The estimator must charge both, not skip the
    // read because of the write.
    const turns = [makeTurn([
      makeCall({ cacheCreation: 50_000, cacheRead: 30_000 }),
    ])]
    const sessions = [makeSession({
      inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(12_000) // 12k schema fits within the 50k creation bucket
    expect(cost.cacheReadTokens).toBe(12_000) // 12k schema fits within the 30k read bucket
  })

  it('counts every cache rebuild, not just the first one', () => {
    // Sessions that span more than 5 minutes can rebuild the cache
    // multiple times. The estimator should treat every cacheCreation
    // bucket as another write.
    const turns = [makeTurn([
      makeCall({ cacheCreation: 50_000 }),
      makeCall({ cacheCreation: 50_000 }), // rebuild after cache TTL
      makeCall({ cacheRead: 60_000 }),
    ])]
    const sessions = [makeSession({
      inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
      turns,
      mcpBreakdown: { svc: { calls: 0 } },
    })]
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(24_000) // both rebuilds counted
    expect(cost.cacheReadTokens).toBe(12_000)
  })

  it('skips sessions where the server was never loaded', () => {
    const turns = [makeTurn([makeCall({ cacheCreation: 100_000 })])]
    const sessions = [makeSession({
      inventory: ['mcp__other__t1'],
      turns,
    })]
    const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(0)
  })

  it('requires observed inventory for the server, not just invocations', () => {
    // Session invoked the server (mcpBreakdown set, mcpTools called) but
    // never reported a deferred_tools_delta for it. Cost should be 0 to
    // stay consistent with aggregateMcpCoverage's loadedSessions rule.
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__svc__t1'], cacheCreation: 100_000 }),
    ])]
    const sessions = [makeSession({
      // No inventory at all
      turns,
      mcpBreakdown: { svc: { calls: 1 } },
    })]
    const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(0)
    expect(cost.cacheReadTokens).toBe(0)
  })

  it('caps combined unused-schema budget across multiple flagged servers', () => {
    // Two flagged servers, each with 30 unused tools (12k schema each =
    // 24k combined). One call has a 50k cache-creation bucket. The
    // combined cap means total write tokens reported is min(24k, 50k) =
    // 24k, not 24k + 24k = 48k.
    const inventory = [
      ...Array.from({ length: 30 }, (_, i) => `mcp__a__t${i}`),
      ...Array.from({ length: 30 }, (_, i) => `mcp__b__t${i}`),
    ]
    const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])]
    const sessions = [makeSession({ inventory, turns })]
    const cost = estimateMcpSchemaCost(
      { a: 30, b: 30 },
      [project(sessions)],
      ['a', 'b'],
    )
    expect(cost.cacheWriteTokens).toBe(24_000)
  })

  it('still works with the single-server signature (backward compat)', () => {
    const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])]
    const sessions = [makeSession({
      inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
      turns,
    })]
    const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
    expect(cost.cacheWriteTokens).toBe(12_000)
  })
})
|
||
|
||
// ---------------------------------------------------------------------------
// detectMcpToolCoverage — finding emission with thresholds
// ---------------------------------------------------------------------------
|
||
|
||
// Finding emission: a server is flagged only when it is large enough, seen in
// enough sessions, and has low tool coverage (thresholds noted per test).
describe('detectMcpToolCoverage', () => {
  it('returns null when no inventory exists at all', () => {
    expect(detectMcpToolCoverage([project([makeSession({})])])).toBeNull()
  })

  it('does not flag a server with healthy coverage', () => {
    const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
    const turns = [makeTurn(
      Array.from({ length: 8 }, (_, i) => makeCall({ tools: [`mcp__svc__t${i}`] })),
    )]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns }),
      makeSession({ sessionId: 'b', inventory, turns }),
    ]
    // 8/20 = 40% coverage, above the 20% threshold -> no finding
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('does not flag a server with too few tools (signal too noisy)', () => {
    // Below MCP_COVERAGE_MIN_TOOLS=10
    const inventory = ['mcp__svc__a', 'mcp__svc__b']
    const sessions = [
      makeSession({ sessionId: 'a', inventory }),
      makeSession({ sessionId: 'b', inventory }),
    ]
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('does not flag if seen in only one session (insufficient evidence)', () => {
    const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
    const sessions = [makeSession({ inventory })]
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  it('flags a large server with low coverage across multiple sessions', () => {
    const inventory = Array.from({ length: 30 }, (_, i) => `mcp__hf__t${i}`)
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__hf__t0'], cacheCreation: 100_000 }),
    ])]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
      makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
    ]
    const finding = detectMcpToolCoverage([project(sessions)])
    expect(finding).not.toBeNull()
    expect(finding!.title).toContain('1 MCP server')
    expect(finding!.title).toContain('low tool coverage')
    expect(finding!.explanation).toContain('hf')
    expect(finding!.explanation).toContain('1/30')
    expect(finding!.fix.type).toBe('command')
    // The server name must be shell-quoted in the suggested command.
    expect((finding!.fix as { text: string }).text).toContain("claude mcp remove 'hf'")
    expect(finding!.tokensSaved).toBeGreaterThan(0)
  })

  it('escalates impact to high when token waste crosses the threshold', () => {
    const inventory = Array.from({ length: 60 }, (_, i) => `mcp__big__t${i}`)
    // 60 tools * 400 tokens = 24k schema. With many sessions and large
    // cache-creation buckets, total effective tokens easily clear 200k.
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__big__t0'], cacheCreation: 50_000 }),
      makeCall({ cacheRead: 60_000 }),
      makeCall({ cacheRead: 60_000 }),
    ])]
    // Need enough sessions so the per-session ~34.8k effective tokens
    // (24k write × 1.25 + 48k read × 0.10) sum past the 200k high-impact
    // threshold.
    const sessions = Array.from({ length: 8 }, (_, i) =>
      makeSession({ sessionId: `s${i}`, inventory, turns, mcpBreakdown: { big: { calls: 1 } } }),
    )
    const finding = detectMcpToolCoverage([project(sessions)])
    expect(finding).not.toBeNull()
    expect(finding!.impact).toBe('high')
  })

  it('does not count invocation-only sessions toward loadedSessions', () => {
    // Server `svc` has inventory in only one session, but is invoked in
    // a second session that never observed the schema. Pre-fix this
    // would have satisfied the >=2 session threshold; it must not now.
    const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
    const turns = [makeTurn([
      makeCall({ tools: ['mcp__svc__t0'], cacheCreation: 50_000 }),
    ])]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
      // No inventory — this shouldn't be considered a "loaded" session.
      makeSession({ sessionId: 'b', turns, mcpBreakdown: { svc: { calls: 1 } } }),
    ]
    expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
  })

  // NOTE(review): this case asserts on aggregateMcpCoverage directly rather
  // than detectMcpToolCoverage; consider moving it to that describe block.
  it('does not let invocations of un-inventoried tools inflate coverage', () => {
    // Inventory has 20 tools, none invoked. Calls hit a 21st tool that
    // never appeared in any deferred_tools_delta (could be a renamed/
    // removed tool from an older session config). Coverage must stay 0%
    // and unusedCount must not go negative.
    const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
    const turns = [makeTurn([makeCall({ tools: ['mcp__svc__ghost'] })])]
    const sessions = [
      makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
      makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
    ]
    const result = aggregateMcpCoverage([project(sessions)])
    expect(result[0]!.toolsAvailable).toBe(20)
    expect(result[0]!.toolsInvoked).toBe(0)
    expect(result[0]!.coverageRatio).toBe(0)
    expect(result[0]!.unusedTools).toHaveLength(20)
  })

  it('handles multiple flagged servers and pluralises the title', () => {
    const sessions: SessionSummary[] = []
    for (const server of ['svc1', 'svc2']) {
      const inventory = Array.from({ length: 20 }, (_, i) => `mcp__${server}__t${i}`)
      const turns = [makeTurn([
        makeCall({ tools: [`mcp__${server}__t0`], cacheCreation: 50_000 }),
      ])]
      sessions.push(
        makeSession({ sessionId: `${server}-a`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }),
        makeSession({ sessionId: `${server}-b`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }),
      )
    }
    const finding = detectMcpToolCoverage([project(sessions)])
    expect(finding).not.toBeNull()
    expect(finding!.title).toContain('2 MCP servers')
    // One removal command per flagged server, newline-separated.
    expect((finding!.fix as { text: string }).text.split('\n')).toHaveLength(2)
  })
})
|