mirror of
https://github.com/AgentSeal/codeburn.git
synced 2026-05-17 03:56:45 +00:00
Merge origin/main into feat/session-outlier-detection
This commit is contained in:
commit
38d21643bd
15 changed files with 1223 additions and 83 deletions
16
CHANGELOG.md
16
CHANGELOG.md
|
|
@ -1,5 +1,21 @@
|
|||
# Changelog
|
||||
|
||||
## Unreleased
|
||||
|
||||
### Added (CLI)
|
||||
- **MCP tool coverage detector.** New `optimize` finding flags MCP servers
|
||||
whose tool inventory is largely unused. Inventory is observed from the
|
||||
Claude `deferred_tools_delta` JSONL attachments (exact tool names per
|
||||
session) instead of guessed at five tools per server. Token-savings
|
||||
estimates are cache-aware: schema bytes pay the cache-write price on every
|
||||
cache-creation turn of a session, then carry at the cache-read discount
|
||||
on subsequent turns, capped per call so we never claim more overhead
|
||||
than the call's own cache buckets could contain. Threshold:
|
||||
>10 tools available, <20% coverage, observed in ≥2 sessions. Closes #2.
|
||||
|
||||
### Fixed (CLI)
|
||||
- **`all` period semantics unified between CLI and dashboard.** The dashboard treated `--period all` as all-time (epoch start) while the CLI bounded it to the last 6 months. Both now consistently mean "Last 6 months". Period helpers (`Period`, `PERIODS`, `PERIOD_LABELS`, `toPeriod`, `getDateRange`) consolidated into `cli-date.ts`. Use `--from` / `--to` for unbounded historical ranges.
|
||||
|
||||
## 0.9.6 - 2026-05-03
|
||||
|
||||
### Added (CLI)
|
||||
|
|
|
|||
|
|
@ -85,7 +85,7 @@ codeburn yield # track productive vs reverted/abandoned spend
|
|||
codeburn yield -p 30days # yield analysis for last 30 days
|
||||
```
|
||||
|
||||
Arrow keys switch between Today, 7 Days, 30 Days, Month, and All Time. Press `q` to quit, `1` `2` `3` `4` `5` as shortcuts, `c` to open model comparison, `o` to open optimize. The dashboard auto-refreshes every 30 seconds by default (`--refresh 0` to disable). It also shows average cost per session and the five most expensive sessions across all projects.
|
||||
Arrow keys switch between Today, 7 Days, 30 Days, Month, and 6 Months (use `--from` / `--to` for an exact historical window). Press `q` to quit, `1` `2` `3` `4` `5` as shortcuts, `c` to open model comparison, `o` to open optimize. The dashboard auto-refreshes every 30 seconds by default (`--refresh 0` to disable). It also shows average cost per session and the five most expensive sessions across all projects.
|
||||
|
||||
## Supported Providers
|
||||
|
||||
|
|
@ -196,7 +196,7 @@ You can also open it inline from the dashboard: press `o` when a finding count a
|
|||
### Compare
|
||||
|
||||
```bash
|
||||
codeburn compare # interactive model picker (default: all time)
|
||||
codeburn compare # interactive model picker (default: last 6 months)
|
||||
codeburn compare -p week # last 7 days
|
||||
codeburn compare -p today # today only
|
||||
codeburn compare --provider claude # Claude Code sessions only
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ const PERIODS = [
|
|||
{ id: 'week', label: '7 Days' },
|
||||
{ id: '30days', label: '30 Days' },
|
||||
{ id: 'month', label: 'Month' },
|
||||
{ id: 'all', label: 'All' },
|
||||
{ id: 'all', label: '6 Months' },
|
||||
];
|
||||
|
||||
const INSIGHTS = [
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ const PERIODS = [
|
|||
{ id: 'week', label: '7 Days' },
|
||||
{ id: '30days', label: '30 Days' },
|
||||
{ id: 'month', label: 'Month' },
|
||||
{ id: 'all', label: 'All Time' },
|
||||
{ id: 'all', label: '6 Months' },
|
||||
];
|
||||
|
||||
export default class CodeBurnPreferences extends ExtensionPreferences {
|
||||
|
|
|
|||
|
|
@ -321,7 +321,7 @@ enum Period: String, CaseIterable, Identifiable {
|
|||
case sevenDays = "7 Days"
|
||||
case thirtyDays = "30 Days"
|
||||
case month = "Month"
|
||||
case all = "All"
|
||||
case all = "6 Months"
|
||||
|
||||
var id: String { rawValue }
|
||||
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ private struct EmptyProviderState: View {
|
|||
case .sevenDays: "the last 7 days"
|
||||
case .thirtyDays: "the last 30 days"
|
||||
case .month: "this month"
|
||||
case .all: "all time"
|
||||
case .all: "the last 6 months"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { DateRange } from './types.js'
|
||||
import { toDateString } from './daily-cache.js'
|
||||
|
||||
const ISO_DATE_RE = /^\d{4}-\d{2}-\d{2}$/
|
||||
|
||||
|
|
@ -7,6 +8,35 @@ const END_OF_DAY_MINUTES = 59
|
|||
const END_OF_DAY_SECONDS = 59
|
||||
const END_OF_DAY_MS = 999
|
||||
|
||||
// "All Time" is intentionally bounded to the last 6 months. Older data is
|
||||
// rarely actionable for a cost tracker, and capping the range keeps the parse
|
||||
// path bounded so providers like Codex/Cursor with sparse multi-year history
|
||||
// still load in seconds. Users who need an unbounded window can use
|
||||
// `--from` / `--to`.
|
||||
const ALL_TIME_MONTHS = 6
|
||||
|
||||
/** Named reporting windows accepted by the period helpers in this module. */
export type Period = 'today' | 'week' | '30days' | 'month' | 'all'

/** Runtime list of every `Period` value. */
export const PERIODS: Period[] = ['today', 'week', '30days', 'month', 'all']

// Compact captions sized for the dashboard tab strip. Header text uses the
// longer wording returned by `getDateRange().label` instead.
export const PERIOD_LABELS: Record<Period, string> = {
  today: 'Today',
  week: '7 Days',
  '30days': '30 Days',
  month: 'This Month',
  all: '6 Months',
}

/**
 * Coerces an arbitrary string into a `Period`.
 *
 * @param s - Raw period name.
 * @returns `s` itself when it is exactly `'today'`, `'month'`, `'30days'` or
 *   `'all'`; `'week'` for everything else (including `'week'`).
 */
export function toPeriod(s: string): Period {
  switch (s) {
    case 'today':
    case 'month':
    case '30days':
    case 'all':
      return s
    default:
      return 'week'
  }
}
|
||||
|
||||
function parseLocalDate(s: string): Date {
|
||||
if (!ISO_DATE_RE.test(s)) {
|
||||
throw new Error(`Invalid date format "${s}": expected YYYY-MM-DD`)
|
||||
|
|
@ -37,3 +67,58 @@ export function parseDateRangeFlags(from: string | undefined, to: string | undef
|
|||
}
|
||||
return { start, end }
|
||||
}
|
||||
|
||||
/**
 * Resolves a named period into a concrete local-time date window plus a
 * human-readable label.
 *
 * The parameter is a plain string rather than `Period` because this function
 * also understands `'yesterday'`, which is not part of the `Period` union.
 * Any unrecognised value is treated exactly like `'week'`.
 *
 * Windows, all relative to the current local date:
 * - `'today'`: midnight today to the end of today.
 * - `'yesterday'`: midnight yesterday to the end of yesterday.
 * - `'week'` / unknown: midnight 7 days ago to the end of today.
 * - `'30days'`: midnight 30 days ago to the end of today.
 * - `'month'`: the 1st of the current month to the end of today.
 * - `'all'`: the 1st of the month `ALL_TIME_MONTHS` months back to the end of
 *   today. This is deliberately bounded; use `--from` / `--to` for an
 *   unbounded historical window.
 *
 * @param period - Period name.
 * @returns The `{ start, end }` range and its display label.
 */
export function getDateRange(period: string): { range: DateRange; label: string } {
  const now = new Date()
  const year = now.getFullYear()
  const month = now.getMonth()
  const day = now.getDate()

  // The Date constructor normalises out-of-range day numbers, so subtracting
  // past the 1st rolls back into the previous month (or year).
  const startOfDay = (daysBack: number): Date => new Date(year, month, day - daysBack)
  const endOfDay = (daysBack: number): Date =>
    new Date(
      year,
      month,
      day - daysBack,
      END_OF_DAY_HOURS,
      END_OF_DAY_MINUTES,
      END_OF_DAY_SECONDS,
      END_OF_DAY_MS,
    )

  const end = endOfDay(0)

  if (period === 'today') {
    const start = startOfDay(0)
    return { range: { start, end }, label: `Today (${toDateString(start)})` }
  }

  if (period === 'yesterday') {
    const start = startOfDay(1)
    return { range: { start, end: endOfDay(1) }, label: `Yesterday (${toDateString(start)})` }
  }

  if (period === 'month') {
    const start = new Date(year, month, 1)
    return { range: { start, end }, label: `${now.toLocaleString('default', { month: 'long' })} ${year}` }
  }

  if (period === '30days') {
    return { range: { start: startOfDay(30), end }, label: 'Last 30 Days' }
  }

  if (period === 'all') {
    const start = new Date(year, month - ALL_TIME_MONTHS, 1)
    return { range: { start, end }, label: 'Last 6 months' }
  }

  // 'week' and every unrecognised value share the same 7-day window.
  return { range: { start: startOfDay(7), end }, label: 'Last 7 Days' }
}
|
||||
|
|
|
|||
52
src/cli.ts
52
src/cli.ts
|
|
@ -11,7 +11,7 @@ import { getDaysInRange, ensureCacheHydrated, emptyCache, BACKFILL_DAYS, toDateS
|
|||
import { aggregateProjectsIntoDays, buildPeriodDataFromDays, dateKey } from './day-aggregator.js'
|
||||
import { CATEGORY_LABELS, type DateRange, type ProjectSummary, type TaskCategory } from './types.js'
|
||||
import { renderDashboard } from './dashboard.js'
|
||||
import { parseDateRangeFlags } from './cli-date.js'
|
||||
import { parseDateRangeFlags, getDateRange, toPeriod, type Period } from './cli-date.js'
|
||||
import { runOptimize, scanAndDetect } from './optimize.js'
|
||||
import { renderCompare } from './compare.js'
|
||||
import { getAllProviders } from './providers/index.js'
|
||||
|
|
@ -35,56 +35,6 @@ async function hydrateCache() {
|
|||
}
|
||||
}
|
||||
|
||||
function getDateRange(period: string): { range: DateRange; label: string } {
|
||||
const now = new Date()
|
||||
const end = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 23, 59, 59, 999)
|
||||
|
||||
switch (period) {
|
||||
case 'today': {
|
||||
const start = new Date(now.getFullYear(), now.getMonth(), now.getDate())
|
||||
return { range: { start, end }, label: `Today (${toDateString(start)})` }
|
||||
}
|
||||
case 'yesterday': {
|
||||
const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1)
|
||||
const yesterdayEnd = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1, 23, 59, 59, 999)
|
||||
return { range: { start, end: yesterdayEnd }, label: `Yesterday (${toDateString(start)})` }
|
||||
}
|
||||
case 'week': {
|
||||
const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7)
|
||||
return { range: { start, end }, label: 'Last 7 Days' }
|
||||
}
|
||||
case 'month': {
|
||||
const start = new Date(now.getFullYear(), now.getMonth(), 1)
|
||||
return { range: { start, end }, label: `${now.toLocaleString('default', { month: 'long' })} ${now.getFullYear()}` }
|
||||
}
|
||||
case '30days': {
|
||||
const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 30)
|
||||
return { range: { start, end }, label: 'Last 30 Days' }
|
||||
}
|
||||
case 'all': {
|
||||
// Cap "All Time" to the last 6 months. Older data is rarely actionable for a cost
|
||||
// tracker and keeps the parse path bounded so providers like Codex/Cursor with sparse
|
||||
// data still load in seconds.
|
||||
const start = new Date(now.getFullYear(), now.getMonth() - 6, now.getDate())
|
||||
return { range: { start, end }, label: 'Last 6 months' }
|
||||
}
|
||||
default: {
|
||||
const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7)
|
||||
return { range: { start, end }, label: 'Last 7 Days' }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type Period = 'today' | 'week' | '30days' | 'month' | 'all'
|
||||
|
||||
function toPeriod(s: string): Period {
|
||||
if (s === 'today') return 'today'
|
||||
if (s === 'month') return 'month'
|
||||
if (s === '30days') return '30days'
|
||||
if (s === 'all') return 'all'
|
||||
return 'week'
|
||||
}
|
||||
|
||||
function collect(val: string, acc: string[]): string[] {
|
||||
acc.push(val)
|
||||
return acc
|
||||
|
|
|
|||
|
|
@ -13,21 +13,12 @@ import { dateKey } from './day-aggregator.js'
|
|||
import { CompareView } from './compare.js'
|
||||
import { getPlanUsageOrNull, type PlanUsage } from './plan-usage.js'
|
||||
import { planDisplayName } from './plans.js'
|
||||
import { getDateRange, PERIODS, PERIOD_LABELS, type Period } from './cli-date.js'
|
||||
import { join } from 'path'
|
||||
import { patchStdoutForWindows } from './ink-win.js'
|
||||
|
||||
type Period = 'today' | 'week' | '30days' | 'month' | 'all'
|
||||
type View = 'dashboard' | 'optimize' | 'compare'
|
||||
|
||||
const PERIODS: Period[] = ['today', 'week', '30days', 'month', 'all']
|
||||
const PERIOD_LABELS: Record<Period, string> = {
|
||||
today: 'Today',
|
||||
week: '7 Days',
|
||||
'30days': '30 Days',
|
||||
month: 'This Month',
|
||||
all: 'All Time',
|
||||
}
|
||||
|
||||
const MIN_WIDE = 90
|
||||
const ORANGE = '#FF8C42'
|
||||
const DIM = '#555555'
|
||||
|
|
@ -104,16 +95,8 @@ function gradientColor(pct: number): string {
|
|||
return toHex(lerp(255, 245, t), lerp(140, 91, t), lerp(66, 91, t))
|
||||
}
|
||||
|
||||
function getDateRange(period: Period): { start: Date; end: Date } {
|
||||
const now = new Date()
|
||||
const end = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 23, 59, 59, 999)
|
||||
switch (period) {
|
||||
case 'today': return { start: new Date(now.getFullYear(), now.getMonth(), now.getDate()), end }
|
||||
case 'week': return { start: new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7), end }
|
||||
case '30days': return { start: new Date(now.getFullYear(), now.getMonth(), now.getDate() - 30), end }
|
||||
case 'month': return { start: new Date(now.getFullYear(), now.getMonth(), 1), end }
|
||||
case 'all': return { start: new Date(0), end }
|
||||
}
|
||||
/**
 * Date window for a dashboard period: the `range` half of what
 * `getDateRange` returns, with the label discarded.
 */
function getPeriodRange(period: Period): { start: Date; end: Date } {
  const { range } = getDateRange(period)
  return range
}
|
||||
|
||||
type Layout = { dashWidth: number; wide: boolean; halfWidth: number; barWidth: number }
|
||||
|
|
@ -608,7 +591,7 @@ function StatusBar({ width, showProvider, view, findingCount, optimizeAvailable,
|
|||
<Text color={ORANGE} bold>2</Text><Text dimColor> week </Text>
|
||||
<Text color={ORANGE} bold>3</Text><Text dimColor> 30 days </Text>
|
||||
<Text color={ORANGE} bold>4</Text><Text dimColor> month </Text>
|
||||
<Text color={ORANGE} bold>5</Text><Text dimColor> all time</Text>
|
||||
<Text color={ORANGE} bold>5</Text><Text dimColor> 6 months</Text>
|
||||
{!isOptimize && optimizeAvailable && findingCount != null && findingCount > 0 && (
|
||||
<><Text dimColor> </Text><Text color={ORANGE} bold>o</Text><Text dimColor> optimize</Text><Text color="#F55B5B"> ({findingCount})</Text></>
|
||||
)}
|
||||
|
|
@ -711,7 +694,7 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider,
|
|||
let cancelled = false
|
||||
async function scan() {
|
||||
if (projects.length === 0) { setOptimizeResult(null); return }
|
||||
const result = await scanAndDetect(projects, getDateRange(period))
|
||||
const result = await scanAndDetect(projects, getPeriodRange(period))
|
||||
if (!cancelled) setOptimizeResult(result)
|
||||
}
|
||||
scan()
|
||||
|
|
@ -723,7 +706,7 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider,
|
|||
setLoading(true)
|
||||
setOptimizeResult(null)
|
||||
try {
|
||||
const range = getDateRange(p)
|
||||
const range = getPeriodRange(p)
|
||||
const data = await parseAllSessions(range, prov)
|
||||
if (reloadGenerationRef.current !== generation) return
|
||||
|
||||
|
|
@ -828,7 +811,7 @@ function StaticDashboard({ projects, period, activeProvider, planUsage }: { proj
|
|||
|
||||
export async function renderDashboard(period: Period = 'week', provider: string = 'all', refreshSeconds?: number, projectFilter?: string[], excludeFilter?: string[], customRange?: DateRange | null): Promise<void> {
|
||||
await loadPricing()
|
||||
const range = customRange ?? getDateRange(period)
|
||||
const range = customRange ?? getPeriodRange(period)
|
||||
const filteredProjects = filterProjectsByName(await parseAllSessions(range, provider), projectFilter, excludeFilter)
|
||||
const planUsage = await getPlanUsageOrNull()
|
||||
const isTTY = process.stdin.isTTY && process.stdout.isTTY
|
||||
|
|
|
|||
353
src/optimize.ts
353
src/optimize.ts
|
|
@ -53,6 +53,19 @@ const LOW_RATIO_MEDIUM_THRESHOLD = 3
|
|||
const MIN_API_CALLS_FOR_CACHE = 10
|
||||
const CACHE_EXCESS_HIGH_THRESHOLD = 15000
|
||||
const UNUSED_MCP_HIGH_THRESHOLD = 3
|
||||
// MCP tool coverage detector thresholds. A server only earns a finding when
|
||||
// every condition holds: the inventory is large enough to matter, real-world
|
||||
// usage is poor, and we observed it in enough sessions to trust the signal.
|
||||
const MCP_COVERAGE_MIN_TOOLS = 10
|
||||
const MCP_COVERAGE_MIN_SESSIONS = 2
|
||||
const MCP_COVERAGE_LOW_THRESHOLD = 0.20
|
||||
const MCP_COVERAGE_HIGH_IMPACT_TOKENS = 200_000
|
||||
// Anthropic prices cache writes at 125% of base input and cache reads at
|
||||
// roughly 10% of base input. We use these to keep overhead estimates honest:
|
||||
// most MCP schema bytes live in the cached prefix and only get charged at
|
||||
// the discount rate after the first turn of a session.
|
||||
const CACHE_WRITE_MULTIPLIER = 1.25
|
||||
const CACHE_READ_DISCOUNT = 0.10
|
||||
const GHOST_AGENTS_HIGH_THRESHOLD = 5
|
||||
const GHOST_AGENTS_MEDIUM_THRESHOLD = 2
|
||||
const GHOST_SKILLS_HIGH_THRESHOLD = 10
|
||||
|
|
@ -481,10 +494,329 @@ export function detectDuplicateReads(calls: ToolCall[], dateRange?: DateRange):
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Inventory-versus-usage summary for a single MCP server, as produced by
 * `aggregateMcpCoverage`.
 *
 * Inventory is the union of the `mcp__<server>__<tool>` names found in each
 * session's `mcpInventory`. Per-tool usage is collected from the `mcpTools`
 * list on every assistant call, and call totals from `session.mcpBreakdown`.
 */
export type McpServerCoverage = {
  /** Server segment of the `mcp__<server>__<tool>` identifier. */
  server: string
  /** Number of distinct tools observed in the server's inventory. */
  toolsAvailable: number
  /** Number of inventoried tools that were invoked at least once. */
  toolsInvoked: number
  /** Fully-qualified names of inventoried tools never invoked, sorted. */
  unusedTools: string[]
  /** Total calls to the server, summed from `session.mcpBreakdown`. */
  invocations: number
  /** Sessions whose inventory listed at least one tool of this server. */
  loadedSessions: number
  /** `toolsInvoked / toolsAvailable`. */
  coverageRatio: number
}
|
||||
|
||||
/** Token buckets returned by `estimateMcpSchemaCost`. */
type McpSchemaCostEstimate = {
  /** Unused-schema tokens attributed to calls that reported cache creation. */
  cacheWriteTokens: number
  /** Unused-schema tokens attributed to calls that reported cache reads. */
  cacheReadTokens: number
  /**
   * Fresh-input equivalent of the two buckets:
   * `cacheWriteTokens * CACHE_WRITE_MULTIPLIER + cacheReadTokens * CACHE_READ_DISCOUNT`.
   */
  effectiveInputTokens: number
}
|
||||
|
||||
/**
 * Splits a fully-qualified MCP tool name of the form `mcp__<server>__<tool>`.
 *
 * @returns The server and tool segments, or `null` when the prefix is not
 *   `mcp` or either segment is empty. The tool segment keeps any further
 *   `__` separators it contains.
 */
function parseMcpToolFqn(fqn: string): { server: string; tool: string } | null {
  const parts = fqn.split('__')
  if (parts.length < 3 || parts[0] !== 'mcp') return null
  const server = parts[1]
  const tool = parts.slice(2).join('__')
  return server && tool ? { server, tool } : null
}

/**
 * Aggregates MCP tool inventory and usage across every session of the given
 * projects.
 *
 * For each server this collects:
 * - the union of tools listed in any session's `mcpInventory`,
 * - the set of tools named in any assistant call's `mcpTools`,
 * - the call total summed from `session.mcpBreakdown`,
 * - the number of sessions whose inventory listed the server.
 *
 * Servers that were only ever invoked (never inventoried) are dropped from
 * the result: without an inventory there is no denominator for coverage.
 *
 * @param projects - Projects whose sessions should be scanned.
 * @returns One entry per inventoried server, ordered by `toolsAvailable`
 *   descending (ties keep first-seen order).
 */
export function aggregateMcpCoverage(projects: ProjectSummary[]): McpServerCoverage[] {
  type ServerAcc = {
    inventory: Set<string>
    invokedTools: Set<string>
    invocations: number
    loadedSessions: number
  }
  const servers = new Map<string, ServerAcc>()

  const accFor = (server: string): ServerAcc => {
    let acc = servers.get(server)
    if (!acc) {
      acc = { inventory: new Set(), invokedTools: new Set(), invocations: 0, loadedSessions: 0 }
      servers.set(server, acc)
    }
    return acc
  }

  for (const project of projects) {
    for (const session of project.sessions) {
      // Only an observed inventory counts toward `loadedSessions`. A session
      // that merely invoked a server gives no evidence that its schema was
      // loaded, so it must not help satisfy the minimum-sessions threshold.
      const inventoriedServers = new Set<string>()
      for (const fqn of session.mcpInventory ?? []) {
        const parsed = parseMcpToolFqn(fqn)
        if (!parsed) continue
        accFor(parsed.server).inventory.add(fqn)
        inventoriedServers.add(parsed.server)
      }

      // Call totals come from `mcpBreakdown`, which is keyed by server.
      for (const [server, data] of Object.entries(session.mcpBreakdown)) {
        accFor(server).invocations += data.calls
      }

      // Per-tool usage has to be read from the calls themselves because
      // `mcpBreakdown` has no per-tool granularity.
      for (const turn of session.turns) {
        for (const call of turn.assistantCalls) {
          for (const fqn of call.mcpTools) {
            const parsed = parseMcpToolFqn(fqn)
            if (parsed) accFor(parsed.server).invokedTools.add(fqn)
          }
        }
      }

      for (const server of inventoriedServers) {
        accFor(server).loadedSessions += 1
      }
    }
  }

  const result: McpServerCoverage[] = []
  for (const [server, acc] of servers) {
    const toolsAvailable = acc.inventory.size
    if (toolsAvailable === 0) continue
    // Coverage is measured against the inventory only. Invocations of tools
    // that were never inventoried are ignored, so they can neither inflate
    // the numerator nor push the unused count below zero.
    const unusedTools = Array.from(acc.inventory)
      .filter(tool => !acc.invokedTools.has(tool))
      .sort()
    const toolsInvoked = toolsAvailable - unusedTools.length
    result.push({
      server,
      toolsAvailable,
      toolsInvoked,
      unusedTools,
      invocations: acc.invocations,
      loadedSessions: acc.loadedSessions,
      coverageRatio: toolsInvoked / toolsAvailable,
    })
  }
  result.sort((a, b) => b.toolsAvailable - a.toolsAvailable)
  return result
}
|
||||
|
||||
/**
 * Cache-aware token cost estimate for the unused-tool schema overhead of one
 * or more MCP servers.
 *
 * Returns three buckets:
 * - `cacheWriteTokens`: schema tokens attributed to calls that reported
 *   `cacheCreationInputTokens > 0`. These are weighted by
 *   `CACHE_WRITE_MULTIPLIER` in the effective total.
 * - `cacheReadTokens`: schema tokens attributed to calls that reported
 *   `cacheReadInputTokens > 0`, weighted by `CACHE_READ_DISCOUNT`.
 * - `effectiveInputTokens`: the weighted sum of the two buckets, i.e. the
 *   equivalent number of fresh-input tokens.
 *
 * Accounting rules:
 * - A session contributes only if its `mcpInventory` lists at least one
 *   well-formed `mcp__<server>__<tool>` name for a requested server. A
 *   session that invoked a server without inventorying it does not count,
 *   matching the definition of `loadedSessions` in `aggregateMcpCoverage`.
 * - The budget for a session is the unused-schema size of the requested
 *   servers that THAT session inventoried. A session that never loaded a
 *   server is not charged for that server's schema.
 * - Each call's contribution per bucket is capped at the call's own cache
 *   bucket. The cap is applied once against the session's combined budget,
 *   so several servers cannot each claim the same call's cache tokens.
 * - Every call with `cacheCreationInputTokens > 0` is counted as a cache
 *   write, not just the first one in the session. Both buckets may be
 *   non-zero on the same call and are accounted independently.
 *
 * @param unusedToolCounts - Unused tool count for the single server, or a map
 *   of server name to unused tool count.
 * @param projects - Projects whose sessions should be scanned.
 * @param serverOrServers - Server name (single form) or list of server names
 *   (multi form). Duplicate names are counted once.
 * @returns The three token buckets; all zero when there is no positive
 *   unused-schema budget.
 * @throws TypeError when the first and third arguments do not belong to the
 *   same overload (number + string, or record + string[]).
 */
export function estimateMcpSchemaCost(
  unusedToolCount: number,
  projects: ProjectSummary[],
  server: string,
): McpSchemaCostEstimate
export function estimateMcpSchemaCost(
  unusedToolCountsByServer: Record<string, number>,
  projects: ProjectSummary[],
  servers: string[],
): McpSchemaCostEstimate
export function estimateMcpSchemaCost(
  unusedToolCounts: Record<string, number> | number,
  projects: ProjectSummary[],
  serverOrServers: string | string[],
): McpSchemaCostEstimate {
  let servers: string[]
  let counts: Record<string, number>
  if (typeof unusedToolCounts === 'number') {
    if (typeof serverOrServers !== 'string') {
      throw new TypeError('single-server MCP cost estimates require a string server name')
    }
    servers = [serverOrServers]
    counts = { [serverOrServers]: unusedToolCounts }
  } else {
    if (!Array.isArray(serverOrServers)) {
      throw new TypeError('multi-server MCP cost estimates require a string[] server list')
    }
    servers = serverOrServers
    counts = unusedToolCounts
  }

  // Unused-schema budget per server, in tokens. Keying by name de-duplicates
  // repeated entries in `servers`; servers with nothing unused are left out
  // so they can neither mark a session as loaded nor add to its budget.
  const budgetByServer = new Map<string, number>()
  for (const srv of servers) {
    const tokens = (counts[srv] ?? 0) * TOKENS_PER_MCP_TOOL
    if (tokens > 0) budgetByServer.set(srv, tokens)
  }
  if (budgetByServer.size === 0) {
    return { cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 }
  }

  let cacheWriteTokens = 0
  let cacheReadTokens = 0

  for (const project of projects) {
    for (const session of project.sessions) {
      // Which of the requested servers did this session actually inventory?
      const loadedServers = new Set<string>()
      for (const fqn of session.mcpInventory ?? []) {
        const parts = fqn.split('__')
        if (parts.length < 3 || parts[0] !== 'mcp') continue
        const srv = parts[1]
        if (!srv || !parts.slice(2).join('__')) continue
        if (budgetByServer.has(srv)) loadedServers.add(srv)
      }
      if (loadedServers.size === 0) continue

      // Only schema that was present in this session can sit in its cache.
      let sessionBudget = 0
      for (const srv of loadedServers) sessionBudget += budgetByServer.get(srv) ?? 0

      for (const turn of session.turns) {
        for (const call of turn.assistantCalls) {
          const { cacheCreationInputTokens, cacheReadInputTokens } = call.usage
          if (cacheCreationInputTokens > 0) {
            cacheWriteTokens += Math.min(sessionBudget, cacheCreationInputTokens)
          }
          if (cacheReadInputTokens > 0) {
            cacheReadTokens += Math.min(sessionBudget, cacheReadInputTokens)
          }
        }
      }
    }
  }

  const effectiveInputTokens = cacheWriteTokens * CACHE_WRITE_MULTIPLIER + cacheReadTokens * CACHE_READ_DISCOUNT
  return { cacheWriteTokens, cacheReadTokens, effectiveInputTokens }
}
|
||||
|
||||
/**
 * Reports MCP servers that expose many tools but have few of them used.
 *
 * A server is included only when all three hold:
 * - `toolsAvailable` is greater than `MCP_COVERAGE_MIN_TOOLS`,
 * - `loadedSessions` is at least `MCP_COVERAGE_MIN_SESSIONS`,
 * - `coverageRatio` is below `MCP_COVERAGE_LOW_THRESHOLD`.
 *
 * Flagged servers are listed by number of unused tools, largest first.
 * `tokensSaved` is the rounded `effectiveInputTokens` from a single
 * `estimateMcpSchemaCost` pass over all flagged servers together. Impact is
 * `'high'` when that reaches `MCP_COVERAGE_HIGH_IMPACT_TOKENS` or when at
 * least `UNUSED_MCP_HIGH_THRESHOLD` servers are flagged, otherwise `'medium'`.
 *
 * NOTE(review): the suggested `claude mcp remove` commands use the server
 * name as it appears in the coverage entries; confirm that this matches the
 * name the user configured.
 *
 * @param projects - Projects used for the cost estimate (and for the default
 *   coverage computation).
 * @param coverage - Precomputed coverage; defaults to
 *   `aggregateMcpCoverage(projects)`.
 * @returns The finding, or `null` when no server meets the thresholds.
 */
export function detectMcpToolCoverage(
  projects: ProjectSummary[],
  coverage = aggregateMcpCoverage(projects),
): WasteFinding | null {
  if (coverage.length === 0) return null

  const unusedOf = (c: McpServerCoverage): number => c.toolsAvailable - c.toolsInvoked
  const meetsThresholds = (c: McpServerCoverage): boolean => {
    if (!(c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS)) return false
    if (!(c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS)) return false
    return c.coverageRatio < MCP_COVERAGE_LOW_THRESHOLD
  }

  const flagged = coverage.filter(meetsThresholds)
  if (flagged.length === 0) return null
  flagged.sort((a, b) => unusedOf(b) - unusedOf(a))

  const flaggedServers = flagged.map(c => c.server)
  const unusedCountsByServer: Record<string, number> = {}
  for (const c of flagged) {
    unusedCountsByServer[c.server] = unusedOf(c)
  }

  const lines = flagged.map(c => {
    const pct = Math.round(c.coverageRatio * 100)
    const plural = c.loadedSessions === 1 ? '' : 's'
    return `${c.server}: ${c.toolsInvoked}/${c.toolsAvailable} tools used (${pct}% coverage) across ${c.loadedSessions} session${plural}`
  })
  const removeCommands = flagged.map(c => `claude mcp remove '${c.server}'`)

  // One combined pass over all flagged servers, so a single call's cache
  // bucket is never claimed by more than one server.
  const { effectiveInputTokens } = estimateMcpSchemaCost(unusedCountsByServer, projects, flaggedServers)
  const tokensSaved = Math.round(effectiveInputTokens)

  const isHigh =
    tokensSaved >= MCP_COVERAGE_HIGH_IMPACT_TOKENS || flagged.length >= UNUSED_MCP_HIGH_THRESHOLD
  const impact: Impact = isHigh ? 'high' : 'medium'

  const single = flagged.length === 1
  return {
    title: `${flagged.length} MCP server${single ? '' : 's'} with low tool coverage`,
    explanation:
      `Schema for unused tools is loaded into the system prompt every session and ` +
      `carried in the cached prefix on every turn. ` +
      `${lines.join('; ')}.`,
    impact,
    tokensSaved,
    fix: {
      type: 'command',
      label: single
        ? 'Remove the underused server, or trim its tools in your MCP config:'
        : 'Remove underused servers, or trim their tools in your MCP config:',
      text: removeCommands.join('\n'),
    },
  }
}
|
||||
|
||||
export function detectUnusedMcp(
|
||||
calls: ToolCall[],
|
||||
projects: ProjectSummary[],
|
||||
projectCwds: Set<string>,
|
||||
mcpCoverage = aggregateMcpCoverage(projects),
|
||||
): WasteFinding | null {
|
||||
const configured = loadMcpConfigs(projectCwds)
|
||||
if (configured.size === 0) return null
|
||||
|
|
@ -501,10 +833,27 @@ export function detectUnusedMcp(
|
|||
}
|
||||
}
|
||||
|
||||
// Servers that the new coverage detector will flag fall under its
|
||||
// jurisdiction (per-tool granularity, cache-aware costing) and we
|
||||
// suppress them here to avoid double-flagging. Importantly, we suppress
|
||||
// only the servers that actually clear the coverage detector's
|
||||
// thresholds — a small, inventoried-but-uninvoked server that the
|
||||
// coverage detector skips would otherwise become a blind spot.
|
||||
const coverageReportedServers = new Set(
|
||||
mcpCoverage
|
||||
.filter(c =>
|
||||
c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS
|
||||
&& c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS
|
||||
&& c.coverageRatio < MCP_COVERAGE_LOW_THRESHOLD,
|
||||
)
|
||||
.map(c => c.server),
|
||||
)
|
||||
|
||||
const now = Date.now()
|
||||
const unused: string[] = []
|
||||
for (const entry of configured.values()) {
|
||||
if (calledServers.has(entry.normalized)) continue
|
||||
if (coverageReportedServers.has(entry.normalized)) continue
|
||||
if (entry.mtime > 0 && now - entry.mtime < MCP_NEW_CONFIG_GRACE_MS) continue
|
||||
unused.push(entry.original)
|
||||
}
|
||||
|
|
@ -1040,6 +1389,7 @@ export async function scanAndDetect(
|
|||
|
||||
const costRate = computeInputCostRate(projects)
|
||||
const { toolCalls, projectCwds, apiCalls, userMessages } = await scanSessions(dateRange)
|
||||
const mcpCoverage = aggregateMcpCoverage(projects)
|
||||
|
||||
const findings: WasteFinding[] = []
|
||||
const syncDetectors: Array<() => WasteFinding | null> = [
|
||||
|
|
@ -1047,7 +1397,8 @@ export async function scanAndDetect(
|
|||
() => detectLowReadEditRatio(toolCalls),
|
||||
() => detectJunkReads(toolCalls, dateRange),
|
||||
() => detectDuplicateReads(toolCalls, dateRange),
|
||||
() => detectUnusedMcp(toolCalls, projects, projectCwds),
|
||||
() => detectUnusedMcp(toolCalls, projects, projectCwds, mcpCoverage),
|
||||
() => detectMcpToolCoverage(projects, mcpCoverage),
|
||||
() => detectSessionOutliers(projects),
|
||||
() => detectBloatedClaudeMd(projectCwds),
|
||||
() => detectBashBloat(),
|
||||
|
|
|
|||
|
|
@ -203,10 +203,54 @@ function groupIntoTurns(entries: JournalEntry[], seenMsgIds: Set<string>): Parse
|
|||
return turns
|
||||
}
|
||||
|
||||
/**
 * Report whether `name` has the fully-qualified MCP tool shape
 * `mcp__<server>__<tool>`.
 *
 * Both the server and the tool segment must be non-empty. Names like
 * `mcp__server` (no tool segment) or `mcp__server__` (trailing empty tool)
 * would silently pollute the inventory and break downstream `split('__')`
 * consumers, so they are rejected here.
 *
 * @param name Candidate tool identifier.
 * @returns `true` only when the prefix, a non-empty server and a non-empty
 *   tool segment are all present.
 */
function isMcpToolName(name: string): boolean {
  const prefix = 'mcp__'
  if (!name.startsWith(prefix)) return false
  const rest = name.slice(prefix.length)
  // First `__` after the prefix separates the server from the tool.
  const sep = rest.indexOf('__')
  if (sep <= 0) return false // missing separator or empty server
  if (sep + 2 >= rest.length) return false // nothing after the separator
  return true
}

/**
 * Extract the MCP tool inventory observed across a session's JSONL entries.
 *
 * Only entries whose `attachment.type` is `"deferred_tools_delta"` and whose
 * `attachment.addedNames` is an array are considered. Tool inventory can
 * change mid-session if the user reloads MCP config, so every occurrence is
 * unioned rather than trusting only the first one.
 *
 * Built-in tools are filtered out: only well-formed `mcp__<server>__<tool>`
 * identifiers survive. Non-string items and malformed attachments are
 * skipped rather than treated as errors.
 *
 * @param entries Raw journal entries of one session.
 * @returns De-duplicated tool names in default string sort order; an empty
 *   array when nothing was observed.
 */
export function extractMcpInventory(entries: JournalEntry[]): string[] {
  const inventory = new Set<string>()
  for (const entry of entries) {
    const att = entry['attachment']
    if (!att || typeof att !== 'object') continue
    const a = att as { type?: unknown; addedNames?: unknown }
    if (a.type !== 'deferred_tools_delta') continue
    if (!Array.isArray(a.addedNames)) continue
    for (const name of a.addedNames) {
      if (typeof name !== 'string') continue
      if (!isMcpToolName(name)) continue
      inventory.add(name)
    }
  }
  // An empty set naturally yields an empty array, so no special case is needed.
  return Array.from(inventory).sort()
}
|
||||
|
||||
function buildSessionSummary(
|
||||
sessionId: string,
|
||||
project: string,
|
||||
turns: ClassifiedTurn[],
|
||||
mcpInventory?: string[],
|
||||
): SessionSummary {
|
||||
const modelBreakdown: SessionSummary['modelBreakdown'] = Object.create(null)
|
||||
const toolBreakdown: SessionSummary['toolBreakdown'] = Object.create(null)
|
||||
|
|
@ -311,6 +355,7 @@ function buildSessionSummary(
|
|||
bashBreakdown,
|
||||
categoryBreakdown,
|
||||
skillBreakdown,
|
||||
...(mcpInventory && mcpInventory.length > 0 ? { mcpInventory } : {}),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -362,7 +407,14 @@ async function parseSessionFile(
|
|||
}
|
||||
const classified = turns.map(classifyTurn)
|
||||
|
||||
return buildSessionSummary(sessionId, project, classified)
|
||||
// Inventory is extracted from the full entry stream, not just the
|
||||
// turns we kept after date filtering: tool availability is set up
|
||||
// once at the start of a session (with possible mid-session reloads),
|
||||
// and we want to reflect what was loaded even if the user only ran
|
||||
// turns inside a narrow date window.
|
||||
const mcpInventory = extractMcpInventory(entries)
|
||||
|
||||
return buildSessionSummary(sessionId, project, classified, mcpInventory)
|
||||
}
|
||||
|
||||
async function collectJsonlFiles(dirPath: string): Promise<string[]> {
|
||||
|
|
|
|||
|
|
@ -121,6 +121,12 @@ export type SessionSummary = {
|
|||
bashBreakdown: Record<string, { calls: number }>
|
||||
categoryBreakdown: Record<TaskCategory, { turns: number; costUSD: number; retries: number; editTurns: number; oneShotTurns: number }>
|
||||
skillBreakdown: Record<string, { turns: number; costUSD: number; editTurns: number; oneShotTurns: number }>
|
||||
// Observed MCP tools available in this session, captured from
|
||||
// `attachment.deferred_tools_delta.addedNames` entries. Union across all
|
||||
// turns. Each name is a fully-qualified `mcp__<server>__<tool>` identifier.
|
||||
// Built-in tools (Bash, Edit, etc.) are filtered out. Provider-agnostic field;
|
||||
// currently populated only by the Claude parser.
|
||||
mcpInventory?: string[]
|
||||
}
|
||||
|
||||
export type ProjectSummary = {
|
||||
|
|
|
|||
121
tests/cli-date.test.ts
Normal file
121
tests/cli-date.test.ts
Normal file
|
|
@ -0,0 +1,121 @@
|
|||
import { afterEach, describe, it, expect, vi } from 'vitest'
|
||||
import {
|
||||
getDateRange,
|
||||
PERIODS,
|
||||
PERIOD_LABELS,
|
||||
toPeriod,
|
||||
type Period,
|
||||
} from '../src/cli-date.js'
|
||||
|
||||
afterEach(() => {
|
||||
vi.useRealTimers()
|
||||
})
|
||||
|
||||
describe('getDateRange', () => {
|
||||
it('"all" is bounded to the last 6 months, not epoch', () => {
|
||||
const { range, label } = getDateRange('all')
|
||||
const now = new Date()
|
||||
|
||||
expect(label).toBe('Last 6 months')
|
||||
|
||||
// Regression guard: must never silently fall back to epoch (the old
|
||||
// dashboard bug) or any pre-2000 date.
|
||||
expect(range.start.getFullYear()).toBeGreaterThan(2000)
|
||||
|
||||
const monthsDiff =
|
||||
(now.getFullYear() - range.start.getFullYear()) * 12 +
|
||||
(now.getMonth() - range.start.getMonth())
|
||||
expect(monthsDiff).toBe(6)
|
||||
expect(range.start.getDate()).toBe(1)
|
||||
|
||||
// End is today, end of day.
|
||||
expect(range.end.getHours()).toBe(23)
|
||||
expect(range.end.getMinutes()).toBe(59)
|
||||
})
|
||||
|
||||
it('"all" does not overflow past the target month at end-of-month', () => {
|
||||
vi.useFakeTimers()
|
||||
vi.setSystemTime(new Date(2026, 7, 31, 12, 0, 0))
|
||||
|
||||
const { range } = getDateRange('all')
|
||||
|
||||
expect(range.start.getFullYear()).toBe(2026)
|
||||
expect(range.start.getMonth()).toBe(1)
|
||||
expect(range.start.getDate()).toBe(1)
|
||||
})
|
||||
|
||||
it('"week" returns the last 7 days', () => {
|
||||
const { range, label } = getDateRange('week')
|
||||
expect(label).toBe('Last 7 Days')
|
||||
// start = midnight 7 days ago, end = today 23:59:59.999 -> ~8 days span.
|
||||
const diffDays = (range.end.getTime() - range.start.getTime()) / (1000 * 60 * 60 * 24)
|
||||
expect(diffDays).toBeGreaterThanOrEqual(7)
|
||||
expect(diffDays).toBeLessThanOrEqual(8)
|
||||
})
|
||||
|
||||
it('"month" starts on day 1 of the current month', () => {
|
||||
const { range } = getDateRange('month')
|
||||
expect(range.start.getDate()).toBe(1)
|
||||
expect(range.start.getHours()).toBe(0)
|
||||
})
|
||||
|
||||
it('"30days" returns 30 days back', () => {
|
||||
const { range, label } = getDateRange('30days')
|
||||
expect(label).toBe('Last 30 Days')
|
||||
const diffDays = (range.end.getTime() - range.start.getTime()) / (1000 * 60 * 60 * 24)
|
||||
expect(diffDays).toBeGreaterThanOrEqual(30)
|
||||
expect(diffDays).toBeLessThanOrEqual(31)
|
||||
})
|
||||
|
||||
it('"today" starts at local midnight', () => {
|
||||
const { range } = getDateRange('today')
|
||||
expect(range.start.getHours()).toBe(0)
|
||||
expect(range.start.getMinutes()).toBe(0)
|
||||
expect(range.end.getHours()).toBe(23)
|
||||
})
|
||||
|
||||
it('"yesterday" is supported (CLI-only convenience)', () => {
|
||||
const { range, label } = getDateRange('yesterday')
|
||||
expect(label).toMatch(/^Yesterday/)
|
||||
expect(range.start.getHours()).toBe(0)
|
||||
expect(range.end.getHours()).toBe(23)
|
||||
})
|
||||
|
||||
it('unknown period falls back to "week"', () => {
|
||||
const fallback = getDateRange('not-a-period')
|
||||
const week = getDateRange('week')
|
||||
expect(fallback.label).toBe(week.label)
|
||||
})
|
||||
})
|
||||
|
||||
describe('PERIODS / PERIOD_LABELS', () => {
|
||||
it('exposes the expected period set', () => {
|
||||
expect(PERIODS).toEqual(['today', 'week', '30days', 'month', 'all'])
|
||||
})
|
||||
|
||||
it('has a label for every period', () => {
|
||||
for (const p of PERIODS) {
|
||||
expect(PERIOD_LABELS[p]).toBeTruthy()
|
||||
}
|
||||
})
|
||||
|
||||
it('"all" tab label reflects the 6-month bound', () => {
|
||||
// Short label used in the dashboard tab strip. The long-form label
|
||||
// ("Last 6 months") comes from getDateRange().label.
|
||||
expect(PERIOD_LABELS.all).toBe('6 Months')
|
||||
})
|
||||
})
|
||||
|
||||
describe('toPeriod', () => {
|
||||
it('round-trips known periods', () => {
|
||||
const known: Period[] = ['today', 'week', '30days', 'month', 'all']
|
||||
for (const p of known) {
|
||||
expect(toPeriod(p)).toBe(p)
|
||||
}
|
||||
})
|
||||
|
||||
it('falls back to "week" for unknown input', () => {
|
||||
expect(toPeriod('garbage')).toBe('week')
|
||||
expect(toPeriod('')).toBe('week')
|
||||
})
|
||||
})
|
||||
450
tests/mcp-coverage.test.ts
Normal file
450
tests/mcp-coverage.test.ts
Normal file
|
|
@ -0,0 +1,450 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
|
||||
import {
|
||||
aggregateMcpCoverage,
|
||||
detectMcpToolCoverage,
|
||||
estimateMcpSchemaCost,
|
||||
} from '../src/optimize.js'
|
||||
import type {
|
||||
ClassifiedTurn,
|
||||
ParsedApiCall,
|
||||
ProjectSummary,
|
||||
SessionSummary,
|
||||
TaskCategory,
|
||||
TokenUsage,
|
||||
} from '../src/types.js'
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Test fixtures
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// All-zero token usage. Fixtures spread this and override only the cache
// buckets a given test cares about.
const ZERO_USAGE: TokenUsage = {
  inputTokens: 0,
  outputTokens: 0,
  cacheCreationInputTokens: 0,
  cacheReadInputTokens: 0,
  cachedInputTokens: 0,
  reasoningTokens: 0,
  webSearchRequests: 0,
}

/**
 * Build a ParsedApiCall fixture.
 *
 * `tools` is stored as-is; `mcpTools` is the subset of it starting with
 * `mcp__`. `cacheCreation` / `cacheRead` populate the matching usage
 * buckets and `cost` sets `costUSD`. Every omitted option defaults to an
 * empty list or zero; all remaining fields are fixed placeholder values.
 */
function makeCall(opts: {
  tools?: string[]
  cacheCreation?: number
  cacheRead?: number
  cost?: number
} = {}): ParsedApiCall {
  const toolNames = opts.tools ?? []
  const mcpOnly = toolNames.filter(toolName => toolName.startsWith('mcp__'))
  const usage = {
    ...ZERO_USAGE,
    cacheCreationInputTokens: opts.cacheCreation ?? 0,
    cacheReadInputTokens: opts.cacheRead ?? 0,
  }
  return {
    provider: 'claude',
    model: 'Opus 4.7',
    usage,
    costUSD: opts.cost ?? 0,
    tools: toolNames,
    mcpTools: mcpOnly,
    skills: [],
    hasAgentSpawn: false,
    hasPlanMode: false,
    speed: 'standard',
    timestamp: '2026-05-04T00:00:00Z',
    bashCommands: [],
    deduplicationKey: 'k',
  }
}
|
||||
|
||||
/**
 * Wrap the given API calls in a ClassifiedTurn fixture. Everything except
 * `assistantCalls` is a fixed placeholder (session `s1`, category `coding`,
 * no retries, no edits).
 */
function makeTurn(calls: ParsedApiCall[]): ClassifiedTurn {
  const turn: ClassifiedTurn = {
    userMessage: '',
    assistantCalls: calls,
    timestamp: '2026-05-04T00:00:00Z',
    sessionId: 's1',
    category: 'coding',
    retries: 0,
    hasEdits: false,
  }
  return turn
}
|
||||
|
||||
/**
 * Build a SessionSummary fixture.
 *
 * `apiCalls` is derived by counting the assistant calls of every supplied
 * turn. `mcpInventory` is only set when `inventory` is provided (an empty
 * array still counts as provided). All totals are zero and all breakdowns
 * other than `mcpBreakdown` are empty.
 */
function makeSession(opts: {
  sessionId?: string
  inventory?: string[]
  turns?: ClassifiedTurn[]
  mcpBreakdown?: Record<string, { calls: number }>
}): SessionSummary {
  const sessionTurns = opts.turns ?? []

  let callCount = 0
  for (const turn of sessionTurns) {
    callCount += turn.assistantCalls.length
  }

  const noCategories = {} as Record<TaskCategory, { turns: number; costUSD: number; retries: number; editTurns: number; oneShotTurns: number }>

  const summary: SessionSummary = {
    sessionId: opts.sessionId ?? 's1',
    project: 'p',
    firstTimestamp: '2026-05-04T00:00:00Z',
    lastTimestamp: '2026-05-04T00:00:00Z',
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheReadTokens: 0,
    totalCacheWriteTokens: 0,
    apiCalls: callCount,
    turns: sessionTurns,
    modelBreakdown: {},
    toolBreakdown: {},
    mcpBreakdown: opts.mcpBreakdown ?? {},
    bashBreakdown: {},
    categoryBreakdown: noCategories,
    skillBreakdown: {},
  }
  // Leave the key absent (rather than undefined) when no inventory was given.
  if (opts.inventory) {
    summary.mcpInventory = opts.inventory
  }
  return summary
}
|
||||
|
||||
/**
 * Wrap sessions in a ProjectSummary fixture named `p` at `/tmp/p`.
 * `totalApiCalls` is the sum of each session's `apiCalls`; cost is zero.
 */
function project(sessions: SessionSummary[]): ProjectSummary {
  let totalApiCalls = 0
  for (const session of sessions) {
    totalApiCalls += session.apiCalls
  }
  return {
    project: 'p',
    projectPath: '/tmp/p',
    sessions,
    totalCostUSD: 0,
    totalApiCalls,
  }
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// aggregateMcpCoverage
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('aggregateMcpCoverage', () => {
|
||||
it('returns empty list when no session has MCP inventory', () => {
|
||||
const projects = [project([makeSession({})])]
|
||||
expect(aggregateMcpCoverage(projects)).toEqual([])
|
||||
})
|
||||
|
||||
it('reports per-server tools available, invoked, and unused', () => {
|
||||
const inventory = [
|
||||
'mcp__hf__hub_repo_search',
|
||||
'mcp__hf__paper_search',
|
||||
'mcp__hf__hf_doc_search',
|
||||
]
|
||||
const turns = [
|
||||
makeTurn([makeCall({ tools: ['mcp__hf__hub_repo_search'] })]),
|
||||
]
|
||||
const sessions = [
|
||||
makeSession({ inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
|
||||
]
|
||||
const result = aggregateMcpCoverage([project(sessions)])
|
||||
|
||||
expect(result).toHaveLength(1)
|
||||
expect(result[0]!.server).toBe('hf')
|
||||
expect(result[0]!.toolsAvailable).toBe(3)
|
||||
expect(result[0]!.toolsInvoked).toBe(1)
|
||||
expect(result[0]!.unusedTools).toEqual([
|
||||
'mcp__hf__hf_doc_search',
|
||||
'mcp__hf__paper_search',
|
||||
])
|
||||
expect(result[0]!.coverageRatio).toBeCloseTo(1 / 3, 5)
|
||||
expect(result[0]!.invocations).toBe(1)
|
||||
expect(result[0]!.loadedSessions).toBe(1)
|
||||
})
|
||||
|
||||
it('unions inventory across multiple sessions for the same server', () => {
|
||||
const sessions = [
|
||||
makeSession({ sessionId: 'a', inventory: ['mcp__x__a', 'mcp__x__b'] }),
|
||||
makeSession({ sessionId: 'b', inventory: ['mcp__x__b', 'mcp__x__c'] }),
|
||||
]
|
||||
const result = aggregateMcpCoverage([project(sessions)])
|
||||
expect(result[0]!.toolsAvailable).toBe(3)
|
||||
expect(result[0]!.loadedSessions).toBe(2)
|
||||
})
|
||||
|
||||
it('separates servers with similar names', () => {
|
||||
const sessions = [
|
||||
makeSession({ inventory: ['mcp__hf__a', 'mcp__hugface__a'] }),
|
||||
]
|
||||
const result = aggregateMcpCoverage([project(sessions)])
|
||||
expect(result.map(r => r.server).sort()).toEqual(['hf', 'hugface'])
|
||||
})
|
||||
|
||||
it('skips invocations without inventory (foreign server, no inventory observed)', () => {
|
||||
// A server can show up only via a call, with no inventory observed for
// it. Coverage would be 0/0, so it is not a candidate for the
// unused-coverage finding and is not reported at all.
|
||||
const turns = [makeTurn([makeCall({ tools: ['mcp__ghost__t1'] })])]
|
||||
const sessions = [
|
||||
makeSession({ turns, mcpBreakdown: { ghost: { calls: 1 } } }),
|
||||
]
|
||||
const result = aggregateMcpCoverage([project(sessions)])
|
||||
// No inventory entry -> aggregator drops the server from the report
|
||||
// because we cannot reason about coverage without an inventory baseline.
|
||||
expect(result).toEqual([])
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// estimateMcpSchemaCost — cache-aware accounting
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('estimateMcpSchemaCost', () => {
|
||||
it('charges first cacheCreation turn at full price, subsequent turns at cache-read', () => {
|
||||
const turns = [
|
||||
makeTurn([makeCall({ cacheCreation: 50_000 })]), // first turn: write
|
||||
makeTurn([makeCall({ cacheRead: 60_000 })]), // ongoing: read
|
||||
makeTurn([makeCall({ cacheRead: 60_000 })]),
|
||||
]
|
||||
const sessions = [makeSession({
|
||||
inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
|
||||
turns,
|
||||
mcpBreakdown: { svc: { calls: 0 } },
|
||||
})]
|
||||
// 30 unused tools * 400 token estimate = 12_000 schema tokens
|
||||
// cap by call cache buckets so we never overclaim
|
||||
const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(12_000) // capped by 50k creation, 12k schema fits
|
||||
expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns
|
||||
// effective = write * 1.25 + read * 0.10 (cache pricing)
|
||||
expect(cost.effectiveInputTokens).toBeCloseTo(12_000 * 1.25 + 24_000 * 0.10, 5)
|
||||
})
|
||||
|
||||
it('caps by available cache bucket so we never overclaim', () => {
|
||||
const turns = [makeTurn([makeCall({ cacheCreation: 1_000 })])]
|
||||
const sessions = [makeSession({
|
||||
inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
|
||||
turns,
|
||||
mcpBreakdown: { svc: { calls: 0 } },
|
||||
})]
|
||||
// 30*400 = 12k schema tokens, but the call only had 1k cache-creation,
|
||||
// so we should not claim more than 1k of overhead for that turn.
|
||||
const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(1_000)
|
||||
})
|
||||
|
||||
it('returns zero when no unused tools', () => {
|
||||
const sessions = [makeSession({
|
||||
inventory: ['mcp__svc__t1'],
|
||||
turns: [makeTurn([makeCall({ cacheCreation: 5000 })])],
|
||||
})]
|
||||
const cost = estimateMcpSchemaCost(0, [project(sessions)], 'svc')
|
||||
expect(cost).toEqual({ cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 })
|
||||
})
|
||||
|
||||
it('counts cache write AND cache read on the same call', () => {
|
||||
// A long session can have a cache rebuild mid-stream where one call
|
||||
// reports both buckets. The estimator must charge both, not skip the
|
||||
// read because of the write.
|
||||
const turns = [makeTurn([
|
||||
makeCall({ cacheCreation: 50_000, cacheRead: 30_000 }),
|
||||
])]
|
||||
const sessions = [makeSession({
|
||||
inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
|
||||
turns,
|
||||
mcpBreakdown: { svc: { calls: 0 } },
|
||||
})]
|
||||
const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(12_000) // capped at 50k creation
|
||||
expect(cost.cacheReadTokens).toBe(12_000) // capped at 30k read
|
||||
})
|
||||
|
||||
it('counts every cache rebuild, not just the first one', () => {
|
||||
// Sessions that span more than 5 minutes can rebuild the cache
|
||||
// multiple times. The estimator should treat every cacheCreation
|
||||
// bucket as another write.
|
||||
const turns = [makeTurn([
|
||||
makeCall({ cacheCreation: 50_000 }),
|
||||
makeCall({ cacheCreation: 50_000 }), // rebuild after cache TTL
|
||||
makeCall({ cacheRead: 60_000 }),
|
||||
])]
|
||||
const sessions = [makeSession({
|
||||
inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
|
||||
turns,
|
||||
mcpBreakdown: { svc: { calls: 0 } },
|
||||
})]
|
||||
const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(24_000) // both rebuilds counted
|
||||
expect(cost.cacheReadTokens).toBe(12_000)
|
||||
})
|
||||
|
||||
it('skips sessions where the server was never loaded', () => {
|
||||
const turns = [makeTurn([makeCall({ cacheCreation: 100_000 })])]
|
||||
const sessions = [makeSession({
|
||||
inventory: ['mcp__other__t1'],
|
||||
turns,
|
||||
})]
|
||||
const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(0)
|
||||
})
|
||||
|
||||
it('requires observed inventory for the server, not just invocations', () => {
|
||||
// Session invoked the server (mcpBreakdown set, mcpTools called) but
|
||||
// never reported a deferred_tools_delta for it. Cost should be 0 to
|
||||
// stay consistent with aggregateMcpCoverage's loadedSessions rule.
|
||||
const turns = [makeTurn([
|
||||
makeCall({ tools: ['mcp__svc__t1'], cacheCreation: 100_000 }),
|
||||
])]
|
||||
const sessions = [makeSession({
|
||||
// No inventory at all
|
||||
turns,
|
||||
mcpBreakdown: { svc: { calls: 1 } },
|
||||
})]
|
||||
const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(0)
|
||||
expect(cost.cacheReadTokens).toBe(0)
|
||||
})
|
||||
|
||||
it('caps combined unused-schema budget across multiple flagged servers', () => {
|
||||
// Two flagged servers, each with 30 unused tools (12k schema each =
|
||||
// 24k combined). One call has a 50k cache-creation bucket. The
|
||||
// combined cap means total write tokens reported is min(24k, 50k) =
|
||||
// 24k, not 24k + 24k = 48k.
|
||||
const inventory = [
|
||||
...Array.from({ length: 30 }, (_, i) => `mcp__a__t${i}`),
|
||||
...Array.from({ length: 30 }, (_, i) => `mcp__b__t${i}`),
|
||||
]
|
||||
const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])]
|
||||
const sessions = [makeSession({ inventory, turns })]
|
||||
const cost = estimateMcpSchemaCost(
|
||||
{ a: 30, b: 30 },
|
||||
[project(sessions)],
|
||||
['a', 'b'],
|
||||
)
|
||||
expect(cost.cacheWriteTokens).toBe(24_000)
|
||||
})
|
||||
|
||||
it('still works with the single-server signature (backward compat)', () => {
|
||||
const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])]
|
||||
const sessions = [makeSession({
|
||||
inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`),
|
||||
turns,
|
||||
})]
|
||||
const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc')
|
||||
expect(cost.cacheWriteTokens).toBe(12_000)
|
||||
})
|
||||
})
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// detectMcpToolCoverage — finding emission with thresholds
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
describe('detectMcpToolCoverage', () => {
|
||||
it('returns null when no inventory exists at all', () => {
|
||||
expect(detectMcpToolCoverage([project([makeSession({})])])).toBeNull()
|
||||
})
|
||||
|
||||
it('does not flag a server with healthy coverage', () => {
|
||||
const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
|
||||
const turns = [makeTurn(
|
||||
Array.from({ length: 8 }, (_, i) => makeCall({ tools: [`mcp__svc__t${i}`] })),
|
||||
)]
|
||||
const sessions = [
|
||||
makeSession({ sessionId: 'a', inventory, turns }),
|
||||
makeSession({ sessionId: 'b', inventory, turns }),
|
||||
]
|
||||
// 8/20 = 40% coverage, above the 20% threshold -> no finding
|
||||
expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
|
||||
})
|
||||
|
||||
it('does not flag a server with too few tools (signal too noisy)', () => {
|
||||
// Below MCP_COVERAGE_MIN_TOOLS=10
|
||||
const inventory = ['mcp__svc__a', 'mcp__svc__b']
|
||||
const sessions = [
|
||||
makeSession({ sessionId: 'a', inventory }),
|
||||
makeSession({ sessionId: 'b', inventory }),
|
||||
]
|
||||
expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
|
||||
})
|
||||
|
||||
it('does not flag if seen in only one session (insufficient evidence)', () => {
|
||||
const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
|
||||
const sessions = [makeSession({ inventory })]
|
||||
expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
|
||||
})
|
||||
|
||||
it('flags a large server with low coverage across multiple sessions', () => {
|
||||
const inventory = Array.from({ length: 30 }, (_, i) => `mcp__hf__t${i}`)
|
||||
const turns = [makeTurn([
|
||||
makeCall({ tools: ['mcp__hf__t0'], cacheCreation: 100_000 }),
|
||||
])]
|
||||
const sessions = [
|
||||
makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
|
||||
makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }),
|
||||
]
|
||||
const finding = detectMcpToolCoverage([project(sessions)])
|
||||
expect(finding).not.toBeNull()
|
||||
expect(finding!.title).toContain('1 MCP server')
|
||||
expect(finding!.title).toContain('low tool coverage')
|
||||
expect(finding!.explanation).toContain('hf')
|
||||
expect(finding!.explanation).toContain('1/30')
|
||||
expect(finding!.fix.type).toBe('command')
|
||||
expect((finding!.fix as { text: string }).text).toContain("claude mcp remove 'hf'")
|
||||
expect(finding!.tokensSaved).toBeGreaterThan(0)
|
||||
})
|
||||
|
||||
it('escalates impact to high when token waste crosses the threshold', () => {
|
||||
const inventory = Array.from({ length: 60 }, (_, i) => `mcp__big__t${i}`)
|
||||
// 60 tools * 400 tokens = 24k schema. With many sessions and large
|
||||
// cache-creation buckets, total effective tokens easily clear 200k.
|
||||
const turns = [makeTurn([
|
||||
makeCall({ tools: ['mcp__big__t0'], cacheCreation: 50_000 }),
|
||||
makeCall({ cacheRead: 60_000 }),
|
||||
makeCall({ cacheRead: 60_000 }),
|
||||
])]
|
||||
// Need enough sessions so the per-session ~34.8k effective tokens
// (24k write × 1.25 + 48k read × 0.10) sum past the 200k high-impact threshold.
|
||||
const sessions = Array.from({ length: 8 }, (_, i) =>
|
||||
makeSession({ sessionId: `s${i}`, inventory, turns, mcpBreakdown: { big: { calls: 1 } } }),
|
||||
)
|
||||
const finding = detectMcpToolCoverage([project(sessions)])
|
||||
expect(finding).not.toBeNull()
|
||||
expect(finding!.impact).toBe('high')
|
||||
})
|
||||
|
||||
it('does not count invocation-only sessions toward loadedSessions', () => {
|
||||
// Server `svc` has inventory in only one session, but is invoked in
|
||||
// a second session that never observed the schema. Pre-fix this
|
||||
// would have satisfied the >=2 session threshold; it must not now.
|
||||
const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
|
||||
const turns = [makeTurn([
|
||||
makeCall({ tools: ['mcp__svc__t0'], cacheCreation: 50_000 }),
|
||||
])]
|
||||
const sessions = [
|
||||
makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
|
||||
// No inventory — this shouldn't be considered a "loaded" session.
|
||||
makeSession({ sessionId: 'b', turns, mcpBreakdown: { svc: { calls: 1 } } }),
|
||||
]
|
||||
expect(detectMcpToolCoverage([project(sessions)])).toBeNull()
|
||||
})
|
||||
|
||||
it('does not let invocations of un-inventoried tools inflate coverage', () => {
|
||||
// Inventory has 20 tools, none invoked. Calls hit a 21st tool that
|
||||
// never appeared in any deferred_tools_delta (could be a renamed/
|
||||
// removed tool from an older session config). Coverage must stay 0%
|
||||
// and unusedCount must not go negative.
|
||||
const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`)
|
||||
const turns = [makeTurn([makeCall({ tools: ['mcp__svc__ghost'] })])]
|
||||
const sessions = [
|
||||
makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
|
||||
makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }),
|
||||
]
|
||||
const result = aggregateMcpCoverage([project(sessions)])
|
||||
expect(result[0]!.toolsAvailable).toBe(20)
|
||||
expect(result[0]!.toolsInvoked).toBe(0)
|
||||
expect(result[0]!.coverageRatio).toBe(0)
|
||||
expect(result[0]!.unusedTools).toHaveLength(20)
|
||||
})
|
||||
|
||||
it('handles multiple flagged servers and pluralises the title', () => {
|
||||
const sessions: SessionSummary[] = []
|
||||
for (const server of ['svc1', 'svc2']) {
|
||||
const inventory = Array.from({ length: 20 }, (_, i) => `mcp__${server}__t${i}`)
|
||||
const turns = [makeTurn([
|
||||
makeCall({ tools: [`mcp__${server}__t0`], cacheCreation: 50_000 }),
|
||||
])]
|
||||
sessions.push(
|
||||
makeSession({ sessionId: `${server}-a`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }),
|
||||
makeSession({ sessionId: `${server}-b`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }),
|
||||
)
|
||||
}
|
||||
const finding = detectMcpToolCoverage([project(sessions)])
|
||||
expect(finding).not.toBeNull()
|
||||
expect(finding!.title).toContain('2 MCP servers')
|
||||
expect((finding!.fix as { text: string }).text.split('\n')).toHaveLength(2)
|
||||
})
|
||||
})
|
||||
126
tests/parser-mcp-inventory.test.ts
Normal file
126
tests/parser-mcp-inventory.test.ts
Normal file
|
|
@ -0,0 +1,126 @@
|
|||
import { describe, it, expect } from 'vitest'
|
||||
|
||||
import { extractMcpInventory } from '../src/parser.js'
|
||||
import type { JournalEntry } from '../src/types.js'
|
||||
|
||||
/**
 * Build a JournalEntry fixture: starts from `{ type: 'attachment' }` and
 * layers `overrides` on top, so callers may replace `type` or attach any
 * extra fields the parser inspects.
 */
function entry(overrides: Partial<JournalEntry> & Record<string, unknown>): JournalEntry {
  const base: Record<string, unknown> = { type: 'attachment' }
  return Object.assign(base, overrides) as JournalEntry
}
|
||||
|
||||
describe('extractMcpInventory', () => {
|
||||
it('returns empty array when no entries have an attachment', () => {
|
||||
expect(extractMcpInventory([entry({ type: 'user' })])).toEqual([])
|
||||
})
|
||||
|
||||
it('returns empty array when no deferred_tools_delta is present', () => {
|
||||
expect(extractMcpInventory([
|
||||
entry({ attachment: { type: 'something_else', addedNames: ['mcp__a__b'] } }),
|
||||
])).toEqual([])
|
||||
})
|
||||
|
||||
it('extracts mcp__server__tool names from a single delta', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({
|
||||
attachment: {
|
||||
type: 'deferred_tools_delta',
|
||||
addedNames: ['Bash', 'Edit', 'mcp__hf__hub_repo_search', 'mcp__hf__paper_search'],
|
||||
},
|
||||
}),
|
||||
])
|
||||
expect(result).toEqual(['mcp__hf__hub_repo_search', 'mcp__hf__paper_search'])
|
||||
})
|
||||
|
||||
it('filters out built-in tools (no mcp__ prefix)', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({
|
||||
attachment: {
|
||||
type: 'deferred_tools_delta',
|
||||
addedNames: ['Bash', 'Edit', 'WebFetch', 'mcp__svc__t1'],
|
||||
},
|
||||
}),
|
||||
])
|
||||
expect(result).toEqual(['mcp__svc__t1'])
|
||||
})
|
||||
|
||||
it('rejects malformed names: empty server segment', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({
|
||||
attachment: {
|
||||
type: 'deferred_tools_delta',
|
||||
addedNames: ['mcp____tool', 'mcp__svc__t1'],
|
||||
},
|
||||
}),
|
||||
])
|
||||
expect(result).toEqual(['mcp__svc__t1'])
|
||||
})
|
||||
|
||||
it('rejects malformed names: missing tool segment (no second `__`)', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({
|
||||
attachment: {
|
||||
type: 'deferred_tools_delta',
|
||||
addedNames: ['mcp__server', 'mcp__svc__t1'],
|
||||
},
|
||||
}),
|
||||
])
|
||||
expect(result).toEqual(['mcp__svc__t1'])
|
||||
})
|
||||
|
||||
it('rejects malformed names: empty tool segment (trailing `__`)', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({
|
||||
attachment: {
|
||||
type: 'deferred_tools_delta',
|
||||
addedNames: ['mcp__server__', 'mcp__svc__t1'],
|
||||
},
|
||||
}),
|
||||
])
|
||||
expect(result).toEqual(['mcp__svc__t1'])
|
||||
})
|
||||
|
||||
it('unions across multiple delta entries (incremental adds)', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1'] } }),
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t2', 'mcp__b__t1'] } }),
|
||||
])
|
||||
expect(result).toEqual(['mcp__a__t1', 'mcp__a__t2', 'mcp__b__t1'])
|
||||
})
|
||||
|
||||
it('deduplicates names seen in multiple deltas', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1', 'mcp__a__t1'] } }),
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1'] } }),
|
||||
])
|
||||
expect(result).toEqual(['mcp__a__t1'])
|
||||
})
|
||||
|
||||
it('tolerates missing or non-string addedNames', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({ attachment: { type: 'deferred_tools_delta' } }),
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: 'not-an-array' } }),
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: [42, null, 'mcp__svc__t1', undefined] } }),
|
||||
])
|
||||
expect(result).toEqual(['mcp__svc__t1'])
|
||||
})
|
||||
|
||||
it('tolerates malformed attachment object', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({ attachment: null }),
|
||||
entry({ attachment: 'string-not-object' }),
|
||||
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__svc__t1'] } }),
|
||||
])
|
||||
expect(result).toEqual(['mcp__svc__t1'])
|
||||
})
|
||||
|
||||
it('returns names in sorted order', () => {
|
||||
const result = extractMcpInventory([
|
||||
entry({
|
||||
attachment: {
|
||||
type: 'deferred_tools_delta',
|
||||
addedNames: ['mcp__zzz__a', 'mcp__aaa__z', 'mcp__mmm__m'],
|
||||
},
|
||||
}),
|
||||
])
|
||||
expect(result).toEqual(['mcp__aaa__z', 'mcp__mmm__m', 'mcp__zzz__a'])
|
||||
})
|
||||
})
|
||||
Loading…
Add table
Add a link
Reference in a new issue