From 3dc3e3271537bc5898c55f2ada277ff56a9cd423 Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 5 May 2026 03:29:54 +0300 Subject: [PATCH 1/6] fix(date-range): unify 'all' period semantics between CLI and dashboard `getDateRange` was duplicated across `src/cli.ts` and `src/dashboard.tsx` with conflicting semantics for `'all'`. The CLI intentionally bounded `'all'` to the last 6 months (justified inline: keeps Codex/Cursor parses responsive on sparse multi-year history). The dashboard returned `new Date(0)` instead, so the same `--period all` flag silently meant two different windows depending on which entry point you hit. `Period`, `PERIODS`, `PERIOD_LABELS`, and `toPeriod` were duplicated as well, and `cli-date.ts` already existed for date helpers (`parseDateRangeFlags`) so the consolidation lives there. Both call sites now go through a single `getDateRange(period: string)` in `cli-date.ts` that returns `{ range, label }`. The dashboard wraps it as `getPeriodRange(period: Period)` to keep the strict `Period` type at the React boundary while letting the CLI continue to accept extras like `'yesterday'`. `PERIOD_LABELS.all` becomes `'6 Months'` (short, for the dashboard tab strip; the previous `'All Time'` was misleading and the long-form `'Last 6 months'` from `getDateRange().label` already drives CLI output). Changes: - src/cli-date.ts: add `Period`, `PERIODS`, `PERIOD_LABELS`, `toPeriod`, `getDateRange`. Pull the existing 6-month rationale into a named `ALL_TIME_MONTHS` constant. - src/cli.ts: drop the local copies and import from cli-date. - src/dashboard.tsx: drop the local copies, route through `getPeriodRange`, alias the shared `getDateRange` import to `getDateRangeShared` to avoid shadowing the wrapper. 
- tests/cli-date.test.ts: 13 cases covering `'all'` regression guard (must never silently fall back to `Date(0)`), CLI/dashboard agreement, end-of-month clamping tolerance, `'yesterday'` support, and unknown-input fallback. - README.md, CHANGELOG.md: surface the bound and point heavy users at `--from`/`--to` for unbounded windows. The CLI flag `--period all` continues to be accepted; only the dashboard window changes to match what the CLI was already doing. No public API or schema change. Refs #93 --- CHANGELOG.md | 5 ++ README.md | 4 +- src/cli-date.ts | 85 +++++++++++++++++++++++++++++ src/cli.ts | 52 +----------------- src/dashboard.tsx | 29 +++------- tests/cli-date.test.ts | 118 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 217 insertions(+), 76 deletions(-) create mode 100644 tests/cli-date.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b31e30b..ee16abe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## Unreleased + +### Fixed (CLI) +- **`all` period semantics unified between CLI and dashboard.** The dashboard treated `--period all` as all-time (epoch start) while the CLI bounded it to the last 6 months. Both now consistently mean "Last 6 months". Period helpers (`Period`, `PERIODS`, `PERIOD_LABELS`, `toPeriod`, `getDateRange`) consolidated into `cli-date.ts`. Use `--from` / `--to` for unbounded historical ranges. + ## 0.9.6 - 2026-05-03 ### Added (CLI) diff --git a/README.md b/README.md index 2d73f26..1602d87 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ codeburn yield # track productive vs reverted/abandoned spend codeburn yield -p 30days # yield analysis for last 30 days ``` -Arrow keys switch between Today, 7 Days, 30 Days, Month, and All Time. Press `q` to quit, `1` `2` `3` `4` `5` as shortcuts, `c` to open model comparison, `o` to open optimize. The dashboard auto-refreshes every 30 seconds by default (`--refresh 0` to disable). 
It also shows average cost per session and the five most expensive sessions across all projects. +Arrow keys switch between Today, 7 Days, 30 Days, Month, and 6 Months (use `--from` / `--to` for an exact historical window). Press `q` to quit, `1` `2` `3` `4` `5` as shortcuts, `c` to open model comparison, `o` to open optimize. The dashboard auto-refreshes every 30 seconds by default (`--refresh 0` to disable). It also shows average cost per session and the five most expensive sessions across all projects. ## Supported Providers @@ -196,7 +196,7 @@ You can also open it inline from the dashboard: press `o` when a finding count a ### Compare ```bash -codeburn compare # interactive model picker (default: all time) +codeburn compare # interactive model picker (default: last 6 months) codeburn compare -p week # last 7 days codeburn compare -p today # today only codeburn compare --provider claude # Claude Code sessions only diff --git a/src/cli-date.ts b/src/cli-date.ts index 66831b9..b3d502d 100644 --- a/src/cli-date.ts +++ b/src/cli-date.ts @@ -1,4 +1,5 @@ import type { DateRange } from './types.js' +import { toDateString } from './daily-cache.js' const ISO_DATE_RE = /^\d{4}-\d{2}-\d{2}$/ @@ -7,6 +8,35 @@ const END_OF_DAY_MINUTES = 59 const END_OF_DAY_SECONDS = 59 const END_OF_DAY_MS = 999 +// "All Time" is intentionally bounded to the last 6 months. Older data is +// rarely actionable for a cost tracker, and capping the range keeps the parse +// path bounded so providers like Codex/Cursor with sparse multi-year history +// still load in seconds. Users who need an unbounded window can use +// `--from` / `--to`. +const ALL_TIME_MONTHS = 6 + +export type Period = 'today' | 'week' | '30days' | 'month' | 'all' + +export const PERIODS: Period[] = ['today', 'week', '30days', 'month', 'all'] + +// Short labels suitable for the dashboard tab strip. Long-form labels for +// header text come from `getDateRange().label`. 
+export const PERIOD_LABELS: Record = { + today: 'Today', + week: '7 Days', + '30days': '30 Days', + month: 'This Month', + all: '6 Months', +} + +export function toPeriod(s: string): Period { + if (s === 'today') return 'today' + if (s === 'month') return 'month' + if (s === '30days') return '30days' + if (s === 'all') return 'all' + return 'week' +} + function parseLocalDate(s: string): Date { if (!ISO_DATE_RE.test(s)) { throw new Error(`Invalid date format "${s}": expected YYYY-MM-DD`) @@ -37,3 +67,58 @@ export function parseDateRangeFlags(from: string | undefined, to: string | undef } return { start, end } } + +/** + * Returns the date range and a human-readable label for a named period. + * + * Accepts a string (rather than the strict `Period` type) because the CLI + * surfaces a few extra inputs not exposed in the dashboard tab strip + * (e.g. `'yesterday'`). Unknown values fall back to `'week'`. + * + * Note: `'all'` is bounded to the last 6 months. Use `--from`/`--to` for + * an unbounded historical window. 
+ */ +export function getDateRange(period: string): { range: DateRange; label: string } { + const now = new Date() + const end = new Date( + now.getFullYear(), + now.getMonth(), + now.getDate(), + END_OF_DAY_HOURS, + END_OF_DAY_MINUTES, + END_OF_DAY_SECONDS, + END_OF_DAY_MS, + ) + + switch (period) { + case 'today': { + const start = new Date(now.getFullYear(), now.getMonth(), now.getDate()) + return { range: { start, end }, label: `Today (${toDateString(start)})` } + } + case 'yesterday': { + const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1) + const yesterdayEnd = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1, END_OF_DAY_HOURS, END_OF_DAY_MINUTES, END_OF_DAY_SECONDS, END_OF_DAY_MS) + return { range: { start, end: yesterdayEnd }, label: `Yesterday (${toDateString(start)})` } + } + case 'week': { + const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7) + return { range: { start, end }, label: 'Last 7 Days' } + } + case 'month': { + const start = new Date(now.getFullYear(), now.getMonth(), 1) + return { range: { start, end }, label: `${now.toLocaleString('default', { month: 'long' })} ${now.getFullYear()}` } + } + case '30days': { + const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 30) + return { range: { start, end }, label: 'Last 30 Days' } + } + case 'all': { + const start = new Date(now.getFullYear(), now.getMonth() - ALL_TIME_MONTHS, now.getDate()) + return { range: { start, end }, label: 'Last 6 months' } + } + default: { + const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7) + return { range: { start, end }, label: 'Last 7 Days' } + } + } +} diff --git a/src/cli.ts b/src/cli.ts index 368cbbc..7474efc 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -11,7 +11,7 @@ import { getDaysInRange, ensureCacheHydrated, emptyCache, BACKFILL_DAYS, toDateS import { aggregateProjectsIntoDays, buildPeriodDataFromDays, dateKey } from './day-aggregator.js' import { 
CATEGORY_LABELS, type DateRange, type ProjectSummary, type TaskCategory } from './types.js' import { renderDashboard } from './dashboard.js' -import { parseDateRangeFlags } from './cli-date.js' +import { parseDateRangeFlags, getDateRange, toPeriod, type Period } from './cli-date.js' import { runOptimize, scanAndDetect } from './optimize.js' import { renderCompare } from './compare.js' import { getAllProviders } from './providers/index.js' @@ -35,56 +35,6 @@ async function hydrateCache() { } } -function getDateRange(period: string): { range: DateRange; label: string } { - const now = new Date() - const end = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 23, 59, 59, 999) - - switch (period) { - case 'today': { - const start = new Date(now.getFullYear(), now.getMonth(), now.getDate()) - return { range: { start, end }, label: `Today (${toDateString(start)})` } - } - case 'yesterday': { - const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1) - const yesterdayEnd = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 1, 23, 59, 59, 999) - return { range: { start, end: yesterdayEnd }, label: `Yesterday (${toDateString(start)})` } - } - case 'week': { - const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7) - return { range: { start, end }, label: 'Last 7 Days' } - } - case 'month': { - const start = new Date(now.getFullYear(), now.getMonth(), 1) - return { range: { start, end }, label: `${now.toLocaleString('default', { month: 'long' })} ${now.getFullYear()}` } - } - case '30days': { - const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 30) - return { range: { start, end }, label: 'Last 30 Days' } - } - case 'all': { - // Cap "All Time" to the last 6 months. Older data is rarely actionable for a cost - // tracker and keeps the parse path bounded so providers like Codex/Cursor with sparse - // data still load in seconds. 
- const start = new Date(now.getFullYear(), now.getMonth() - 6, now.getDate()) - return { range: { start, end }, label: 'Last 6 months' } - } - default: { - const start = new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7) - return { range: { start, end }, label: 'Last 7 Days' } - } - } -} - -type Period = 'today' | 'week' | '30days' | 'month' | 'all' - -function toPeriod(s: string): Period { - if (s === 'today') return 'today' - if (s === 'month') return 'month' - if (s === '30days') return '30days' - if (s === 'all') return 'all' - return 'week' -} - function collect(val: string, acc: string[]): string[] { acc.push(val) return acc diff --git a/src/dashboard.tsx b/src/dashboard.tsx index c047d69..16aea07 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -13,21 +13,12 @@ import { dateKey } from './day-aggregator.js' import { CompareView } from './compare.js' import { getPlanUsageOrNull, type PlanUsage } from './plan-usage.js' import { planDisplayName } from './plans.js' +import { getDateRange as getDateRangeShared, PERIODS, PERIOD_LABELS, type Period } from './cli-date.js' import { join } from 'path' import { patchStdoutForWindows } from './ink-win.js' -type Period = 'today' | 'week' | '30days' | 'month' | 'all' type View = 'dashboard' | 'optimize' | 'compare' -const PERIODS: Period[] = ['today', 'week', '30days', 'month', 'all'] -const PERIOD_LABELS: Record = { - today: 'Today', - week: '7 Days', - '30days': '30 Days', - month: 'This Month', - all: 'All Time', -} - const MIN_WIDE = 90 const ORANGE = '#FF8C42' const DIM = '#555555' @@ -104,16 +95,8 @@ function gradientColor(pct: number): string { return toHex(lerp(255, 245, t), lerp(140, 91, t), lerp(66, 91, t)) } -function getDateRange(period: Period): { start: Date; end: Date } { - const now = new Date() - const end = new Date(now.getFullYear(), now.getMonth(), now.getDate(), 23, 59, 59, 999) - switch (period) { - case 'today': return { start: new Date(now.getFullYear(), now.getMonth(), 
now.getDate()), end } - case 'week': return { start: new Date(now.getFullYear(), now.getMonth(), now.getDate() - 7), end } - case '30days': return { start: new Date(now.getFullYear(), now.getMonth(), now.getDate() - 30), end } - case 'month': return { start: new Date(now.getFullYear(), now.getMonth(), 1), end } - case 'all': return { start: new Date(0), end } - } +function getPeriodRange(period: Period): { start: Date; end: Date } { + return getDateRangeShared(period).range } type Layout = { dashWidth: number; wide: boolean; halfWidth: number; barWidth: number } @@ -711,7 +694,7 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, let cancelled = false async function scan() { if (projects.length === 0) { setOptimizeResult(null); return } - const result = await scanAndDetect(projects, getDateRange(period)) + const result = await scanAndDetect(projects, getPeriodRange(period)) if (!cancelled) setOptimizeResult(result) } scan() @@ -723,7 +706,7 @@ function InteractiveDashboard({ initialProjects, initialPeriod, initialProvider, setLoading(true) setOptimizeResult(null) try { - const range = getDateRange(p) + const range = getPeriodRange(p) const data = await parseAllSessions(range, prov) if (reloadGenerationRef.current !== generation) return @@ -828,7 +811,7 @@ function StaticDashboard({ projects, period, activeProvider, planUsage }: { proj export async function renderDashboard(period: Period = 'week', provider: string = 'all', refreshSeconds?: number, projectFilter?: string[], excludeFilter?: string[], customRange?: DateRange | null): Promise { await loadPricing() - const range = customRange ?? getDateRange(period) + const range = customRange ?? 
getPeriodRange(period) const filteredProjects = filterProjectsByName(await parseAllSessions(range, provider), projectFilter, excludeFilter) const planUsage = await getPlanUsageOrNull() const isTTY = process.stdin.isTTY && process.stdout.isTTY diff --git a/tests/cli-date.test.ts b/tests/cli-date.test.ts new file mode 100644 index 0000000..f2f7404 --- /dev/null +++ b/tests/cli-date.test.ts @@ -0,0 +1,118 @@ +import { describe, it, expect } from 'vitest' +import { + getDateRange, + PERIODS, + PERIOD_LABELS, + toPeriod, + type Period, +} from '../src/cli-date.js' + +describe('getDateRange', () => { + it('"all" is bounded to the last 6 months, not epoch', () => { + const { range, label } = getDateRange('all') + const now = new Date() + + expect(label).toBe('Last 6 months') + + // Regression guard: must never silently fall back to epoch (the old + // dashboard bug) or any pre-2000 date. + expect(range.start.getFullYear()).toBeGreaterThan(2000) + + // Roughly 6 months back. Accept 5-7 months to absorb end-of-month + // clamping (e.g. on May 31, JS rolls Nov 31 -> Dec 1, shifting the + // computed month forward by one). + const monthsDiff = + (now.getFullYear() - range.start.getFullYear()) * 12 + + (now.getMonth() - range.start.getMonth()) + expect(monthsDiff).toBeGreaterThanOrEqual(5) + expect(monthsDiff).toBeLessThanOrEqual(7) + + // End is today, end of day. + expect(range.end.getHours()).toBe(23) + expect(range.end.getMinutes()).toBe(59) + }) + + it('CLI and dashboard agree on "all" semantics (no Date(0) drift)', () => { + const a = getDateRange('all') + const b = getDateRange('all') + expect(a.range.start.getTime()).toBe(b.range.start.getTime()) + expect(a.label).toBe(b.label) + // Regression guard: must never silently fall back to epoch. 
+ expect(a.range.start.getFullYear()).toBeGreaterThan(2000) + }) + + it('"week" returns the last 7 days', () => { + const { range, label } = getDateRange('week') + expect(label).toBe('Last 7 Days') + // start = midnight 7 days ago, end = today 23:59:59.999 -> ~8 days span. + const diffDays = (range.end.getTime() - range.start.getTime()) / (1000 * 60 * 60 * 24) + expect(diffDays).toBeGreaterThanOrEqual(7) + expect(diffDays).toBeLessThanOrEqual(8) + }) + + it('"month" starts on day 1 of the current month', () => { + const { range } = getDateRange('month') + expect(range.start.getDate()).toBe(1) + expect(range.start.getHours()).toBe(0) + }) + + it('"30days" returns 30 days back', () => { + const { range, label } = getDateRange('30days') + expect(label).toBe('Last 30 Days') + const diffDays = (range.end.getTime() - range.start.getTime()) / (1000 * 60 * 60 * 24) + expect(diffDays).toBeGreaterThanOrEqual(30) + expect(diffDays).toBeLessThanOrEqual(31) + }) + + it('"today" starts at local midnight', () => { + const { range } = getDateRange('today') + expect(range.start.getHours()).toBe(0) + expect(range.start.getMinutes()).toBe(0) + expect(range.end.getHours()).toBe(23) + }) + + it('"yesterday" is supported (CLI-only convenience)', () => { + const { range, label } = getDateRange('yesterday') + expect(label).toMatch(/^Yesterday/) + expect(range.start.getHours()).toBe(0) + expect(range.end.getHours()).toBe(23) + }) + + it('unknown period falls back to "week"', () => { + const fallback = getDateRange('not-a-period') + const week = getDateRange('week') + expect(fallback.label).toBe(week.label) + }) +}) + +describe('PERIODS / PERIOD_LABELS', () => { + it('exposes the expected period set', () => { + expect(PERIODS).toEqual(['today', 'week', '30days', 'month', 'all']) + }) + + it('has a label for every period', () => { + for (const p of PERIODS) { + expect(PERIOD_LABELS[p]).toBeTruthy() + } + }) + + it('"all" tab label reflects the 6-month bound', () => { + // Short label used in 
the dashboard tab strip. The long-form label + // ("Last 6 months") comes from getDateRange().label. + expect(PERIOD_LABELS.all).toBe('6 Months') + }) +}) + +describe('toPeriod', () => { + it('round-trips known periods', () => { + const known: Period[] = ['today', 'week', '30days', 'month', 'all'] + for (const p of known) { + expect(toPeriod(p)).toBe(p) + } + }) + + it('falls back to "week" for unknown input', () => { + expect(toPeriod('garbage')).toBe('week') + expect(toPeriod('')).toBe('week') + }) +}) From 1a080a006f8b136b67ae47a5d0c4083045672d96 Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 5 May 2026 04:13:04 +0300 Subject: [PATCH 2/6] feat(optimize): MCP tool coverage detector with cache-aware costing Adds a per-tool optimizer finding for MCP servers whose schema is loaded on every turn but rarely invoked. Builds on the existing server-level `detectUnusedMcp` (zero invocations) by reporting partial-use cases: "loaded 54 tools, called 0" or "loaded 26 tools, called 2 (8% coverage)". Inventory comes from Claude Code's JSONL `attachment.deferred_tools_delta` entries: `addedNames` lists the exact tools available at that turn, including every fully-qualified `mcp____` name. We union across all delta entries in a session (not just the first) because tool availability can change mid-session when the user reloads MCP config or a subagent inherits a different tool set. Names that don't match the `mcp____` shape with both segments non-empty are rejected at extraction so downstream `split('__')` consumers can't be poisoned. Token-savings estimates are cache-aware. MCP tool schemas live in the cached prefix of the system prompt: a session pays the full input price on each cache-creation turn (rebuilds happen every ~5 minutes of inactivity) and the cache-read discount on subsequent turns. 
Each call's contribution is capped at its observed `cacheCreationInputTokens` / `cacheReadInputTokens` so we never claim more MCP overhead than the call's own cache buckets could contain. When multiple servers are flagged, costing happens in a single combined pass: the per-call cap applies to the total unused-schema budget across all flagged servers, not per server. Two flagged servers cannot both independently claim the same call's cache bucket, which would otherwise overstate `tokensSaved` and misclassify findings as high impact. A session counts toward `loadedSessions` (and toward the cost estimate) only if its observed inventory included the server. Pure invocation-only sessions, where the server appears in `mcpBreakdown` or `call.mcpTools` without any matching `deferred_tools_delta`, do not satisfy the `>= 2 sessions` threshold on their own. The same invariant applies in `estimateMcpSchemaCost` so the two passes agree. Coverage is computed against the inventory only: invocations of names not present in any observed inventory (older config, hallucinated tool, typo) do not inflate `toolsInvoked` and cannot drive `unusedCount` negative. `toolsInvoked` is derived as `inventory.size - unusedTools.length` to keep both numbers consistent. `detectUnusedMcp` and the new detector are explicitly disjoint: `detectUnusedMcp` skips servers that the coverage detector will report, not every server that happens to be in any inventory, so a small inventoried-but-uninvoked server below the coverage thresholds still gets flagged as "configured but never called." 
Thresholds for the coverage finding: - > 10 tools available (small servers are noise) - < 20% coverage - >= 2 sessions with observed inventory - High impact when total effective tokens >= 200_000 or >= 3 servers flagged Smoke-tested on a real account: 7 servers flagged across 93 sessions (`office-word-mcp` 0/54, `notebooklm-mcp` 0/38, `office-ppt-mcp` 0/37, `excel-mcp-server` 0/25, `github-mcp-server` 2/26, `peekaboo` 3/22, plus `claude_ai_Asana`). Combined-cap costing keeps `tokensSaved` honest. Changes: - src/types.ts: optional `mcpInventory: string[]` on `SessionSummary`. Provider-agnostic field; currently populated only by the Claude parser. - src/parser.ts: `extractMcpInventory` walks all entries, validates fully-qualified names, returns sorted unique list. `buildSessionSummary` passes it through; field is omitted when empty so JSON exports stay clean. - src/optimize.ts: `aggregateMcpCoverage`, `estimateMcpSchemaCost` (single- and multi-server signatures), `detectMcpToolCoverage`. Wired into `scanAndDetect`. `detectUnusedMcp` updated to disjoint with the new detector. - tests/mcp-coverage.test.ts: 23 cases covering aggregation, costing, combined-cap behaviour, threshold gates, invocation-only-session filtering, foreign-tool invocations, cache rebuild events, write+read on the same call, multi-server pluralisation. - tests/parser-mcp-inventory.test.ts: 12 cases for the JSONL extractor including malformed name rejection and tolerant attachment parsing. - CHANGELOG.md: entry under Unreleased / Added (CLI). 
Closes #2 --- CHANGELOG.md | 13 + src/optimize.ts | 322 +++++++++++++++++++++ src/parser.ts | 54 +++- src/types.ts | 6 + tests/mcp-coverage.test.ts | 450 +++++++++++++++++++++++++++++ tests/parser-mcp-inventory.test.ts | 126 ++++++++ 6 files changed, 970 insertions(+), 1 deletion(-) create mode 100644 tests/mcp-coverage.test.ts create mode 100644 tests/parser-mcp-inventory.test.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index b31e30b..e5022c4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,18 @@ # Changelog +## Unreleased + +### Added (CLI) +- **MCP tool coverage detector.** New `optimize` finding flags MCP servers + whose tool inventory is largely unused. Inventory is observed from the + Claude `deferred_tools_delta` JSONL attachments (exact tool names per + session) instead of guessed at five tools per server. Token-savings + estimates are cache-aware: schema bytes pay full input price on the first + cache-creation turn of a session, then carry at the cache-read discount + on subsequent turns, capped per call so we never claim more overhead + than the call's own cache buckets could contain. Threshold: + >10 tools available, <20% coverage, observed in ≥2 sessions. Closes #2. + ## 0.9.6 - 2026-05-03 ### Added (CLI) diff --git a/src/optimize.ts b/src/optimize.ts index 7077b29..7882660 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -53,6 +53,18 @@ const LOW_RATIO_MEDIUM_THRESHOLD = 3 const MIN_API_CALLS_FOR_CACHE = 10 const CACHE_EXCESS_HIGH_THRESHOLD = 15000 const UNUSED_MCP_HIGH_THRESHOLD = 3 +// MCP tool coverage detector thresholds. A server only earns a finding when +// every condition holds: the inventory is large enough to matter, real-world +// usage is poor, and we observed it in enough sessions to trust the signal. 
+const MCP_COVERAGE_MIN_TOOLS = 10 +const MCP_COVERAGE_MIN_SESSIONS = 2 +const MCP_COVERAGE_LOW_THRESHOLD = 0.20 +const MCP_COVERAGE_HIGH_IMPACT_TOKENS = 200_000 +// Anthropic prices cached input reads at roughly 10% of fresh input. We use +// this to keep "ongoing" overhead estimates honest: most MCP schema bytes +// live in the cached prefix and only get charged at the discount rate after +// the first turn of a session. +const CACHE_READ_DISCOUNT = 0.10 const GHOST_AGENTS_HIGH_THRESHOLD = 5 const GHOST_AGENTS_MEDIUM_THRESHOLD = 2 const GHOST_SKILLS_HIGH_THRESHOLD = 10 @@ -477,6 +489,298 @@ export function detectDuplicateReads(calls: ToolCall[], dateRange?: DateRange): } } +/** + * Per-server breakdown of MCP tool inventory vs invocations, computed from the + * `mcpInventory` field captured by the Claude parser. + * + * Each session that loaded a server contributes its observed tool list to + * the union for that server. Invocations come from the existing + * `mcpBreakdown` per-call counts plus the parser's `call.tools` stream. + */ +export type McpServerCoverage = { + server: string + toolsAvailable: number + toolsInvoked: number + unusedTools: string[] + invocations: number + loadedSessions: number + coverageRatio: number +} + +/** + * Aggregate MCP inventory and invocations across the projects in scope. + * + * Returns one entry per `mcp____*` namespace observed in any + * session's `mcpInventory`. Counts of invocations come from + * `session.mcpBreakdown` (per-server call totals already maintained by the + * parser). 
+ */ +export function aggregateMcpCoverage(projects: ProjectSummary[]): McpServerCoverage[] { + type ServerAcc = { + inventory: Set + invokedTools: Set + invocations: number + loadedSessions: number + } + const servers = new Map() + + function getOrInit(server: string): ServerAcc { + let acc = servers.get(server) + if (!acc) { + acc = { inventory: new Set(), invokedTools: new Set(), invocations: 0, loadedSessions: 0 } + servers.set(server, acc) + } + return acc + } + + for (const project of projects) { + for (const session of project.sessions) { + // Only sessions with an observed inventory count toward `loadedSessions`. + // Pure invocation-only sessions (server seen via `call.mcpTools` or + // `session.mcpBreakdown` without any matching `deferred_tools_delta`) + // could otherwise satisfy the `MCP_COVERAGE_MIN_SESSIONS` threshold + // without giving us evidence that the schema was actually loaded. + const inventoriedServers = new Set() + const sessionInvoked = new Map>() + + // Inventory: union of tools observed available in this session. + for (const fqn of session.mcpInventory ?? []) { + const parts = fqn.split('__') + if (parts.length < 3 || parts[0] !== 'mcp') continue + const server = parts[1] + if (!server) continue + const tool = parts.slice(2).join('__') + if (!tool) continue + const acc = getOrInit(server) + acc.inventory.add(fqn) + inventoriedServers.add(server) + } + + // Invoked tools: walk turns to collect per-tool invocations. We can't + // get this from session.mcpBreakdown alone because that's keyed by + // server, not tool. 
+ for (const turn of session.turns) { + for (const call of turn.assistantCalls) { + for (const fqn of call.mcpTools) { + const parts = fqn.split('__') + if (parts.length < 3 || parts[0] !== 'mcp') continue + const server = parts[1] + if (!server) continue + let invoked = sessionInvoked.get(server) + if (!invoked) { + invoked = new Set() + sessionInvoked.set(server, invoked) + } + invoked.add(fqn) + } + } + } + + // Invocation totals: trust mcpBreakdown which was already aggregated + // turn-by-turn, including any invocations the inventory pass missed. + for (const [server, data] of Object.entries(session.mcpBreakdown)) { + const acc = getOrInit(server) + acc.invocations += data.calls + } + + for (const [server, invoked] of sessionInvoked) { + const acc = getOrInit(server) + for (const fqn of invoked) acc.invokedTools.add(fqn) + } + + for (const server of inventoriedServers) { + getOrInit(server).loadedSessions += 1 + } + } + } + + const result: McpServerCoverage[] = [] + for (const [server, acc] of servers) { + if (acc.inventory.size === 0) continue + // Coverage is only meaningful against tools we actually observed in the + // inventory: invocations of tools never inventoried (older config, typo, + // etc.) would otherwise inflate the numerator and could even drive + // `unusedCount` negative. + const invokedInInventory = new Set() + for (const fqn of acc.invokedTools) { + if (acc.inventory.has(fqn)) invokedInInventory.add(fqn) + } + const unusedTools = Array.from(acc.inventory).filter(t => !invokedInInventory.has(t)).sort() + const toolsInvoked = acc.inventory.size - unusedTools.length + result.push({ + server, + toolsAvailable: acc.inventory.size, + toolsInvoked, + unusedTools, + invocations: acc.invocations, + loadedSessions: acc.loadedSessions, + coverageRatio: acc.inventory.size === 0 ? 
0 : toolsInvoked / acc.inventory.size, + }) + } + result.sort((a, b) => b.toolsAvailable - a.toolsAvailable) + return result +} + +/** + * Cache-aware token cost estimate for the unused-tool overhead of one or + * more servers, summed across all sessions that loaded any of them. + * + * Returns three buckets: + * - `cacheWriteTokens`: schema bytes paid at full input price (each + * cache-creation event in a session that loaded one of the servers). + * - `cacheReadTokens`: schema bytes carried at the cache-read discount on + * subsequent turns (ongoing overhead). + * - `effectiveInputTokens`: equivalent fresh-input tokens, weighted by + * cache pricing. Used to estimate dollar cost downstream by multiplying + * by the project's input rate. + * + * We cap each call's contribution at the observed cache-creation / + * cache-read totals for that call: it is not meaningful to claim more MCP + * overhead than the call's own cache bucket could possibly contain. The + * cap is applied once across the combined unused-schema budget for all + * flagged servers, not per server, so two flagged servers cannot both + * independently claim the same call's cache bucket. + * + * Anthropic caches expire after roughly 5 minutes of inactivity, so a long + * session can rebuild the cache multiple times. Every call that reports + * `cacheCreationInputTokens > 0` is treated as another rebuild, not just + * the very first one. + * + * "Loaded" is defined exclusively by observed inventory: a session that + * invoked a server without ever emitting a `deferred_tools_delta` for it + * does not count, matching the invariant `aggregateMcpCoverage` uses for + * `loadedSessions`. + */ +export function estimateMcpSchemaCost( + unusedToolCounts: Record | number, + projects: ProjectSummary[], + serverOrServers: string | string[], +): { cacheWriteTokens: number; cacheReadTokens: number; effectiveInputTokens: number } { + // Backward-compatible single-server signature used by tests. 
+ const servers = Array.isArray(serverOrServers) ? serverOrServers : [serverOrServers] + const counts: Record = typeof unusedToolCounts === 'number' + ? { [serverOrServers as string]: unusedToolCounts } + : unusedToolCounts + + const totalUnusedSchemaTokens = servers.reduce( + (s, srv) => s + (counts[srv] ?? 0) * TOKENS_PER_MCP_TOOL, + 0, + ) + if (totalUnusedSchemaTokens === 0) { + return { cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 } + } + + const serverSet = new Set(servers) + let cacheWriteTokens = 0 + let cacheReadTokens = 0 + + for (const project of projects) { + for (const session of project.sessions) { + // A session counts only if its observed inventory included at least + // one of the flagged servers — same invariant `aggregateMcpCoverage` + // uses for `loadedSessions`. + let loaded = false + for (const fqn of session.mcpInventory ?? []) { + const seg = fqn.split('__')[1] + if (seg && serverSet.has(seg)) { loaded = true; break } + } + if (!loaded) continue + + for (const turn of session.turns) { + for (const call of turn.assistantCalls) { + // Both buckets can be non-zero on the same call (cache rebuild + // alongside a partial read), so account for them independently. + // The cap is applied to the combined unused-schema budget so + // multiple flagged servers cannot all claim the same call. + if (call.usage.cacheCreationInputTokens > 0) { + cacheWriteTokens += Math.min(totalUnusedSchemaTokens, call.usage.cacheCreationInputTokens) + } + if (call.usage.cacheReadInputTokens > 0) { + cacheReadTokens += Math.min(totalUnusedSchemaTokens, call.usage.cacheReadInputTokens) + } + } + } + } + } + + const effectiveInputTokens = cacheWriteTokens + cacheReadTokens * CACHE_READ_DISCOUNT + return { cacheWriteTokens, cacheReadTokens, effectiveInputTokens } +} + +/** + * Find MCP servers whose tool inventory is largely unused. Replaces the + * older server-only `detectUnusedMcp` (which only flagged servers with + * literal zero invocations). 
+ * + * A server is flagged when, taken together: + * - it exposed more than `MCP_COVERAGE_MIN_TOOLS` tools, + * - we saw it loaded in at least `MCP_COVERAGE_MIN_SESSIONS` sessions, + * - the coverage ratio is below `MCP_COVERAGE_LOW_THRESHOLD`. + * + * Token-savings estimates use the cache-aware accounting from + * `estimateMcpSchemaCost` so we don't mistake cached-prefix carry-over for + * fresh-input billing. + */ +export function detectMcpToolCoverage( + projects: ProjectSummary[], +): WasteFinding | null { + const coverage = aggregateMcpCoverage(projects) + if (coverage.length === 0) return null + + const flagged = coverage.filter(c => + c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS + && c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS + && c.coverageRatio < MCP_COVERAGE_LOW_THRESHOLD, + ) + if (flagged.length === 0) return null + + flagged.sort((a, b) => (b.toolsAvailable - b.toolsInvoked) - (a.toolsAvailable - a.toolsInvoked)) + + const lines: string[] = [] + const removeCommands: string[] = [] + const unusedCountsByServer: Record = {} + const flaggedServers: string[] = [] + + for (const c of flagged) { + unusedCountsByServer[c.server] = c.toolsAvailable - c.toolsInvoked + flaggedServers.push(c.server) + const pct = Math.round(c.coverageRatio * 100) + lines.push( + `${c.server}: ${c.toolsInvoked}/${c.toolsAvailable} tools used (${pct}% coverage) across ${c.loadedSessions} session${c.loadedSessions === 1 ? '' : 's'}`, + ) + removeCommands.push(`claude mcp remove ${c.server}`) + } + + // Single combined cost pass: caps each call's contribution at the + // total unused-schema budget across all flagged servers, so two + // flagged servers cannot independently claim the same call's cache + // bucket and overstate `tokensSaved`. + const cost = estimateMcpSchemaCost(unusedCountsByServer, projects, flaggedServers) + const tokensSaved = Math.round(cost.effectiveInputTokens) + const impact: Impact = tokensSaved >= MCP_COVERAGE_HIGH_IMPACT_TOKENS + ? 
'high' + : flagged.length >= UNUSED_MCP_HIGH_THRESHOLD + ? 'high' + : 'medium' + + return { + title: `${flagged.length} MCP server${flagged.length === 1 ? '' : 's'} with low tool coverage`, + explanation: + `Schema for unused tools is loaded into the system prompt every session and ` + + `carried in the cached prefix on every turn. ` + + `${lines.join('; ')}.`, + impact, + tokensSaved, + fix: { + type: 'command', + label: flagged.length === 1 + ? 'Remove the underused server, or trim its tools in your MCP config:' + : 'Remove underused servers, or trim their tools in your MCP config:', + text: removeCommands.join('\n'), + }, + } +} + export function detectUnusedMcp( calls: ToolCall[], projects: ProjectSummary[], @@ -497,10 +801,27 @@ export function detectUnusedMcp( } } + // Servers that the new coverage detector will flag fall under its + // jurisdiction (per-tool granularity, cache-aware costing) and we + // suppress them here to avoid double-flagging. Importantly, we suppress + // only the servers that actually clear the coverage detector's + // thresholds — a small, inventoried-but-uninvoked server that the + // coverage detector skips would otherwise become a blind spot. 
+ const coverageReportedServers = new Set( + aggregateMcpCoverage(projects) + .filter(c => + c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS + && c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS + && c.coverageRatio < MCP_COVERAGE_LOW_THRESHOLD, + ) + .map(c => c.server), + ) + const now = Date.now() const unused: string[] = [] for (const entry of configured.values()) { if (calledServers.has(entry.normalized)) continue + if (coverageReportedServers.has(entry.normalized)) continue if (entry.mtime > 0 && now - entry.mtime < MCP_NEW_CONFIG_GRACE_MS) continue unused.push(entry.original) } @@ -973,6 +1294,7 @@ export async function scanAndDetect( () => detectJunkReads(toolCalls, dateRange), () => detectDuplicateReads(toolCalls, dateRange), () => detectUnusedMcp(toolCalls, projects, projectCwds), + () => detectMcpToolCoverage(projects), () => detectBloatedClaudeMd(projectCwds), () => detectBashBloat(), ] diff --git a/src/parser.ts b/src/parser.ts index 6af996f..09cf99c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -203,10 +203,54 @@ function groupIntoTurns(entries: JournalEntry[], seenMsgIds: Set): Parse return turns } +/** + * Extract MCP tool inventory observed across a session's JSONL entries. + * + * Claude Code emits `attachment.type === "deferred_tools_delta"` entries whose + * `addedNames` array lists every tool currently available at that turn (built-in + * tools plus all `mcp____` names exposed by configured MCP + * servers). Tool inventory can change mid-session if the user reloads MCP + * config, so we union every occurrence rather than trusting only the first. + * + * Built-in tools are filtered out: only `mcp__*` identifiers survive. + */ +// Fully-qualified MCP tool name shape: `mcp____`. Both server +// and tool segments must be non-empty. Names like `mcp__server` (no tool +// segment) or `mcp__server__` (trailing empty tool) would silently pollute +// the inventory and break downstream `split('__')` consumers, so they're +// rejected here. 
+function isMcpToolName(name: string): boolean { + if (!name.startsWith('mcp__')) return false + const rest = name.slice(5) // strip `mcp__` + const sep = rest.indexOf('__') + if (sep <= 0) return false // missing or empty server + if (sep >= rest.length - 2) return false // missing or empty tool + return true +} + +export function extractMcpInventory(entries: JournalEntry[]): string[] { + const inventory = new Set() + for (const entry of entries) { + const att = entry['attachment'] + if (!att || typeof att !== 'object') continue + const a = att as { type?: unknown; addedNames?: unknown } + if (a.type !== 'deferred_tools_delta') continue + if (!Array.isArray(a.addedNames)) continue + for (const name of a.addedNames) { + if (typeof name !== 'string') continue + if (!isMcpToolName(name)) continue + inventory.add(name) + } + } + if (inventory.size === 0) return [] + return Array.from(inventory).sort() +} + function buildSessionSummary( sessionId: string, project: string, turns: ClassifiedTurn[], + mcpInventory?: string[], ): SessionSummary { const modelBreakdown: SessionSummary['modelBreakdown'] = Object.create(null) const toolBreakdown: SessionSummary['toolBreakdown'] = Object.create(null) @@ -311,6 +355,7 @@ function buildSessionSummary( bashBreakdown, categoryBreakdown, skillBreakdown, + ...(mcpInventory && mcpInventory.length > 0 ? { mcpInventory } : {}), } } @@ -362,7 +407,14 @@ async function parseSessionFile( } const classified = turns.map(classifyTurn) - return buildSessionSummary(sessionId, project, classified) + // Inventory is extracted from the full entry stream, not just the + // turns we kept after date filtering: tool availability is set up + // once at the start of a session (with possible mid-session reloads), + // and we want to reflect what was loaded even if the user only ran + // turns inside a narrow date window. 
+ const mcpInventory = extractMcpInventory(entries) + + return buildSessionSummary(sessionId, project, classified, mcpInventory) } async function collectJsonlFiles(dirPath: string): Promise { diff --git a/src/types.ts b/src/types.ts index ab67515..e5562e8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -121,6 +121,12 @@ export type SessionSummary = { bashBreakdown: Record categoryBreakdown: Record skillBreakdown: Record + // Observed MCP tools available in this session, captured from + // `attachment.deferred_tools_delta.addedNames` entries. Union across all + // turns. Each name is a fully-qualified `mcp____` identifier. + // Built-in tools (Bash, Edit, etc.) are filtered out. Provider-agnostic field; + // currently populated only by the Claude parser. + mcpInventory?: string[] } export type ProjectSummary = { diff --git a/tests/mcp-coverage.test.ts b/tests/mcp-coverage.test.ts new file mode 100644 index 0000000..1d078d2 --- /dev/null +++ b/tests/mcp-coverage.test.ts @@ -0,0 +1,450 @@ +import { describe, it, expect } from 'vitest' + +import { + aggregateMcpCoverage, + detectMcpToolCoverage, + estimateMcpSchemaCost, +} from '../src/optimize.js' +import type { + ClassifiedTurn, + ParsedApiCall, + ProjectSummary, + SessionSummary, + TaskCategory, + TokenUsage, +} from '../src/types.js' + +// --------------------------------------------------------------------------- +// Test fixtures +// --------------------------------------------------------------------------- + +const ZERO_USAGE: TokenUsage = { + inputTokens: 0, + outputTokens: 0, + cacheCreationInputTokens: 0, + cacheReadInputTokens: 0, + cachedInputTokens: 0, + reasoningTokens: 0, + webSearchRequests: 0, +} + +function makeCall(opts: { + tools?: string[] + cacheCreation?: number + cacheRead?: number + cost?: number +} = {}): ParsedApiCall { + const tools = opts.tools ?? [] + return { + provider: 'claude', + model: 'Opus 4.7', + usage: { + ...ZERO_USAGE, + cacheCreationInputTokens: opts.cacheCreation ?? 
0, + cacheReadInputTokens: opts.cacheRead ?? 0, + }, + costUSD: opts.cost ?? 0, + tools, + mcpTools: tools.filter(t => t.startsWith('mcp__')), + skills: [], + hasAgentSpawn: false, + hasPlanMode: false, + speed: 'standard', + timestamp: '2026-05-04T00:00:00Z', + bashCommands: [], + deduplicationKey: 'k', + } +} + +function makeTurn(calls: ParsedApiCall[]): ClassifiedTurn { + return { + userMessage: '', + assistantCalls: calls, + timestamp: '2026-05-04T00:00:00Z', + sessionId: 's1', + category: 'coding', + retries: 0, + hasEdits: false, + } +} + +function makeSession(opts: { + sessionId?: string + inventory?: string[] + turns?: ClassifiedTurn[] + mcpBreakdown?: Record +}): SessionSummary { + const turns = opts.turns ?? [] + const apiCalls = turns.reduce((s, t) => s + t.assistantCalls.length, 0) + const emptyCategoryBreakdown = {} as Record + return { + sessionId: opts.sessionId ?? 's1', + project: 'p', + firstTimestamp: '2026-05-04T00:00:00Z', + lastTimestamp: '2026-05-04T00:00:00Z', + totalCostUSD: 0, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, + apiCalls, + turns, + modelBreakdown: {}, + toolBreakdown: {}, + mcpBreakdown: opts.mcpBreakdown ?? {}, + bashBreakdown: {}, + categoryBreakdown: emptyCategoryBreakdown, + skillBreakdown: {}, + ...(opts.inventory ? 
{ mcpInventory: opts.inventory } : {}), + } +} + +function project(sessions: SessionSummary[]): ProjectSummary { + return { + project: 'p', + projectPath: '/tmp/p', + sessions, + totalCostUSD: 0, + totalApiCalls: sessions.reduce((s, ses) => s + ses.apiCalls, 0), + } +} + +// --------------------------------------------------------------------------- +// aggregateMcpCoverage +// --------------------------------------------------------------------------- + +describe('aggregateMcpCoverage', () => { + it('returns empty list when no session has MCP inventory', () => { + const projects = [project([makeSession({})])] + expect(aggregateMcpCoverage(projects)).toEqual([]) + }) + + it('reports per-server tools available, invoked, and unused', () => { + const inventory = [ + 'mcp__hf__hub_repo_search', + 'mcp__hf__paper_search', + 'mcp__hf__hf_doc_search', + ] + const turns = [ + makeTurn([makeCall({ tools: ['mcp__hf__hub_repo_search'] })]), + ] + const sessions = [ + makeSession({ inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + + expect(result).toHaveLength(1) + expect(result[0]!.server).toBe('hf') + expect(result[0]!.toolsAvailable).toBe(3) + expect(result[0]!.toolsInvoked).toBe(1) + expect(result[0]!.unusedTools).toEqual([ + 'mcp__hf__hf_doc_search', + 'mcp__hf__paper_search', + ]) + expect(result[0]!.coverageRatio).toBeCloseTo(1 / 3, 5) + expect(result[0]!.invocations).toBe(1) + expect(result[0]!.loadedSessions).toBe(1) + }) + + it('unions inventory across multiple sessions for the same server', () => { + const sessions = [ + makeSession({ sessionId: 'a', inventory: ['mcp__x__a', 'mcp__x__b'] }), + makeSession({ sessionId: 'b', inventory: ['mcp__x__b', 'mcp__x__c'] }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + expect(result[0]!.toolsAvailable).toBe(3) + expect(result[0]!.loadedSessions).toBe(2) + }) + + it('separates servers with similar names', () => { + const sessions = [ + 
makeSession({ inventory: ['mcp__hf__a', 'mcp__hugface__a'] }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + expect(result.map(r => r.server).sort()).toEqual(['hf', 'hugface']) + }) + + it('skips invocations without inventory (foreign server, no inventory observed)', () => { + // A server can show up only via a call. We still report it so the + // operator knows it was invoked, but coverage is 0/0 and it is not a + // candidate for the unused-coverage finding. + const turns = [makeTurn([makeCall({ tools: ['mcp__ghost__t1'] })])] + const sessions = [ + makeSession({ turns, mcpBreakdown: { ghost: { calls: 1 } } }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + // No inventory entry -> aggregator drops the server from the report + // because we cannot reason about coverage without an inventory baseline. + expect(result).toEqual([]) + }) +}) + +// --------------------------------------------------------------------------- +// estimateMcpSchemaCost — cache-aware accounting +// --------------------------------------------------------------------------- + +describe('estimateMcpSchemaCost', () => { + it('charges first cacheCreation turn at full price, subsequent turns at cache-read', () => { + const turns = [ + makeTurn([makeCall({ cacheCreation: 50_000 })]), // first turn: write + makeTurn([makeCall({ cacheRead: 60_000 })]), // ongoing: read + makeTurn([makeCall({ cacheRead: 60_000 })]), + ] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + // 30 unused tools * 400 token estimate = 12_000 schema tokens + // cap by call cache buckets so we never overclaim + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(12_000) // capped by 50k creation, 12k schema fits + expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns + // effective = write + read * 0.10 
(cache discount) + expect(cost.effectiveInputTokens).toBeCloseTo(12_000 + 24_000 * 0.10, 5) + }) + + it('caps by available cache bucket so we never overclaim', () => { + const turns = [makeTurn([makeCall({ cacheCreation: 1_000 })])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + // 30*400 = 12k schema tokens, but the call only had 1k cache-creation, + // so we should not claim more than 1k of overhead for that turn. + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(1_000) + }) + + it('returns zero when no unused tools', () => { + const sessions = [makeSession({ + inventory: ['mcp__svc__t1'], + turns: [makeTurn([makeCall({ cacheCreation: 5000 })])], + })] + const cost = estimateMcpSchemaCost(0, [project(sessions)], 'svc') + expect(cost).toEqual({ cacheWriteTokens: 0, cacheReadTokens: 0, effectiveInputTokens: 0 }) + }) + + it('counts cache write AND cache read on the same call', () => { + // A long session can have a cache rebuild mid-stream where one call + // reports both buckets. The estimator must charge both, not skip the + // read because of the write. + const turns = [makeTurn([ + makeCall({ cacheCreation: 50_000, cacheRead: 30_000 }), + ])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(12_000) // capped at 50k creation + expect(cost.cacheReadTokens).toBe(12_000) // capped at 30k read + }) + + it('counts every cache rebuild, not just the first one', () => { + // Sessions that span more than 5 minutes can rebuild the cache + // multiple times. The estimator should treat every cacheCreation + // bucket as another write. 
+ const turns = [makeTurn([ + makeCall({ cacheCreation: 50_000 }), + makeCall({ cacheCreation: 50_000 }), // rebuild after cache TTL + makeCall({ cacheRead: 60_000 }), + ])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + mcpBreakdown: { svc: { calls: 0 } }, + })] + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(24_000) // both rebuilds counted + expect(cost.cacheReadTokens).toBe(12_000) + }) + + it('skips sessions where the server was never loaded', () => { + const turns = [makeTurn([makeCall({ cacheCreation: 100_000 })])] + const sessions = [makeSession({ + inventory: ['mcp__other__t1'], + turns, + })] + const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(0) + }) + + it('requires observed inventory for the server, not just invocations', () => { + // Session invoked the server (mcpBreakdown set, mcpTools called) but + // never reported a deferred_tools_delta for it. Cost should be 0 to + // stay consistent with aggregateMcpCoverage's loadedSessions rule. + const turns = [makeTurn([ + makeCall({ tools: ['mcp__svc__t1'], cacheCreation: 100_000 }), + ])] + const sessions = [makeSession({ + // No inventory at all + turns, + mcpBreakdown: { svc: { calls: 1 } }, + })] + const cost = estimateMcpSchemaCost(10, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(0) + expect(cost.cacheReadTokens).toBe(0) + }) + + it('caps combined unused-schema budget across multiple flagged servers', () => { + // Two flagged servers, each with 30 unused tools (12k schema each = + // 24k combined). One call has a 50k cache-creation bucket. The + // combined cap means total write tokens reported is min(24k, 50k) = + // 24k, not 24k + 24k = 48k. 
+ const inventory = [ + ...Array.from({ length: 30 }, (_, i) => `mcp__a__t${i}`), + ...Array.from({ length: 30 }, (_, i) => `mcp__b__t${i}`), + ] + const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])] + const sessions = [makeSession({ inventory, turns })] + const cost = estimateMcpSchemaCost( + { a: 30, b: 30 }, + [project(sessions)], + ['a', 'b'], + ) + expect(cost.cacheWriteTokens).toBe(24_000) + }) + + it('still works with the single-server signature (backward compat)', () => { + const turns = [makeTurn([makeCall({ cacheCreation: 50_000 })])] + const sessions = [makeSession({ + inventory: Array.from({ length: 30 }, (_, i) => `mcp__svc__t${i}`), + turns, + })] + const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') + expect(cost.cacheWriteTokens).toBe(12_000) + }) +}) + +// --------------------------------------------------------------------------- +// detectMcpToolCoverage — finding emission with thresholds +// --------------------------------------------------------------------------- + +describe('detectMcpToolCoverage', () => { + it('returns null when no inventory exists at all', () => { + expect(detectMcpToolCoverage([project([makeSession({})])])).toBeNull() + }) + + it('does not flag a server with healthy coverage', () => { + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const turns = [makeTurn( + Array.from({ length: 8 }, (_, i) => makeCall({ tools: [`mcp__svc__t${i}`] })), + )] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns }), + makeSession({ sessionId: 'b', inventory, turns }), + ] + // 8/20 = 40% coverage, above the 20% threshold -> no finding + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('does not flag a server with too few tools (signal too noisy)', () => { + // Below MCP_COVERAGE_MIN_TOOLS=10 + const inventory = ['mcp__svc__a', 'mcp__svc__b'] + const sessions = [ + makeSession({ sessionId: 'a', inventory }), + makeSession({ sessionId: 'b', 
inventory }), + ] + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('does not flag if seen in only one session (insufficient evidence)', () => { + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const sessions = [makeSession({ inventory })] + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('flags a large server with low coverage across multiple sessions', () => { + const inventory = Array.from({ length: 30 }, (_, i) => `mcp__hf__t${i}`) + const turns = [makeTurn([ + makeCall({ tools: ['mcp__hf__t0'], cacheCreation: 100_000 }), + ])] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }), + makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { hf: { calls: 1 } } }), + ] + const finding = detectMcpToolCoverage([project(sessions)]) + expect(finding).not.toBeNull() + expect(finding!.title).toContain('1 MCP server') + expect(finding!.title).toContain('low tool coverage') + expect(finding!.explanation).toContain('hf') + expect(finding!.explanation).toContain('1/30') + expect(finding!.fix.type).toBe('command') + expect((finding!.fix as { text: string }).text).toContain('claude mcp remove hf') + expect(finding!.tokensSaved).toBeGreaterThan(0) + }) + + it('escalates impact to high when token waste crosses the threshold', () => { + const inventory = Array.from({ length: 60 }, (_, i) => `mcp__big__t${i}`) + // 60 tools * 400 tokens = 24k schema. With many sessions and large + // cache-creation buckets, total effective tokens easily clear 200k. + const turns = [makeTurn([ + makeCall({ tools: ['mcp__big__t0'], cacheCreation: 50_000 }), + makeCall({ cacheRead: 60_000 }), + makeCall({ cacheRead: 60_000 }), + ])] + // Need enough sessions so the per-session ~28.8k effective tokens + // (24k write + 48k read × 0.10) sum past the 200k high-impact threshold. 
+ const sessions = Array.from({ length: 8 }, (_, i) => + makeSession({ sessionId: `s${i}`, inventory, turns, mcpBreakdown: { big: { calls: 1 } } }), + ) + const finding = detectMcpToolCoverage([project(sessions)]) + expect(finding).not.toBeNull() + expect(finding!.impact).toBe('high') + }) + + it('does not count invocation-only sessions toward loadedSessions', () => { + // Server `svc` has inventory in only one session, but is invoked in + // a second session that never observed the schema. Pre-fix this + // would have satisfied the >=2 session threshold; it must not now. + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const turns = [makeTurn([ + makeCall({ tools: ['mcp__svc__t0'], cacheCreation: 50_000 }), + ])] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }), + // No inventory — this shouldn't be considered a "loaded" session. + makeSession({ sessionId: 'b', turns, mcpBreakdown: { svc: { calls: 1 } } }), + ] + expect(detectMcpToolCoverage([project(sessions)])).toBeNull() + }) + + it('does not let invocations of un-inventoried tools inflate coverage', () => { + // Inventory has 20 tools, none invoked. Calls hit a 21st tool that + // never appeared in any deferred_tools_delta (could be a renamed/ + // removed tool from an older session config). Coverage must stay 0% + // and unusedCount must not go negative. 
+ const inventory = Array.from({ length: 20 }, (_, i) => `mcp__svc__t${i}`) + const turns = [makeTurn([makeCall({ tools: ['mcp__svc__ghost'] })])] + const sessions = [ + makeSession({ sessionId: 'a', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }), + makeSession({ sessionId: 'b', inventory, turns, mcpBreakdown: { svc: { calls: 1 } } }), + ] + const result = aggregateMcpCoverage([project(sessions)]) + expect(result[0]!.toolsAvailable).toBe(20) + expect(result[0]!.toolsInvoked).toBe(0) + expect(result[0]!.coverageRatio).toBe(0) + expect(result[0]!.unusedTools).toHaveLength(20) + }) + + it('handles multiple flagged servers and pluralises the title', () => { + const sessions: SessionSummary[] = [] + for (const server of ['svc1', 'svc2']) { + const inventory = Array.from({ length: 20 }, (_, i) => `mcp__${server}__t${i}`) + const turns = [makeTurn([ + makeCall({ tools: [`mcp__${server}__t0`], cacheCreation: 50_000 }), + ])] + sessions.push( + makeSession({ sessionId: `${server}-a`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }), + makeSession({ sessionId: `${server}-b`, inventory, turns, mcpBreakdown: { [server]: { calls: 1 } } }), + ) + } + const finding = detectMcpToolCoverage([project(sessions)]) + expect(finding).not.toBeNull() + expect(finding!.title).toContain('2 MCP servers') + expect((finding!.fix as { text: string }).text.split('\n')).toHaveLength(2) + }) +}) diff --git a/tests/parser-mcp-inventory.test.ts b/tests/parser-mcp-inventory.test.ts new file mode 100644 index 0000000..cbbe34c --- /dev/null +++ b/tests/parser-mcp-inventory.test.ts @@ -0,0 +1,126 @@ +import { describe, it, expect } from 'vitest' + +import { extractMcpInventory } from '../src/parser.js' +import type { JournalEntry } from '../src/types.js' + +function entry(overrides: Partial & Record): JournalEntry { + return { type: 'attachment', ...overrides } as JournalEntry +} + +describe('extractMcpInventory', () => { + it('returns empty array when no entries have an 
attachment', () => { + expect(extractMcpInventory([entry({ type: 'user' })])).toEqual([]) + }) + + it('returns empty array when no deferred_tools_delta is present', () => { + expect(extractMcpInventory([ + entry({ attachment: { type: 'something_else', addedNames: ['mcp__a__b'] } }), + ])).toEqual([]) + }) + + it('extracts mcp__server__tool names from a single delta', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['Bash', 'Edit', 'mcp__hf__hub_repo_search', 'mcp__hf__paper_search'], + }, + }), + ]) + expect(result).toEqual(['mcp__hf__hub_repo_search', 'mcp__hf__paper_search']) + }) + + it('filters out built-in tools (no mcp__ prefix)', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['Bash', 'Edit', 'WebFetch', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('rejects malformed names: empty server segment', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp____tool', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('rejects malformed names: missing tool segment (no second `__`)', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp__server', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('rejects malformed names: empty tool segment (trailing `__`)', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp__server__', 'mcp__svc__t1'], + }, + }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('unions across multiple delta entries (incremental adds)', () => { + const result = extractMcpInventory([ + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1'] } }), + 
entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t2', 'mcp__b__t1'] } }), + ]) + expect(result).toEqual(['mcp__a__t1', 'mcp__a__t2', 'mcp__b__t1']) + }) + + it('deduplicates names seen in multiple deltas', () => { + const result = extractMcpInventory([ + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1', 'mcp__a__t1'] } }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__a__t1'] } }), + ]) + expect(result).toEqual(['mcp__a__t1']) + }) + + it('tolerates missing or non-string addedNames', () => { + const result = extractMcpInventory([ + entry({ attachment: { type: 'deferred_tools_delta' } }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: 'not-an-array' } }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: [42, null, 'mcp__svc__t1', undefined] } }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('tolerates malformed attachment object', () => { + const result = extractMcpInventory([ + entry({ attachment: null }), + entry({ attachment: 'string-not-object' }), + entry({ attachment: { type: 'deferred_tools_delta', addedNames: ['mcp__svc__t1'] } }), + ]) + expect(result).toEqual(['mcp__svc__t1']) + }) + + it('returns names in sorted order', () => { + const result = extractMcpInventory([ + entry({ + attachment: { + type: 'deferred_tools_delta', + addedNames: ['mcp__zzz__a', 'mcp__aaa__z', 'mcp__mmm__m'], + }, + }), + ]) + expect(result).toEqual(['mcp__aaa__z', 'mcp__mmm__m', 'mcp__zzz__a']) + }) +}) From 9a258a8a99860f83d48a30cff2fe64c03b08a46b Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 5 May 2026 05:05:13 +0300 Subject: [PATCH 3/6] fix(date-range): avoid all-period month overflow --- gnome/prefs.js | 2 +- src/cli-date.ts | 2 +- src/dashboard.tsx | 4 ++-- tests/cli-date.test.ts | 29 ++++++++++++++++------------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git 
a/gnome/prefs.js b/gnome/prefs.js index 8d80679..b0d13f9 100644 --- a/gnome/prefs.js +++ b/gnome/prefs.js @@ -26,7 +26,7 @@ const PERIODS = [ { id: 'week', label: '7 Days' }, { id: '30days', label: '30 Days' }, { id: 'month', label: 'Month' }, - { id: 'all', label: 'All Time' }, + { id: 'all', label: '6 Months' }, ]; export default class CodeBurnPreferences extends ExtensionPreferences { diff --git a/src/cli-date.ts b/src/cli-date.ts index b3d502d..7dfd06f 100644 --- a/src/cli-date.ts +++ b/src/cli-date.ts @@ -113,7 +113,7 @@ export function getDateRange(period: string): { range: DateRange; label: string return { range: { start, end }, label: 'Last 30 Days' } } case 'all': { - const start = new Date(now.getFullYear(), now.getMonth() - ALL_TIME_MONTHS, now.getDate()) + const start = new Date(now.getFullYear(), now.getMonth() - ALL_TIME_MONTHS, 1) return { range: { start, end }, label: 'Last 6 months' } } default: { diff --git a/src/dashboard.tsx b/src/dashboard.tsx index 16aea07..3193b5a 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -13,7 +13,7 @@ import { dateKey } from './day-aggregator.js' import { CompareView } from './compare.js' import { getPlanUsageOrNull, type PlanUsage } from './plan-usage.js' import { planDisplayName } from './plans.js' -import { getDateRange as getDateRangeShared, PERIODS, PERIOD_LABELS, type Period } from './cli-date.js' +import { getDateRange, PERIODS, PERIOD_LABELS, type Period } from './cli-date.js' import { join } from 'path' import { patchStdoutForWindows } from './ink-win.js' @@ -96,7 +96,7 @@ function gradientColor(pct: number): string { } function getPeriodRange(period: Period): { start: Date; end: Date } { - return getDateRangeShared(period).range + return getDateRange(period).range } type Layout = { dashWidth: number; wide: boolean; halfWidth: number; barWidth: number } diff --git a/tests/cli-date.test.ts b/tests/cli-date.test.ts index f2f7404..e30096d 100644 --- a/tests/cli-date.test.ts +++ b/tests/cli-date.test.ts 
@@ -1,4 +1,4 @@ -import { describe, it, expect } from 'vitest' +import { afterEach, describe, it, expect, vi } from 'vitest' import { getDateRange, PERIODS, @@ -7,6 +7,10 @@ import { type Period, } from '../src/cli-date.js' +afterEach(() => { + vi.useRealTimers() +}) + describe('getDateRange', () => { it('"all" is bounded to the last 6 months, not epoch', () => { const { range, label } = getDateRange('all') @@ -18,27 +22,26 @@ describe('getDateRange', () => { // dashboard bug) or any pre-2000 date. expect(range.start.getFullYear()).toBeGreaterThan(2000) - // Roughly 6 months back. Accept 5-7 months to absorb end-of-month - // clamping (e.g. on May 31, JS rolls Nov 31 -> Dec 1, shifting the - // computed month forward by one). const monthsDiff = (now.getFullYear() - range.start.getFullYear()) * 12 + (now.getMonth() - range.start.getMonth()) - expect(monthsDiff).toBeGreaterThanOrEqual(5) - expect(monthsDiff).toBeLessThanOrEqual(7) + expect(monthsDiff).toBe(6) + expect(range.start.getDate()).toBe(1) // End is today, end of day. expect(range.end.getHours()).toBe(23) expect(range.end.getMinutes()).toBe(59) }) - it('CLI and dashboard agree on "all" semantics (no Date(0) drift)', () => { - const a = getDateRange('all') - const b = getDateRange('all') - expect(a.range.start.getTime()).toBe(b.range.start.getTime()) - expect(a.label).toBe(b.label) - // Regression guard: must never silently fall back to epoch. 
- expect(a.range.start.getFullYear()).toBeGreaterThan(2000) + it('"all" does not overflow past the target month at end-of-month', () => { + vi.useFakeTimers() + vi.setSystemTime(new Date(2026, 7, 31, 12, 0, 0)) + + const { range } = getDateRange('all') + + expect(range.start.getFullYear()).toBe(2026) + expect(range.start.getMonth()).toBe(1) + expect(range.start.getDate()).toBe(1) }) it('"week" returns the last 7 days', () => { From e46b20b9272537f1df42842a49a8a4148c7a7368 Mon Sep 17 00:00:00 2001 From: ozymandiashh <234437643+ozymandiashh@users.noreply.github.com> Date: Tue, 5 May 2026 05:11:00 +0300 Subject: [PATCH 4/6] fix(optimize): reuse mcp coverage and type schema estimator --- src/optimize.ts | 48 ++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/src/optimize.ts b/src/optimize.ts index 7882660..04d95c5 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -507,6 +507,12 @@ export type McpServerCoverage = { coverageRatio: number } +type McpSchemaCostEstimate = { + cacheWriteTokens: number + cacheReadTokens: number + effectiveInputTokens: number +} + /** * Aggregate MCP inventory and invocations across the projects in scope. * @@ -651,16 +657,36 @@ export function aggregateMcpCoverage(projects: ProjectSummary[]): McpServerCover * does not count, matching the invariant `aggregateMcpCoverage` uses for * `loadedSessions`. */ +export function estimateMcpSchemaCost( + unusedToolCount: number, + projects: ProjectSummary[], + server: string, +): McpSchemaCostEstimate +export function estimateMcpSchemaCost( + unusedToolCountsByServer: Record<string, number>, + projects: ProjectSummary[], + servers: string[], +): McpSchemaCostEstimate export function estimateMcpSchemaCost( unusedToolCounts: Record<string, number> | number, projects: ProjectSummary[], serverOrServers: string | string[], -): { cacheWriteTokens: number; cacheReadTokens: number; effectiveInputTokens: number } { - // Backward-compatible single-server signature used by tests.
- const servers = Array.isArray(serverOrServers) ? serverOrServers : [serverOrServers] - const counts: Record<string, number> = typeof unusedToolCounts === 'number' - ? { [serverOrServers as string]: unusedToolCounts } - : unusedToolCounts +): McpSchemaCostEstimate { + let servers: string[] + let counts: Record<string, number> + if (typeof unusedToolCounts === 'number') { + if (typeof serverOrServers !== 'string') { + throw new TypeError('single-server MCP cost estimates require a string server name') + } + servers = [serverOrServers] + counts = { [serverOrServers]: unusedToolCounts } + } else { + if (!Array.isArray(serverOrServers)) { + throw new TypeError('multi-server MCP cost estimates require a string[] server list') + } + servers = serverOrServers + counts = unusedToolCounts + } const totalUnusedSchemaTokens = servers.reduce( (s, srv) => s + (counts[srv] ?? 0) * TOKENS_PER_MCP_TOOL, @@ -723,8 +749,8 @@ export function estimateMcpSchemaCost( */ export function detectMcpToolCoverage( projects: ProjectSummary[], + coverage = aggregateMcpCoverage(projects), ): WasteFinding | null { - const coverage = aggregateMcpCoverage(projects) if (coverage.length === 0) return null const flagged = coverage.filter(c => @@ -785,6 +811,7 @@ export function detectUnusedMcp( calls: ToolCall[], projects: ProjectSummary[], projectCwds: Set<string>, + mcpCoverage = aggregateMcpCoverage(projects), ): WasteFinding | null { const configured = loadMcpConfigs(projectCwds) if (configured.size === 0) return null @@ -808,7 +835,7 @@ export function detectUnusedMcp( // thresholds — a small, inventoried-but-uninvoked server that the // coverage detector skips would otherwise become a blind spot.
const coverageReportedServers = new Set( - aggregateMcpCoverage(projects) + mcpCoverage .filter(c => c.toolsAvailable > MCP_COVERAGE_MIN_TOOLS && c.loadedSessions >= MCP_COVERAGE_MIN_SESSIONS @@ -1286,6 +1313,7 @@ export async function scanAndDetect( const costRate = computeInputCostRate(projects) const { toolCalls, projectCwds, apiCalls, userMessages } = await scanSessions(dateRange) + const mcpCoverage = aggregateMcpCoverage(projects) const findings: WasteFinding[] = [] const syncDetectors: Array<() => WasteFinding | null> = [ @@ -1293,8 +1321,8 @@ export async function scanAndDetect( () => detectLowReadEditRatio(toolCalls), () => detectJunkReads(toolCalls, dateRange), () => detectDuplicateReads(toolCalls, dateRange), - () => detectUnusedMcp(toolCalls, projects, projectCwds), - () => detectMcpToolCoverage(projects), + () => detectUnusedMcp(toolCalls, projects, projectCwds, mcpCoverage), + () => detectMcpToolCoverage(projects, mcpCoverage), () => detectBloatedClaudeMd(projectCwds), () => detectBashBloat(), ] From bfa5fe7fa099d96ca8fd3f128e08047f10c628e9 Mon Sep 17 00:00:00 2001 From: iamtoruk Date: Mon, 4 May 2026 19:46:20 -0700 Subject: [PATCH 5/6] fix(labels): update remaining 'all' period labels to '6 Months' PR #221 unified the period logic but missed the TUI hotkey bar, GNOME indicator popup, and macOS menubar app. All surfaces now consistently show '6 Months' instead of 'All' or 'all time'. 
--- gnome/indicator.js | 2 +- mac/Sources/CodeBurnMenubar/AppStore.swift | 2 +- mac/Sources/CodeBurnMenubar/Views/MenuBarContent.swift | 2 +- src/dashboard.tsx | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/gnome/indicator.js b/gnome/indicator.js index 64199c1..c2f8266 100644 --- a/gnome/indicator.js +++ b/gnome/indicator.js @@ -18,7 +18,7 @@ const PERIODS = [ { id: 'week', label: '7 Days' }, { id: '30days', label: '30 Days' }, { id: 'month', label: 'Month' }, - { id: 'all', label: 'All' }, + { id: 'all', label: '6 Months' }, ]; const INSIGHTS = [ diff --git a/mac/Sources/CodeBurnMenubar/AppStore.swift b/mac/Sources/CodeBurnMenubar/AppStore.swift index 4374f4c..8dd40a1 100644 --- a/mac/Sources/CodeBurnMenubar/AppStore.swift +++ b/mac/Sources/CodeBurnMenubar/AppStore.swift @@ -321,7 +321,7 @@ enum Period: String, CaseIterable, Identifiable { case sevenDays = "7 Days" case thirtyDays = "30 Days" case month = "Month" - case all = "All" + case all = "6 Months" var id: String { rawValue } diff --git a/mac/Sources/CodeBurnMenubar/Views/MenuBarContent.swift b/mac/Sources/CodeBurnMenubar/Views/MenuBarContent.swift index 37befc3..892d0a1 100644 --- a/mac/Sources/CodeBurnMenubar/Views/MenuBarContent.swift +++ b/mac/Sources/CodeBurnMenubar/Views/MenuBarContent.swift @@ -109,7 +109,7 @@ private struct EmptyProviderState: View { case .sevenDays: "the last 7 days" case .thirtyDays: "the last 30 days" case .month: "this month" - case .all: "all time" + case .all: "the last 6 months" } } } diff --git a/src/dashboard.tsx b/src/dashboard.tsx index 3193b5a..b759e64 100644 --- a/src/dashboard.tsx +++ b/src/dashboard.tsx @@ -591,7 +591,7 @@ function StatusBar({ width, showProvider, view, findingCount, optimizeAvailable, 2 week 3 30 days 4 month - 5 all time + 5 6 months {!isOptimize && optimizeAvailable && findingCount != null && findingCount > 0 && ( <> o optimize ({findingCount}) )} From 735f41bc6c89916e417e719e3fe58ec5386fbe95 Mon Sep 17 00:00:00 2001 From: 
iamtoruk Date: Mon, 4 May 2026 20:11:50 -0700 Subject: [PATCH 6/6] Fix cache-write pricing and shell-quote server names in fix commands - Use 1.25x multiplier for cache-write tokens to match Anthropic's actual pricing (was incorrectly using 1x) - Shell-quote server names in `claude mcp remove` fix text to prevent issues with unusual server names --- src/optimize.ts | 13 +++++++------ tests/mcp-coverage.test.ts | 6 +++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/optimize.ts b/src/optimize.ts index 04d95c5..1f2a4cf 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -60,10 +60,11 @@ const MCP_COVERAGE_MIN_TOOLS = 10 const MCP_COVERAGE_MIN_SESSIONS = 2 const MCP_COVERAGE_LOW_THRESHOLD = 0.20 const MCP_COVERAGE_HIGH_IMPACT_TOKENS = 200_000 -// Anthropic prices cached input reads at roughly 10% of fresh input. We use -// this to keep "ongoing" overhead estimates honest: most MCP schema bytes -// live in the cached prefix and only get charged at the discount rate after -// the first turn of a session. +// Anthropic prices cache writes at 125% of base input and cache reads at +// roughly 10% of base input. We use these to keep overhead estimates honest: +// most MCP schema bytes live in the cached prefix and only get charged at +// the discount rate after the first turn of a session. 
+const CACHE_WRITE_MULTIPLIER = 1.25 const CACHE_READ_DISCOUNT = 0.10 const GHOST_AGENTS_HIGH_THRESHOLD = 5 const GHOST_AGENTS_MEDIUM_THRESHOLD = 2 @@ -729,7 +730,7 @@ export function estimateMcpSchemaCost( } } - const effectiveInputTokens = cacheWriteTokens + cacheReadTokens * CACHE_READ_DISCOUNT + const effectiveInputTokens = cacheWriteTokens * CACHE_WRITE_MULTIPLIER + cacheReadTokens * CACHE_READ_DISCOUNT return { cacheWriteTokens, cacheReadTokens, effectiveInputTokens } } @@ -774,7 +775,7 @@ export function detectMcpToolCoverage( lines.push( `${c.server}: ${c.toolsInvoked}/${c.toolsAvailable} tools used (${pct}% coverage) across ${c.loadedSessions} session${c.loadedSessions === 1 ? '' : 's'}`, ) - removeCommands.push(`claude mcp remove ${c.server}`) + removeCommands.push(`claude mcp remove '${c.server}'`) } // Single combined cost pass: caps each call's contribution at the diff --git a/tests/mcp-coverage.test.ts b/tests/mcp-coverage.test.ts index 1d078d2..c2a4595 100644 --- a/tests/mcp-coverage.test.ts +++ b/tests/mcp-coverage.test.ts @@ -200,8 +200,8 @@ describe('estimateMcpSchemaCost', () => { const cost = estimateMcpSchemaCost(30, [project(sessions)], 'svc') expect(cost.cacheWriteTokens).toBe(12_000) // capped by 50k creation, 12k schema fits expect(cost.cacheReadTokens).toBe(24_000) // 12k + 12k across two ongoing turns - // effective = write + read * 0.10 (cache discount) - expect(cost.effectiveInputTokens).toBeCloseTo(12_000 + 24_000 * 0.10, 5) + // effective = write * 1.25 + read * 0.10 (cache pricing) + expect(cost.effectiveInputTokens).toBeCloseTo(12_000 * 1.25 + 24_000 * 0.10, 5) }) it('caps by available cache bucket so we never overclaim', () => { @@ -373,7 +373,7 @@ describe('detectMcpToolCoverage', () => { expect(finding!.explanation).toContain('hf') expect(finding!.explanation).toContain('1/30') expect(finding!.fix.type).toBe('command') - expect((finding!.fix as { text: string }).text).toContain('claude mcp remove hf') + expect((finding!.fix as { 
text: string }).text).toContain("claude mcp remove 'hf'") expect(finding!.tokensSaved).toBeGreaterThan(0) })