diff --git a/CHANGELOG.md b/CHANGELOG.md index 1ebdad1..28e2c11 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,14 @@ than the call's own cache buckets could contain. Threshold: >10 tools available, <20% coverage, observed in ≥2 sessions. Closes #2. - **Session cost outlier detector.** New `optimize` finding flags sessions costing more than 2x their peer-session average within the same project. Ignores sub-$1 outliers to avoid noise. Requires at least 3 sessions per project for a baseline. +- **Context bloat detector.** New `optimize` finding flags sessions where + effective input/cache tokens are large and disproportionate to output. + Cache reads are discounted in the estimate to avoid overstating cheap cached + context. The report highlights top sessions by imbalance, notes sharp + growth from the previous project session (within a 7-day baseline window), + and suggests starting fresh with only the current goal, relevant files, + failing output, and constraints. Sessions flagged here are excluded from + the cost-outlier finding so the same session is not listed twice. ### Fixed (CLI) - **Windows Claude project paths.** Claude Code project rollups now prefer diff --git a/README.md b/README.md index 1602d87..7c96264 100644 --- a/README.md +++ b/README.md @@ -188,6 +188,7 @@ Scans your sessions and your `~/.claude/` setup for waste patterns: - Ghost agents, skills, and slash commands defined in `~/.claude/` but never invoked - Bloated `CLAUDE.md` files (with `@-import` expansion counted) - Cache creation overhead and junk directory reads +- Context-heavy sessions where effective input/cache tokens swamp output Each finding shows the estimated token and dollar savings plus a ready-to-paste fix: a `CLAUDE.md` line, an environment variable, or a `mv` command to archive unused items. Findings are ranked by urgency (impact weighted against observed waste) and rolled up into an A to F setup health grade. 
Repeat runs classify each finding as new, improving, or resolved against a 48-hour recent window. diff --git a/src/optimize.ts b/src/optimize.ts index dcaf91d..82874d6 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -78,6 +78,17 @@ const MIN_SESSIONS_FOR_OUTLIER = 3 const SESSION_OUTLIER_MULTIPLIER = 2 const MIN_SESSION_OUTLIER_COST_USD = 1 const SESSION_OUTLIER_PREVIEW = 5 +const CONTEXT_BLOAT_MIN_INPUT_TOKENS = 75_000 +const CONTEXT_BLOAT_MIN_RATIO = 25 +const CONTEXT_BLOAT_TARGET_RATIO = 15 +const CONTEXT_BLOAT_PREVIEW = 5 +const CONTEXT_BLOAT_LOW_INPUT_TOKENS = 200_000 +const CONTEXT_BLOAT_HIGH_INPUT_TOKENS = 500_000 +const CONTEXT_BLOAT_LOW_MAX_CANDIDATES = 2 +const CONTEXT_BLOAT_HIGH_MIN_CANDIDATES = 10 +const CONTEXT_BLOAT_GROWTH_RATIO = 2 +const CONTEXT_BLOAT_GROWTH_MAX_GAP_MS = 7 * 24 * 60 * 60 * 1000 +const CONTEXT_BLOAT_RATIO_DISPLAY_CAP = 1000 // ============================================================================ // Scoring constants @@ -1213,7 +1224,129 @@ function sessionTokenTotal(session: ProjectSummary['sessions'][number]): number + session.totalCacheWriteTokens } -export function detectSessionOutliers(projects: ProjectSummary[]): WasteFinding | null { +function sessionEffectiveContextTokens(session: ProjectSummary['sessions'][number]): number { + return session.totalInputTokens + + session.totalCacheReadTokens * CACHE_READ_DISCOUNT + + session.totalCacheWriteTokens * CACHE_WRITE_MULTIPLIER +} + +function formatContextRatio(ratio: number): string { + if (ratio >= CONTEXT_BLOAT_RATIO_DISPLAY_CAP) return `${CONTEXT_BLOAT_RATIO_DISPLAY_CAP}+` + return ratio.toFixed(1) +} + +export type ContextBloatCandidate = { + project: string + sessionId: string + date: string + effectiveInputTokens: number + outputTokens: number + ratio: number + excessInputTokens: number + growthRatio: number | null +} + +export function findContextBloatCandidates(projects: ProjectSummary[]): ContextBloatCandidate[] { + const candidates: ContextBloatCandidate[] = 
[] + + for (const project of projects) { + const sessions = [...project.sessions].sort((a, b) => + new Date(a.firstTimestamp).getTime() - new Date(b.firstTimestamp).getTime() + ) + let previousInputTokens: number | null = null + let previousTimestampMs: number | null = null + + for (const session of sessions) { + const inputTokens = sessionEffectiveContextTokens(session) + const outputTokens = session.totalOutputTokens + const ratio = inputTokens / Math.max(outputTokens, 1) + const currentMs = new Date(session.firstTimestamp).getTime() + const gapMs = previousTimestampMs !== null ? currentMs - previousTimestampMs : null + // Suppress growth ratio when the previous session is too far back to be + // a meaningful baseline (e.g. a small test run weeks before a real + // working session would otherwise produce alarming "1000x" figures). + const growthRatio = previousInputTokens !== null + && previousInputTokens > 0 + && gapMs !== null + && gapMs <= CONTEXT_BLOAT_GROWTH_MAX_GAP_MS + ? inputTokens / previousInputTokens + : null + + // Anchor growth to the immediately previous project session, even if + // that session is below threshold and never becomes a finding. 
+ previousInputTokens = inputTokens + previousTimestampMs = currentMs + + if (inputTokens < CONTEXT_BLOAT_MIN_INPUT_TOKENS) continue + if (ratio < CONTEXT_BLOAT_MIN_RATIO) continue + + candidates.push({ + project: project.project, + sessionId: session.sessionId, + date: session.firstTimestamp.slice(0, 10), + effectiveInputTokens: inputTokens, + outputTokens, + ratio, + excessInputTokens: Math.max(0, inputTokens - outputTokens * CONTEXT_BLOAT_TARGET_RATIO), + growthRatio, + }) + } + } + + candidates.sort((a, b) => + b.excessInputTokens - a.excessInputTokens + || a.date.localeCompare(b.date) + || a.project.localeCompare(b.project) + || a.sessionId.localeCompare(b.sessionId) + ) + return candidates +} + +export function detectContextBloat(projects: ProjectSummary[]): WasteFinding | null { + const candidates = findContextBloatCandidates(projects) + if (candidates.length === 0) return null + + const preview = candidates.slice(0, CONTEXT_BLOAT_PREVIEW) + const list = preview + .map(c => { + const growth = c.growthRatio !== null && c.growthRatio >= CONTEXT_BLOAT_GROWTH_RATIO + ? `, ${c.growthRatio.toFixed(1)}x previous session input` + : '' + return `${c.project}/${c.sessionId} on ${c.date}: ${formatTokens(c.effectiveInputTokens)} effective input/cache vs ${formatTokens(c.outputTokens)} output (${formatContextRatio(c.ratio)}:1${growth})` + }) + .join('; ') + const extra = candidates.length > preview.length ? `; +${candidates.length - preview.length} more` : '' + // Savings estimate only counts context above a healthier 15:1 input-output ratio. + // Detection stays stricter at 25:1 so borderline sessions are not shown. + const tokensSaved = Math.round(candidates.reduce((sum, c) => sum + c.excessInputTokens, 0)) + const totalInputTokens = candidates.reduce((sum, c) => sum + c.effectiveInputTokens, 0) + + // Tier on candidate count first, total context size second. 
A single 600K + // session is "high"; 1-2 modest-sized sessions are "low"; everything in + // between is "medium". + let impact: Impact + if (candidates.length >= CONTEXT_BLOAT_HIGH_MIN_CANDIDATES || totalInputTokens >= CONTEXT_BLOAT_HIGH_INPUT_TOKENS) { + impact = 'high' + } else if (candidates.length <= CONTEXT_BLOAT_LOW_MAX_CANDIDATES && totalInputTokens < CONTEXT_BLOAT_LOW_INPUT_TOKENS) { + impact = 'low' + } else { + impact = 'medium' + } + + return { + title: `${candidates.length} context-heavy session${candidates.length === 1 ? '' : 's'}`, + explanation: `Effective input/cache tokens swamp output in these sessions: ${list}${extra}. This can come from stale context carryover, inherently context-heavy work, or abandoned runs that loaded too much context; starting fresh with only the current goal and relevant files can cut repeated prompt overhead.`, + impact, + tokensSaved, + fix: { + type: 'paste', + label: 'Start the next expensive thread with a fresh-context constraint:', + text: 'Start fresh before continuing. Use only the current goal, the relevant files, the failing command/output, and the constraints below. Restate the working context in under 10 bullets before editing.', + }, + } +} + +export function detectSessionOutliers(projects: ProjectSummary[], excludedSessionIds?: ReadonlySet<string>): WasteFinding | null { type Outlier = { project: string sessionId: string @@ -1240,6 +1373,11 @@ export function detectSessionOutliers(projects: ProjectSummary[]): WasteFinding const ratio = session.totalCostUSD / avgCost if (ratio <= SESSION_OUTLIER_MULTIPLIER) continue if (session.totalCostUSD < MIN_SESSION_OUTLIER_COST_USD) continue + // Avoid reporting the same session under both this finding and the + // context-bloat finding. Context-bloat takes priority because its + // suggested fix ("start fresh") is more concrete than the generic + // "tighter constraint" advice here.
+ if (excludedSessionIds?.has(session.sessionId)) continue outliers.push({ project: project.project, @@ -1392,6 +1530,7 @@ export async function scanAndDetect( const mcpCoverage = aggregateMcpCoverage(projects) const findings: WasteFinding[] = [] + const contextBloatSessionIds = new Set(findContextBloatCandidates(projects).map(c => c.sessionId)) const syncDetectors: Array<() => WasteFinding | null> = [ () => detectCacheBloat(apiCalls, projects, dateRange), () => detectLowReadEditRatio(toolCalls), @@ -1399,7 +1538,8 @@ () => detectDuplicateReads(toolCalls, dateRange), () => detectUnusedMcp(toolCalls, projects, projectCwds, mcpCoverage), () => detectMcpToolCoverage(projects, mcpCoverage), - () => detectSessionOutliers(projects), + () => detectContextBloat(projects), + () => detectSessionOutliers(projects, contextBloatSessionIds), () => detectBloatedClaudeMd(projectCwds), () => detectBashBloat(), ] diff --git a/tests/optimize.test.ts b/tests/optimize.test.ts index 970b277..b84eab2 100644 --- a/tests/optimize.test.ts +++ b/tests/optimize.test.ts @@ -6,6 +6,7 @@ import { detectLowReadEditRatio, detectCacheBloat, detectBloatedClaudeMd, + detectContextBloat, detectSessionOutliers, computeHealth, computeTrend, @@ -56,6 +57,45 @@ function projectWithSessions(costs: number[], project = 'app'): ProjectSummary { } } +type TestSession = ProjectSummary['sessions'][number] + +function contextSession( + i: number, + overrides: Partial<TestSession>, + project = 'app', +): TestSession { + return { + sessionId: `s${i + 1}`, + project, + firstTimestamp: `2026-05-${String(i + 1).padStart(2, '0')}T10:00:00Z`, + lastTimestamp: `2026-05-${String(i + 1).padStart(2, '0')}T10:30:00Z`, + totalCostUSD: 1, + totalInputTokens: 0, + totalOutputTokens: 0, + totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, + apiCalls: 1, + turns: [], + modelBreakdown: {}, + toolBreakdown: {}, + mcpBreakdown: {}, + bashBreakdown: {}, + categoryBreakdown: {} as
TestSession['categoryBreakdown'], + skillBreakdown: {}, + ...overrides, + } +} + +function projectWithContextSessions(sessions: TestSession[], project = 'app'): ProjectSummary { + return { + project, + projectPath: `/tmp/${project}`, + sessions, + totalCostUSD: sessions.reduce((sum, session) => sum + session.totalCostUSD, 0), + totalApiCalls: sessions.reduce((sum, session) => sum + session.apiCalls, 0), + } +} + describe('detectJunkReads', () => { it('returns null below minimum threshold', () => { const calls = [ @@ -241,6 +281,231 @@ describe('detectBloatedClaudeMd', () => { }) }) +describe('detectContextBloat', () => { + it('returns null below the input/context token floor', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 74_999, + totalOutputTokens: 100, + }), + ]) + + expect(detectContextBloat([project])).toBeNull() + }) + + it('returns null when output is proportionate to input/context tokens', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 100_000, + totalOutputTokens: 5_000, + }), + ]) + + expect(detectContextBloat([project])).toBeNull() + }) + + it('discounts cache reads when estimating context pressure', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 5_000, + totalCacheReadTokens: 700_000, + totalOutputTokens: 5_000, + }), + ]) + + expect(detectContextBloat([project])).toBeNull() + }) + + it('weights cache writes when estimating context pressure', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 10_000, + totalCacheWriteTokens: 80_000, + totalOutputTokens: 3_000, + }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('110.0K effective input/cache') + expect(finding!.tokensSaved).toBe(65_000) + }) + + it('flags sessions where input/cache tokens swamp output', () => { + const project = 
projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 90_000, + totalCacheReadTokens: 30_000, + totalOutputTokens: 2_000, + }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.title).toContain('context-heavy session') + expect(finding!.explanation).toContain('app/s1') + expect(finding!.explanation).toContain('93.0K effective input/cache') + expect(finding!.explanation).toContain('46.5:1') + expect(finding!.impact).toBe('low') + expect(finding!.tokensSaved).toBe(63_000) + }) + + it('uses medium impact between the low and high tiers', () => { + const project = projectWithContextSessions( + Array.from({ length: 4 }, (_, i) => contextSession(i, { + totalInputTokens: 80_000, + totalOutputTokens: 1_000, + })), + ) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.impact).toBe('medium') + }) + + it('uses high impact at 10 or more candidates regardless of total size', () => { + const project = projectWithContextSessions( + Array.from({ length: 10 }, (_, i) => contextSession(i, { + totalInputTokens: 80_000, + totalOutputTokens: 1_000, + })), + ) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.impact).toBe('high') + }) + + it('includes context growth from the previous session when it is large', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 20_000, + totalOutputTokens: 1_000, + }), + contextSession(1, { + totalInputTokens: 100_000, + totalOutputTokens: 2_000, + }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('5.0x previous session input') + }) + + it('calculates context growth within each project only', () => { + const finding = detectContextBloat([ + projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 20_000, + totalOutputTokens: 1_000, + }), + 
contextSession(1, { + totalInputTokens: 100_000, + totalOutputTokens: 2_000, + }), + ], 'app'), + projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 100_000, + totalOutputTokens: 2_000, + }, 'api'), + ], 'api'), + ]) + + expect(finding).not.toBeNull() + expect(finding!.explanation.match(/previous session input/g)).toHaveLength(1) + }) + + it('summarizes additional candidates after the preview limit', () => { + const project = projectWithContextSessions( + Array.from({ length: 6 }, (_, i) => contextSession(i, { + totalInputTokens: 80_000 + i * 10_000, + totalOutputTokens: 1_000, + })), + ) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('app/s6') + expect(finding!.explanation).toContain('; +1 more') + expect(finding!.impact).toBe('high') + }) + + it('uses high impact for one very large context-heavy session', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 600_000, + totalOutputTokens: 10_000, + }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.impact).toBe('high') + }) + + it('handles zero-output sessions without dividing by zero', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 80_000, + totalOutputTokens: 0, + }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('1000+:1') + expect(finding!.tokensSaved).toBe(80_000) + }) + + it('caps display ratio at 1000+:1 for non-zero-output sessions too', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 5_000_000, + totalOutputTokens: 100, + }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('1000+:1') + }) + + it('suppresses the growth ratio when the previous session is 
more than 7 days back', () => { + const project = projectWithContextSessions([ + { + ...contextSession(0, { totalInputTokens: 20_000, totalOutputTokens: 1_000 }), + firstTimestamp: '2026-05-01T10:00:00Z', + lastTimestamp: '2026-05-01T10:30:00Z', + }, + { + ...contextSession(1, { totalInputTokens: 100_000, totalOutputTokens: 2_000 }), + firstTimestamp: '2026-05-15T10:00:00Z', + lastTimestamp: '2026-05-15T10:30:00Z', + }, + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).not.toContain('previous session input') + }) + + it('anchors growth even when the previous session is below the reporting threshold', () => { + const project = projectWithContextSessions([ + contextSession(0, { totalInputTokens: 20_000, totalOutputTokens: 1_000 }), + contextSession(1, { totalInputTokens: 100_000, totalOutputTokens: 2_000 }), + ]) + + const finding = detectContextBloat([project]) + expect(finding).not.toBeNull() + // The first session sits below CONTEXT_BLOAT_MIN_INPUT_TOKENS (75K) and + // is not itself a candidate, but the growth-from-previous comparison for + // the second session must still anchor against it. 
+ expect(finding!.explanation).toContain('5.0x previous session input') + }) +}) + describe('detectSessionOutliers', () => { it('returns null when there are too few sessions for a project baseline', () => { expect(detectSessionOutliers([projectWithSessions([0.5, 4])])).toBeNull() @@ -277,6 +542,20 @@ describe('detectSessionOutliers', () => { expect(finding!.explanation).toContain('api/s4') expect(finding!.explanation).not.toContain('web/') }) + + it('excludes sessions already flagged by detectContextBloat', () => { + const project = projectWithSessions([1, 1, 1, 10]) + const excluded = new Set(['s4']) + expect(detectSessionOutliers([project], excluded)).toBeNull() + }) + + it('still flags cost outliers that are not context-bloat candidates', () => { + const project = projectWithSessions([1, 1, 1, 10]) + const excluded = new Set(['some-other-session']) + const finding = detectSessionOutliers([project], excluded) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('app/s4') + }) }) describe('computeHealth', () => {