codeburn/src/optimize.ts
AgentSeal 77257bcb89
Merge pull request #68 from lfl1337/fix/remove-claudeignore-references
docs(optimize): remove references to .claudeignore (#61)
2026-04-17 14:20:50 +02:00

1134 lines
42 KiB
TypeScript

import chalk from 'chalk'
import { readdir, stat } from 'fs/promises'
import { existsSync, statSync } from 'fs'
import { basename, join } from 'path'
import { homedir } from 'os'
import { readSessionFile, readSessionFileSync } from './fs-utils.js'
import { discoverAllSessions } from './providers/index.js'
import type { DateRange, ProjectSummary } from './types.js'
import { formatCost } from './currency.js'
import { formatTokens } from './format.js'
// ============================================================================
// Display constants
// ============================================================================
// Hex colour strings for terminal rendering. Their call sites are outside this
// chunk (presumably passed to chalk — confirm against the renderer).
const ORANGE = '#FF8C42'
const DIM = '#666666'
const GOLD = '#FFD700'
const CYAN = '#5BF5E0'
const GREEN = '#5BF5A0'
const RED = '#F55B5B'
// ============================================================================
// Token estimation constants
// ============================================================================
// Fixed per-item token costs used by the detectors to turn counts into
// `tokensSaved` estimates.
// Tokens attributed to a single file read (junk reads, duplicate reads, read/edit ratio).
const AVG_TOKENS_PER_READ = 600
// Tokens attributed to one MCP tool schema, and the number of tools assumed per
// server; detectUnusedMcp multiplies the two.
const TOKENS_PER_MCP_TOOL = 400
const TOOLS_PER_MCP_SERVER = 5
// Tokens attributed to each unused agent / skill / slash-command definition.
const TOKENS_PER_AGENT_DEF = 80
const TOKENS_PER_SKILL_DEF = 80
const TOKENS_PER_COMMAND_DEF = 60
// Tokens attributed to each CLAUDE.md line beyond the healthy limit.
const CLAUDEMD_TOKENS_PER_LINE = 13
// Tokens attributed to each character of bash output (0.25 => 4 chars per token).
const BASH_TOKENS_PER_CHAR = 0.25
// ============================================================================
// Detector thresholds
// ============================================================================
// CLAUDE.md: expanded line count above which a file is flagged, and above which
// the finding is rated 'high'.
const CLAUDEMD_HEALTHY_LINES = 200
const CLAUDEMD_HIGH_THRESHOLD_LINES = 400
// Junk reads: minimum count before a finding is produced; counts strictly above
// the HIGH / MEDIUM values select the impact.
const MIN_JUNK_READS_TO_FLAG = 3
const JUNK_READS_HIGH_THRESHOLD = 20
const JUNK_READS_MEDIUM_THRESHOLD = 5
// Duplicate reads: same scheme as junk reads.
const MIN_DUPLICATE_READS_TO_FLAG = 5
const DUPLICATE_READS_HIGH_THRESHOLD = 30
const DUPLICATE_READS_MEDIUM_THRESHOLD = 10
// Read/edit ratio: minimum edits before the ratio is evaluated, the ratio at or
// above which nothing is reported, and the ratios below which impact is high / medium.
const MIN_EDITS_FOR_RATIO = 10
const HEALTHY_READ_EDIT_RATIO = 4
const LOW_RATIO_HIGH_THRESHOLD = 2
const LOW_RATIO_MEDIUM_THRESHOLD = 3
// Cache bloat: minimum sample size (API calls, and sessions for the baseline),
// and the excess over baseline above which impact is 'high'.
const MIN_API_CALLS_FOR_CACHE = 10
const CACHE_EXCESS_HIGH_THRESHOLD = 15000
// Unused MCP servers: count at or above which impact is 'high'.
const UNUSED_MCP_HIGH_THRESHOLD = 3
// Ghost agents / skills / commands: counts at or above which impact is raised.
const GHOST_AGENTS_HIGH_THRESHOLD = 5
const GHOST_AGENTS_MEDIUM_THRESHOLD = 2
const GHOST_SKILLS_HIGH_THRESHOLD = 10
const GHOST_SKILLS_MEDIUM_THRESHOLD = 5
const GHOST_COMMANDS_MEDIUM_THRESHOLD = 10
// MCP config files modified within this window (24h) are not reported as unused.
const MCP_NEW_CONFIG_GRACE_MS = 24 * 60 * 60 * 1000
// Bash output cap (in characters) assumed when none is configured, and the cap
// that detectBashBloat recommends.
const BASH_DEFAULT_LIMIT = 30000
const BASH_RECOMMENDED_LIMIT = 15000
// ============================================================================
// Scoring constants
// ============================================================================
// Health score: penalty subtracted from 100 per finding, by impact.
const HEALTH_WEIGHT_HIGH = 15
const HEALTH_WEIGHT_MEDIUM = 7
const HEALTH_WEIGHT_LOW = 3
// Total penalty is capped here, so the score never drops below 100 - 80 = 20.
const HEALTH_MAX_PENALTY = 80
// Minimum score for each letter grade; anything below GRADE_D_MIN is an 'F'.
const GRADE_A_MIN = 90
const GRADE_B_MIN = 75
const GRADE_C_MIN = 55
const GRADE_D_MIN = 30
// Urgency score: blend of the impact weight and the normalized token estimate.
const URGENCY_IMPACT_WEIGHT = 0.7
const URGENCY_TOKEN_WEIGHT = 0.3
// tokensSaved value at which the token component of urgency saturates at 1.
const URGENCY_TOKEN_NORMALIZE = 500_000
// ============================================================================
// File system constants
// ============================================================================
// expandImports stops following @-imports once the nesting depth exceeds this value.
const MAX_IMPORT_DEPTH = 5
// Matches a line that starts with "@" followed by a "./", "../" or "/" path;
// capture group 1 is the path (up to the first whitespace).
const IMPORT_PATTERN = /^@(\.\.?\/[^\s]+|\/[^\s]+)/gm
// Matches either a <command-name>…</command-name> tag (group 1 = tag contents)
// or a "/name" token at the start of a line or after whitespace (group 2 = name).
const COMMAND_PATTERN = /<command-name>([^<]+)<\/command-name>|(?:^|\s)\/([a-zA-Z][\w-]*)/gm
/**
 * Directory names treated as generated or dependency output rather than
 * project source. JUNK_PATTERN matches them as whole path segments.
 */
const JUNK_DIRS = [
  'node_modules', '.git', 'dist', 'build', '__pycache__', '.next',
  '.nuxt', '.output', 'coverage', '.cache', '.tsbuildinfo',
  '.venv', 'venv', '.svn', '.hg',
]
// Each name is regex-escaped before being joined: most entries start with ".",
// which unescaped would match ANY character (so "/xgit/" would count as ".git").
const JUNK_DIR_ALTERNATION = JUNK_DIRS
  .map(dir => dir.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
  .join('|')
/** Matches any path that contains one of JUNK_DIRS as a complete `/name/` segment. */
const JUNK_PATTERN = new RegExp(`/(?:${JUNK_DIR_ALTERNATION})/`)
// Shell startup files (relative to the home directory) that readShellProfileLimit
// scans, in this order, for an exported BASH_MAX_OUTPUT_LENGTH.
const SHELL_PROFILES = ['.zshrc', '.bashrc', '.bash_profile', '.profile']
// Maximum number of directories / files listed in a finding's explanation.
const TOP_ITEMS_PREVIEW = 3
// Maximum number of ghost names listed before the rest is summarised as "+N more".
const GHOST_NAMES_PREVIEW = 5
// Maximum number of `mv` lines generated in a ghost finding's cleanup command.
const GHOST_CLEANUP_COMMANDS_LIMIT = 10
// ============================================================================
// Types
// ============================================================================
/** Severity of a finding; drives the health penalty and the urgency weight. */
export type Impact = 'high' | 'medium' | 'low'
/** Letter grade derived from the numeric health score by computeHealth. */
export type HealthGrade = 'A' | 'B' | 'C' | 'D' | 'F'
/**
 * Suggested remediation attached to a finding, discriminated by `type`.
 * The detectors in this chunk only produce 'paste' and 'command'.
 */
export type WasteAction =
| { type: 'paste'; label: string; text: string }
| { type: 'command'; label: string; text: string }
| { type: 'file-content'; label: string; path: string; content: string }
/** Trend reported on a finding; a 'resolved' trend is never reported because the finding is dropped instead. */
export type Trend = 'active' | 'improving'
/** One detected source of avoidable token usage. */
export type WasteFinding = {
title: string
explanation: string
impact: Impact
// Estimated tokens recoverable by applying `fix`.
tokensSaved: number
fix: WasteAction
// Only set by detectors that compare recent activity against the rest of the period.
trend?: Trend
}
/** Aggregate result type; it is produced outside this chunk. */
export type OptimizeResult = {
findings: WasteFinding[]
costRate: number
healthScore: number
healthGrade: HealthGrade
}
/** A single `tool_use` block extracted from an assistant message. */
export type ToolCall = {
name: string
input: Record<string, unknown>
// Basename of the JSONL file the call came from, without the ".jsonl" extension.
sessionId: string
project: string
// True when the entry's timestamp is at or after the recent-window cutoff.
recent?: boolean
}
/** Cache-creation usage of one assistant message (only recorded when > 0). */
export type ApiCallMeta = {
cacheCreationTokens: number
// Most recent `version` string seen in the file up to this entry ('' if none).
version: string
recent?: boolean
}
/** Everything scanSessions collects across all session files. */
type ScanData = {
toolCalls: ToolCall[]
projectCwds: Set<string>
apiCalls: ApiCallMeta[]
userMessages: string[]
}
// ============================================================================
// JSONL scanner
// ============================================================================
// Maximum number of session files scanSessions scans at the same time.
const FILE_READ_CONCURRENCY = 16
// Lifetime of a cached result; the code that applies it is outside this chunk.
const RESULT_CACHE_TTL_MS = 60_000
// Entries timestamped within this many hours of "now" are tagged `recent`.
const RECENT_WINDOW_HOURS = 48
const RECENT_WINDOW_MS = RECENT_WINDOW_HOURS * 60 * 60 * 1000
// Period length sessionTrend assumes when no date range is supplied.
const DEFAULT_TREND_PERIOD_DAYS = 30
const DEFAULT_TREND_PERIOD_MS = DEFAULT_TREND_PERIOD_DAYS * 24 * 60 * 60 * 1000
// A recent rate below this fraction of the baseline rate counts as 'improving'.
const IMPROVING_THRESHOLD = 0.5
/**
 * Lists the session transcripts under a project directory.
 *
 * Returns every `*.jsonl` directly inside `dirPath`, followed by every
 * `*.jsonl` inside `<dirPath>/<entry>/subagents` for each remaining entry.
 * Unreadable or missing directories contribute nothing instead of throwing.
 */
async function collectJsonlFiles(dirPath: string): Promise<string[]> {
  const listOrEmpty = async (dir: string): Promise<string[]> => {
    try {
      return await readdir(dir)
    } catch {
      return []
    }
  }
  const isJsonl = (name: string): boolean => name.endsWith('.jsonl')

  const entries = await listOrEmpty(dirPath)
  const found: string[] = []
  const others: string[] = []
  for (const entry of entries) {
    if (isJsonl(entry)) found.push(join(dirPath, entry))
    else others.push(entry)
  }

  // Every non-transcript entry is probed for a "subagents" folder; plain files
  // simply fail the readdir and yield an empty list.
  for (const entry of others) {
    const subagentsDir = join(dirPath, entry, 'subagents')
    const children = await listOrEmpty(subagentsDir)
    for (const child of children) {
      if (isJsonl(child)) found.push(join(subagentsDir, child))
    }
  }
  return found
}
/**
 * Reports whether a file was last modified before the start of `range`.
 *
 * @returns `false` when no range is given or when the file cannot be stat'ed,
 *          so that unknown files are never skipped.
 */
async function isFileStaleForRange(filePath: string, range: DateRange | undefined): Promise<boolean> {
  if (!range) return false
  return stat(filePath)
    .then(info => info.mtimeMs < range.start.getTime())
    .catch(() => false)
}
/**
 * Runs `worker` over every item with at most `limit` invocations in flight.
 *
 * Items are claimed in index order from a shared cursor. The returned promise
 * resolves once all lanes have drained and rejects if any worker rejects.
 */
async function runWithConcurrency<T>(
  items: T[],
  limit: number,
  worker: (item: T) => Promise<void>,
): Promise<void> {
  let cursor = 0
  const drain = async (): Promise<void> => {
    while (cursor < items.length) {
      // Claim the slot before awaiting so no other lane picks the same item.
      const item = items[cursor]
      cursor += 1
      await worker(item)
    }
  }
  const laneCount = Math.min(limit, items.length)
  const lanes = Array.from({ length: laneCount }, () => drain())
  await Promise.all(lanes)
}
/** What scanJsonlFile extracts from a single session file. */
type ScanFileResult = {
// tool_use blocks found in in-range assistant messages.
calls: ToolCall[]
// `cwd` values of in-range entries (duplicates are kept).
cwds: string[]
// Assistant messages that reported a positive cache-creation token count.
apiCalls: ApiCallMeta[]
// Text of in-range user messages.
userMessages: string[]
}
/**
 * Tests whether a timestamp falls inside `range` (both bounds inclusive).
 *
 * No range means everything is in range; with a range, a missing or
 * unparseable timestamp is out of range.
 */
function inRange(timestamp: string | undefined, range: DateRange | undefined): boolean {
  if (!range) return true
  if (!timestamp) return false
  const moment = new Date(timestamp)
  // Kept as two positive comparisons: an Invalid Date fails both, so it is rejected.
  const notBeforeStart = moment >= range.start
  const notAfterEnd = moment <= range.end
  return notBeforeStart && notAfterEnd
}
/**
 * Tests whether a timestamp is at or after `cutoff` (epoch milliseconds).
 * Missing or unparseable timestamps are never recent.
 */
function isRecent(timestamp: string | undefined, cutoff: number): boolean {
  if (timestamp) {
    const epochMs = new Date(timestamp).getTime()
    return epochMs >= cutoff
  }
  return false
}
/**
 * Extracts tool calls, working directories, cache-creation usage and user
 * message text from one session JSONL file.
 *
 * Malformed input is skipped rather than thrown on: lines that are not valid
 * JSON, lines whose JSON value is not an object, content blocks that are not
 * objects, and tool_use blocks without a string name are all ignored.
 *
 * @param filePath       Path of the `.jsonl` file; its basename becomes the session id.
 * @param project        Project label copied onto every returned tool call.
 * @param dateRange      When given, only entries whose timestamp is inside it are collected.
 * @param recentCutoffMs Entries timestamped at or after this instant are tagged `recent`.
 * @returns Empty collections when the file could not be read.
 */
export async function scanJsonlFile(
  filePath: string,
  project: string,
  dateRange: DateRange | undefined,
  recentCutoffMs = Date.now() - RECENT_WINDOW_MS,
): Promise<ScanFileResult> {
  const content = await readSessionFile(filePath)
  if (content === null) return { calls: [], cwds: [], apiCalls: [], userMessages: [] }
  const calls: ToolCall[] = []
  const cwds: string[] = []
  const apiCalls: ApiCallMeta[] = []
  const userMessages: string[] = []
  const sessionId = basename(filePath, '.jsonl')
  let lastVersion = ''
  for (const line of content.split('\n')) {
    if (!line.trim()) continue
    let parsed: unknown
    try { parsed = JSON.parse(line) } catch { continue }
    // `null`, numbers and strings are valid JSON but carry no entry fields.
    if (parsed === null || typeof parsed !== 'object') continue
    const entry = parsed as Record<string, unknown>
    // The version is tracked even for out-of-range entries so later in-range
    // calls are attributed to the version in effect at that point in the file.
    if (entry.version && typeof entry.version === 'string') lastVersion = entry.version
    const ts = typeof entry.timestamp === 'string' ? entry.timestamp : undefined
    const withinRange = inRange(ts, dateRange)
    const recent = isRecent(ts, recentCutoffMs)
    if (entry.cwd && typeof entry.cwd === 'string' && withinRange) cwds.push(entry.cwd)
    if (entry.type === 'user') {
      if (!withinRange) continue
      const userMsg = entry.message as Record<string, unknown> | undefined
      const msgContent = userMsg?.content
      if (typeof msgContent === 'string') {
        userMessages.push(msgContent)
      } else if (Array.isArray(msgContent)) {
        for (const block of msgContent) {
          if (block && typeof block === 'object' && block.type === 'text' && typeof block.text === 'string') {
            userMessages.push(block.text)
          }
        }
      }
      continue
    }
    if (entry.type !== 'assistant') continue
    if (!withinRange) continue
    const msg = entry.message as Record<string, unknown> | undefined
    const usage = msg?.usage as Record<string, unknown> | undefined
    // Only a real number counts; anything else is treated as "no cache creation".
    const rawCacheCreate = usage?.cache_creation_input_tokens
    const cacheCreate = typeof rawCacheCreate === 'number' ? rawCacheCreate : 0
    if (cacheCreate > 0) apiCalls.push({ cacheCreationTokens: cacheCreate, version: lastVersion, recent })
    const blocks = msg?.content
    if (!Array.isArray(blocks)) continue
    for (const block of blocks) {
      // Same guard as the user branch: array elements may be null or primitives.
      if (!block || typeof block !== 'object' || block.type !== 'tool_use') continue
      // Detectors call string methods on the name, so nameless blocks are dropped.
      if (typeof block.name !== 'string') continue
      const input = block.input && typeof block.input === 'object'
        ? block.input as Record<string, unknown>
        : {}
      calls.push({ name: block.name, input, sessionId, project, recent })
    }
  }
  return { calls, cwds, apiCalls, userMessages }
}
/**
 * Scans every 'claude' session source and merges the per-file results.
 *
 * Files last modified before the start of `dateRange` are skipped without
 * being read; the remaining files are scanned with at most
 * FILE_READ_CONCURRENCY scans in flight.
 */
async function scanSessions(dateRange?: DateRange): Promise<ScanData> {
  const merged: ScanData = {
    toolCalls: [],
    projectCwds: new Set<string>(),
    apiCalls: [],
    userMessages: [],
  }
  const queue: Array<{ file: string; project: string }> = []
  const sources = await discoverAllSessions('claude')
  for (const source of sources) {
    const candidates = await collectJsonlFiles(source.path)
    for (const file of candidates) {
      const stale = await isFileStaleForRange(file, dateRange)
      if (!stale) queue.push({ file, project: source.project })
    }
  }
  await runWithConcurrency(queue, FILE_READ_CONCURRENCY, async task => {
    const scanned = await scanJsonlFile(task.file, task.project, dateRange)
    merged.toolCalls.push(...scanned.calls)
    scanned.cwds.forEach(cwd => merged.projectCwds.add(cwd))
    merged.apiCalls.push(...scanned.apiCalls)
    merged.userMessages.push(...scanned.userMessages)
  })
  return merged
}
// ============================================================================
// Shared helpers
// ============================================================================
/**
 * Reads a file through readSessionFileSync and parses it as JSON.
 *
 * @returns The parsed value, or `null` when the file could not be read or is
 *          not valid JSON. The parsed value's shape is not validated.
 */
function readJsonFile(path: string): Record<string, unknown> | null {
  const text = readSessionFileSync(path)
  if (text === null) return null
  let parsed: Record<string, unknown> | null
  try {
    parsed = JSON.parse(text)
  } catch {
    parsed = null
  }
  return parsed
}
/**
 * Abbreviates a path inside the user's home directory to `~/...` for display.
 *
 * The home prefix must end on a path-segment boundary: a sibling such as
 * `/home/alice2` is not rewritten to `~2` when the home is `/home/alice`.
 * Paths outside the home directory are returned unchanged.
 */
function shortHomePath(absPath: string): string {
  const home = homedir()
  if (!home || !absPath.startsWith(home)) return absPath
  if (absPath.length === home.length) return '~'
  const rest = absPath.slice(home.length)
  // Accept both separators so Windows-style paths are abbreviated too.
  const onBoundary = rest.startsWith('/') || rest.startsWith('\\')
  return onBoundary ? '~' + rest : absPath
}
/** True for the tool names that denote a single-file read ('Read' and 'FileReadTool'). */
function isReadTool(name: string): boolean {
  switch (name) {
    case 'Read':
    case 'FileReadTool':
      return true
    default:
      return false
  }
}
/**
 * One configured MCP server: `original` is the name as written in the config,
 * `normalized` is that name with ":" replaced by "_", and `mtime` is the
 * modification time (ms) of the config file it came from, or 0 if unknown.
 */
type McpConfigEntry = { normalized: string; original: string; mtime: number }
/**
 * Collects the `mcpServers` declared in the user-level Claude settings files
 * and, for every project directory, in `.mcp.json` and the project-level
 * Claude settings files.
 *
 * @returns Servers keyed by normalized name. When a name appears in several
 *          files, the entry from the most recently modified file is kept.
 */
export function loadMcpConfigs(projectCwds: Iterable<string>): Map<string, McpConfigEntry> {
  const candidates: string[] = [
    join(homedir(), '.claude', 'settings.json'),
    join(homedir(), '.claude', 'settings.local.json'),
  ]
  for (const cwd of projectCwds) {
    candidates.push(
      join(cwd, '.mcp.json'),
      join(cwd, '.claude', 'settings.json'),
      join(cwd, '.claude', 'settings.local.json'),
    )
  }

  const byName = new Map<string, McpConfigEntry>()
  for (const configPath of candidates) {
    if (!existsSync(configPath)) continue
    const parsed = readJsonFile(configPath)
    if (!parsed) continue
    // A failed stat leaves the timestamp at 0 (best effort, never fatal).
    let modifiedAt = 0
    try { modifiedAt = statSync(configPath).mtimeMs } catch {}
    const declared = (parsed.mcpServers ?? {}) as Record<string, unknown>
    for (const original of Object.keys(declared)) {
      const normalized = original.replace(/:/g, '_')
      const prior = byName.get(normalized)
      // An existing entry is replaced only by a strictly newer file.
      if (prior && !(prior.mtime < modifiedAt)) continue
      byName.set(normalized, { normalized, original, mtime: modifiedAt })
    }
  }
  return byName
}
// ============================================================================
// Detectors
// ============================================================================
/**
 * Flags file reads that land inside generated or dependency directories.
 *
 * @returns `null` when there are fewer than MIN_JUNK_READS_TO_FLAG such reads
 *          or when the trend is 'resolved'; otherwise a finding whose fix is a
 *          CLAUDE.md instruction listing the directories to avoid.
 */
export function detectJunkReads(calls: ToolCall[], dateRange?: DateRange): WasteFinding | null {
  const hitsByDir = new Map<string, number>()
  let junkTotal = 0
  let junkRecent = 0
  for (const { name, input, recent } of calls) {
    if (!isReadTool(name)) continue
    const target = input.file_path as string | undefined
    if (!target) continue
    if (!JUNK_PATTERN.test(target)) continue
    junkTotal += 1
    if (recent) junkRecent += 1
    // Each read is attributed to the first JUNK_DIRS entry found in its path.
    const owner = JUNK_DIRS.find(dir => target.includes(`/${dir}/`))
    if (owner !== undefined) hitsByDir.set(owner, (hitsByDir.get(owner) ?? 0) + 1)
  }
  if (junkTotal < MIN_JUNK_READS_TO_FLAG) return null

  const anyRecentCall = calls.some(c => c.recent)
  const trend = sessionTrend(junkRecent, junkTotal, dateRange, anyRecentCall)
  if (trend === 'resolved') return null

  const ranked = [...hitsByDir].sort((a, b) => b[1] - a[1])
  const dirList = ranked
    .slice(0, TOP_ITEMS_PREVIEW)
    .map(([dir, hits]) => `${dir}/ (${hits}x)`)
    .join(', ')
  const detected = ranked.map(([dir]) => dir)
  // Pad the advice with common offenders not seen yet, up to six names in total.
  const padBudget = Math.max(0, 6 - detected.length)
  const padding = ['node_modules', '.git', 'dist', '__pycache__']
    .filter(dir => !hitsByDir.has(dir))
    .slice(0, padBudget)
  const dirsToAvoid = detected.concat(padding).join(', ')

  let impact: Impact = 'low'
  if (junkTotal > JUNK_READS_HIGH_THRESHOLD) impact = 'high'
  else if (junkTotal > JUNK_READS_MEDIUM_THRESHOLD) impact = 'medium'

  return {
    title: 'Claude is reading build/dependency folders',
    explanation: `Claude read into ${dirList} (${junkTotal} reads). These are generated or dependency directories, not your code. Tell Claude in CLAUDE.md to avoid them.`,
    impact,
    tokensSaved: junkTotal * AVG_TOKENS_PER_READ,
    fix: {
      type: 'paste',
      label: 'Append to your project CLAUDE.md:',
      text: `Do not read or search files under these directories unless I explicitly ask: ${dirsToAvoid}.`,
    },
    trend,
  }
}
/**
 * Flags files that were read more than once within the same session.
 *
 * Reads are grouped per (project, session, file path); every read after the
 * first in a group is a duplicate. Paths matching JUNK_PATTERN are left to
 * detectJunkReads.
 *
 * @returns `null` when there are fewer than MIN_DUPLICATE_READS_TO_FLAG
 *          duplicates or when the trend is 'resolved'.
 */
export function detectDuplicateReads(calls: ToolCall[], dateRange?: DateRange): WasteFinding | null {
  const sessionFiles = new Map<string, Map<string, { count: number; recent: number }>>()
  for (const call of calls) {
    if (!isReadTool(call.name)) continue
    const filePath = call.input.file_path as string | undefined
    if (!filePath || JUNK_PATTERN.test(filePath)) continue
    const key = `${call.project}:${call.sessionId}`
    let perFile = sessionFiles.get(key)
    if (!perFile) {
      perFile = new Map()
      sessionFiles.set(key, perFile)
    }
    const entry = perFile.get(filePath) ?? { count: 0, recent: 0 }
    entry.count++
    if (call.recent) entry.recent++
    perFile.set(filePath, entry)
  }
  let totalDuplicates = 0
  let recentDuplicates = 0
  // Aggregated per basename for display. `reads` is tracked separately from
  // `extra` because each session contributes its own first read: printing
  // `extra + 1` under-reports files that repeat in more than one session.
  const byName = new Map<string, { extra: number; reads: number }>()
  for (const perFile of sessionFiles.values()) {
    for (const [file, entry] of perFile) {
      if (entry.count <= 1) continue
      const extra = entry.count - 1
      totalDuplicates += extra
      if (entry.recent > 1) recentDuplicates += entry.recent - 1
      const name = basename(file)
      const agg = byName.get(name) ?? { extra: 0, reads: 0 }
      agg.extra += extra
      agg.reads += entry.count
      byName.set(name, agg)
    }
  }
  if (totalDuplicates < MIN_DUPLICATE_READS_TO_FLAG) return null
  const hasRecentActivity = calls.some(c => c.recent)
  const trend = sessionTrend(recentDuplicates, totalDuplicates, dateRange, hasRecentActivity)
  if (trend === 'resolved') return null
  const worst = [...byName.entries()]
    .sort((a, b) => b[1].extra - a[1].extra)
    .slice(0, TOP_ITEMS_PREVIEW)
    .map(([name, agg]) => `${name} (${agg.reads}x)`)
    .join(', ')
  const tokensSaved = totalDuplicates * AVG_TOKENS_PER_READ
  return {
    title: 'Claude is re-reading the same files',
    explanation: `${totalDuplicates} redundant re-reads across sessions. Top repeats: ${worst}. Each re-read loads the same content into context again.`,
    impact: totalDuplicates > DUPLICATE_READS_HIGH_THRESHOLD ? 'high' : totalDuplicates > DUPLICATE_READS_MEDIUM_THRESHOLD ? 'medium' : 'low',
    tokensSaved,
    fix: {
      type: 'paste',
      label: 'Point Claude at exact locations in your prompt, for example:',
      text: 'In <file> lines <start>-<end>, look at the <function> function.',
    },
    trend,
  }
}
/**
 * Flags configured MCP servers that were never called.
 *
 * A server counts as used when a tool call named `mcp__<server>__...` exists
 * or when it appears in any session's `mcpBreakdown`. Servers whose config file
 * was modified within MCP_NEW_CONFIG_GRACE_MS are not reported.
 *
 * @returns `null` when nothing is configured or every server is used or new.
 */
export function detectUnusedMcp(
  calls: ToolCall[],
  projects: ProjectSummary[],
  projectCwds: Set<string>,
): WasteFinding | null {
  const configured = loadMcpConfigs(projectCwds)
  if (configured.size === 0) return null

  const used = new Set<string>()
  for (const { name } of calls) {
    if (!name.startsWith('mcp__')) continue
    const [, server] = name.split('__')
    if (server) used.add(server)
  }
  const allSessions = projects.flatMap(p => p.sessions)
  for (const session of allSessions) {
    Object.keys(session.mcpBreakdown).forEach(server => used.add(server))
  }

  const checkedAt = Date.now()
  const unused: string[] = []
  configured.forEach(entry => {
    if (used.has(entry.normalized)) return
    const withinGrace = entry.mtime > 0 && checkedAt - entry.mtime < MCP_NEW_CONFIG_GRACE_MS
    if (!withinGrace) unused.push(entry.original)
  })
  if (unused.length === 0) return null

  const plural = unused.length > 1 ? 's' : ''
  const perSession = unused.length * TOOLS_PER_MCP_SERVER * TOKENS_PER_MCP_TOOL
  // Zero sessions still counts as one so the estimate is never 0.
  const tokensSaved = perSession * Math.max(allSessions.length, 1)
  const removeCommands = unused.map(server => `claude mcp remove ${server}`)

  return {
    title: `${unused.length} MCP server${plural} configured but never used`,
    explanation: `Never called in this period: ${unused.join(', ')}. Each server loads ~${TOOLS_PER_MCP_SERVER * TOKENS_PER_MCP_TOOL} tokens of tool schema into every session.`,
    impact: unused.length >= UNUSED_MCP_HIGH_THRESHOLD ? 'high' : 'medium',
    tokensSaved,
    fix: {
      type: 'command',
      label: `Remove unused server${plural}:`,
      text: removeCommands.join('\n'),
    },
  }
}
/**
 * Counts the lines of a CLAUDE.md-style file plus everything it pulls in
 * through `@path` import lines, recursively.
 *
 * @param filePath File to expand.
 * @param seen     Paths already expanded in this traversal; mutated by the call.
 * @param depth    Current nesting depth; files deeper than MAX_IMPORT_DEPTH are not read.
 * @returns `totalLines` over this file and its expanded imports, and
 *          `importedFiles`, the number of distinct imported files that exist.
 */
function expandImports(filePath: string, seen: Set<string>, depth: number): { totalLines: number; importedFiles: number } {
  if (depth > MAX_IMPORT_DEPTH || seen.has(filePath)) return { totalLines: 0, importedFiles: 0 }
  seen.add(filePath)
  const content = readSessionFileSync(filePath)
  if (content === null) return { totalLines: 0, importedFiles: 0 }
  let totalLines = content.split('\n').length
  let importedFiles = 0
  // join(file, '..') resolves to the directory that contains the file.
  const dir = join(filePath, '..')
  // matchAll starts from the regex's current lastIndex, so reset the shared global regex.
  IMPORT_PATTERN.lastIndex = 0
  for (const match of content.matchAll(IMPORT_PATTERN)) {
    const rawPath = match[1]
    if (!rawPath) continue
    const resolved = rawPath.startsWith('/') ? rawPath : join(dir, rawPath)
    // A file that was already expanded (duplicate or cyclic import) adds no
    // lines, so it must not be counted as another imported file either.
    if (seen.has(resolved)) continue
    if (!existsSync(resolved)) continue
    const nested = expandImports(resolved, seen, depth + 1)
    totalLines += nested.totalLines
    importedFiles += 1 + nested.importedFiles
  }
  return { totalLines, importedFiles }
}
/**
 * Flags CLAUDE.md files whose expanded length (file plus @-imports) exceeds
 * CLAUDEMD_HEALTHY_LINES.
 *
 * Checks `CLAUDE.md` and `.claude/CLAUDE.md` in every project directory.
 *
 * @returns `null` when no file is over the limit.
 */
export function detectBloatedClaudeMd(projectCwds: Set<string>): WasteFinding | null {
  const candidateNames = ['CLAUDE.md', '.claude/CLAUDE.md']
  const oversized: { path: string; expandedLines: number; imports: number }[] = []
  projectCwds.forEach(cwd => {
    candidateNames.forEach(name => {
      const fullPath = join(cwd, name)
      if (!existsSync(fullPath)) return
      // A fresh `seen` set per root file: each CLAUDE.md is measured independently.
      const expanded = expandImports(fullPath, new Set<string>(), 0)
      if (expanded.totalLines <= CLAUDEMD_HEALTHY_LINES) return
      oversized.push({
        path: `${shortHomePath(cwd)}/${name}`,
        expandedLines: expanded.totalLines,
        imports: expanded.importedFiles,
      })
    })
  })
  if (oversized.length === 0) return null

  oversized.sort((a, b) => b.expandedLines - a.expandedLines)
  const worst = oversized[0]
  let totalExtraLines = 0
  for (const file of oversized) totalExtraLines += file.expandedLines - CLAUDEMD_HEALTHY_LINES
  const tokensSaved = totalExtraLines * CLAUDEMD_TOKENS_PER_LINE

  const describe = (file: { path: string; expandedLines: number; imports: number }): string => {
    let importNote = ''
    if (file.imports > 0) importNote = ` with ${file.imports} @-import${file.imports > 1 ? 's' : ''}`
    return `${file.path} (${file.expandedLines} lines${importNote})`
  }
  const list = oversized.slice(0, TOP_ITEMS_PREVIEW).map(describe).join(', ')

  return {
    title: 'Your CLAUDE.md is too long',
    explanation: `${list}. CLAUDE.md plus all @-imported files load into every API call. Trimming below ${CLAUDEMD_HEALTHY_LINES} lines saves ~${formatTokens(tokensSaved)} tokens per call.`,
    impact: worst.expandedLines > CLAUDEMD_HIGH_THRESHOLD_LINES ? 'high' : 'medium',
    tokensSaved,
    fix: {
      type: 'paste',
      label: 'Ask Claude to trim it:',
      text: `Review CLAUDE.md and all @-imported files. Cut total expanded content to under ${CLAUDEMD_HEALTHY_LINES} lines. Remove anything Claude can figure out from the code itself. Keep only rules, gotchas, and non-obvious conventions.`,
    },
  }
}
/** Tool names counted as reads for the read/edit ratio. */
const READ_TOOL_NAMES = new Set(['Read', 'Grep', 'Glob', 'FileReadTool', 'GrepTool', 'GlobTool'])
/** Tool names counted as edits for the read/edit ratio. */
const EDIT_TOOL_NAMES = new Set(['Edit', 'Write', 'FileEditTool', 'FileWriteTool', 'NotebookEdit'])
/**
 * Flags periods in which there are few read calls per edit call.
 *
 * @returns `null` when there are fewer than MIN_EDITS_FOR_RATIO edits, when the
 *          overall ratio is at least HEALTHY_READ_EDIT_RATIO, or when the
 *          recent window (given enough recent edits) is already healthy.
 */
export function detectLowReadEditRatio(calls: ToolCall[]): WasteFinding | null {
  const tally = { reads: 0, edits: 0, recentReads: 0, recentEdits: 0 }
  for (const { name, recent } of calls) {
    if (READ_TOOL_NAMES.has(name)) {
      tally.reads += 1
      if (recent) tally.recentReads += 1
      continue
    }
    if (EDIT_TOOL_NAMES.has(name)) {
      tally.edits += 1
      if (recent) tally.recentEdits += 1
    }
  }
  const { reads, edits, recentReads, recentEdits } = tally
  if (edits < MIN_EDITS_FOR_RATIO) return null
  const ratio = reads / edits
  if (ratio >= HEALTHY_READ_EDIT_RATIO) return null

  // The recent window is only judged when it holds enough edits on its own.
  let improving = false
  if (recentEdits >= MIN_EDITS_FOR_RATIO) {
    const recentRatio = recentReads / recentEdits
    if (recentRatio >= HEALTHY_READ_EDIT_RATIO) return null
    improving = recentRatio > ratio * (1 / IMPROVING_THRESHOLD)
  }
  const trend: Trend = improving ? 'improving' : 'active'

  let impact: Impact
  if (ratio < LOW_RATIO_HIGH_THRESHOLD) impact = 'high'
  else if (ratio < LOW_RATIO_MEDIUM_THRESHOLD) impact = 'medium'
  else impact = 'low'

  // Reads that would have been needed to reach the healthy ratio.
  const extraReadsNeeded = Math.max(Math.round(edits * HEALTHY_READ_EDIT_RATIO) - reads, 0)

  return {
    title: 'Claude edits more than it reads',
    explanation: `Claude made ${reads} reads and ${edits} edits (ratio ${ratio.toFixed(1)}:1). A healthy ratio is ${HEALTHY_READ_EDIT_RATIO}+ reads per edit. Editing without reading leads to retries and wasted tokens.`,
    impact,
    tokensSaved: extraReadsNeeded * AVG_TOKENS_PER_READ,
    fix: {
      type: 'paste',
      label: 'Add to your CLAUDE.md:',
      text: 'Before editing any file, read it first. Before modifying a function, grep for all callers. Research before you edit.',
    },
    trend,
  }
}
// Baseline used when there are too few sessions to derive one from data.
const DEFAULT_CACHE_BASELINE_TOKENS = 50_000
// Quantile of per-session cache-write totals taken as the baseline.
const CACHE_BASELINE_QUANTILE = 0.25
// The median must reach baseline * this factor before cache bloat is reported.
const CACHE_BLOAT_MULTIPLIER = 1.4
// Minimum calls a version needs before its average is compared with others.
const CACHE_VERSION_MIN_SAMPLES = 5
// Minimum gap between version averages before the version note is shown.
const CACHE_VERSION_DIFF_THRESHOLD = 10_000
/**
 * Derives a cache-creation baseline from the sessions' positive cache-write totals.
 *
 * @returns The value at CACHE_BASELINE_QUANTILE of those totals, or
 *          DEFAULT_CACHE_BASELINE_TOKENS when fewer than MIN_API_CALLS_FOR_CACHE
 *          sessions have a positive total.
 */
function computeBudgetAwareCacheBaseline(projects: ProjectSummary[]): number {
  const positiveWrites: number[] = []
  for (const project of projects) {
    for (const session of project.sessions) {
      const written = session.totalCacheWriteTokens
      if (written > 0) positiveWrites.push(written)
    }
  }
  // Also covers the "no sessions at all" case.
  if (positiveWrites.length < MIN_API_CALLS_FOR_CACHE) return DEFAULT_CACHE_BASELINE_TOKENS
  positiveWrites.sort((a, b) => a - b)
  const quantileIndex = Math.floor(positiveWrites.length * CACHE_BASELINE_QUANTILE)
  return positiveWrites[quantileIndex] || DEFAULT_CACHE_BASELINE_TOKENS
}
/**
 * Flags periods in which the median cache-creation size per API call is well
 * above the baseline derived from the sessions.
 *
 * @returns `null` when there are fewer than MIN_API_CALLS_FOR_CACHE calls, when
 *          the median is below baseline * CACHE_BLOAT_MULTIPLIER, or when the
 *          trend is 'resolved'.
 */
export function detectCacheBloat(apiCalls: ApiCallMeta[], projects: ProjectSummary[], dateRange?: DateRange): WasteFinding | null {
  if (apiCalls.length < MIN_API_CALLS_FOR_CACHE) return null
  const ascending = apiCalls.map(call => call.cacheCreationTokens).sort((a, b) => a - b)
  const median = ascending[Math.floor(ascending.length / 2)]
  const baseline = computeBudgetAwareCacheBaseline(projects)
  const bloatThreshold = baseline * CACHE_BLOAT_MULTIPLIER
  if (median < bloatThreshold) return null

  let totalBloated = 0
  let recentBloated = 0
  let sawRecentCall = false
  for (const call of apiCalls) {
    if (call.recent) sawRecentCall = true
    if (!(call.cacheCreationTokens > bloatThreshold)) continue
    totalBloated += 1
    if (call.recent) recentBloated += 1
  }
  const trend = sessionTrend(recentBloated, totalBloated, dateRange, sawRecentCall)
  if (trend === 'resolved') return null

  // Average cache-creation size per client version, for versions with enough samples.
  const perVersion = new Map<string, { total: number; count: number }>()
  for (const { version, cacheCreationTokens } of apiCalls) {
    if (!version) continue
    let bucket = perVersion.get(version)
    if (!bucket) {
      bucket = { total: 0, count: 0 }
      perVersion.set(version, bucket)
    }
    bucket.total += cacheCreationTokens
    bucket.count += 1
  }
  const versionAvgs: { version: string; avg: number }[] = []
  for (const [version, bucket] of perVersion) {
    if (bucket.count < CACHE_VERSION_MIN_SAMPLES) continue
    versionAvgs.push({ version, avg: Math.round(bucket.total / bucket.count) })
  }
  versionAvgs.sort((a, b) => b.avg - a.avg)

  const excess = median - baseline
  let versionNote = ''
  if (versionAvgs.length >= 2) {
    // Sorted descending: first is the heaviest version, last the lightest.
    const high = versionAvgs[0]
    const low = versionAvgs[versionAvgs.length - 1]
    if (high.avg - low.avg > CACHE_VERSION_DIFF_THRESHOLD) {
      versionNote = ` Version ${high.version} averages ${formatTokens(high.avg)} vs ${low.version} at ${formatTokens(low.avg)}.`
    }
  }

  return {
    title: 'Session warmup is unusually large',
    explanation: `Median cache_creation per call is ${formatTokens(median)} tokens, about ${formatTokens(excess)} above your baseline of ${formatTokens(baseline)}.${versionNote}`,
    impact: excess > CACHE_EXCESS_HIGH_THRESHOLD ? 'high' : 'medium',
    tokensSaved: excess * apiCalls.length,
    fix: {
      type: 'paste',
      label: 'Check for recent Claude Code updates or heavy MCP/skill additions. As a workaround (not officially supported):',
      text: 'export ANTHROPIC_CUSTOM_HEADERS=\'User-Agent: claude-cli/2.1.98 (external, sdk-cli)\'',
    },
    trend,
  }
}
/**
 * Lists the `.md` entries directly inside `dir`, with the extension removed.
 *
 * @returns An empty list when the directory is missing or cannot be read.
 */
async function listMarkdownFiles(dir: string): Promise<string[]> {
  if (!existsSync(dir)) return []
  let entries: string[]
  try {
    entries = await readdir(dir)
  } catch {
    return []
  }
  const names: string[] = []
  for (const entry of entries) {
    if (entry.endsWith('.md')) names.push(entry.slice(0, -'.md'.length))
  }
  return names
}
/**
 * Lists the entries of `dir` that contain a `SKILL.md` file.
 *
 * @returns An empty list when the directory is missing or cannot be read.
 */
async function listSkillDirs(dir: string): Promise<string[]> {
  if (!existsSync(dir)) return []
  try {
    const entries = await readdir(dir)
    return entries.filter(entry => existsSync(join(dir, entry, 'SKILL.md')))
  } catch {
    return []
  }
}
/**
 * Flags agent definitions in `~/.claude/agents` that were never invoked.
 *
 * An agent counts as invoked when an 'Agent' or 'Task' tool call names it in
 * `subagent_type`.
 *
 * @returns `null` when no agents are defined or all of them were invoked.
 */
export async function detectGhostAgents(calls: ToolCall[]): Promise<WasteFinding | null> {
  const defined = await listMarkdownFiles(join(homedir(), '.claude', 'agents'))
  if (defined.length === 0) return null

  const invoked = new Set<string>()
  for (const { name, input } of calls) {
    const isAgentCall = name === 'Agent' || name === 'Task'
    if (!isAgentCall) continue
    const subType = input.subagent_type as string | undefined
    if (subType) invoked.add(subType)
  }

  const ghosts = defined.filter(agent => !invoked.has(agent))
  const ghostCount = ghosts.length
  if (ghostCount === 0) return null

  const plural = ghostCount > 1 ? 's' : ''
  const overflow = ghostCount > GHOST_NAMES_PREVIEW ? `, +${ghostCount - GHOST_NAMES_PREVIEW} more` : ''
  const list = ghosts.slice(0, GHOST_NAMES_PREVIEW).join(', ') + overflow

  let impact: Impact = 'low'
  if (ghostCount >= GHOST_AGENTS_HIGH_THRESHOLD) impact = 'high'
  else if (ghostCount >= GHOST_AGENTS_MEDIUM_THRESHOLD) impact = 'medium'

  const archiveLines = ghosts
    .slice(0, GHOST_CLEANUP_COMMANDS_LIMIT)
    .map(agent => `mv ~/.claude/agents/${agent}.md ~/.claude/agents/.archived/`)

  return {
    title: `${ghostCount} custom agent${plural} you never use`,
    explanation: `Defined in ~/.claude/agents/ but never invoked in this period: ${list}. Each adds ~${TOKENS_PER_AGENT_DEF} tokens to the Task tool schema on every session.`,
    impact,
    tokensSaved: ghostCount * TOKENS_PER_AGENT_DEF,
    fix: {
      type: 'command',
      label: `Archive unused agent${plural}:`,
      text: archiveLines.join('\n'),
    },
  }
}
/**
 * Flags skill directories in `~/.claude/skills` that were never invoked.
 *
 * A skill counts as invoked when a 'Skill' tool call names it in `skill` or,
 * failing that, in `name`.
 *
 * @returns `null` when no skills are defined or all of them were invoked.
 */
export async function detectGhostSkills(calls: ToolCall[]): Promise<WasteFinding | null> {
  const defined = await listSkillDirs(join(homedir(), '.claude', 'skills'))
  if (defined.length === 0) return null

  const invoked = new Set<string>()
  for (const { name, input } of calls) {
    if (name !== 'Skill') continue
    const skillName = (input.skill as string) || (input.name as string)
    if (skillName) invoked.add(skillName)
  }

  const ghosts = defined.filter(skill => !invoked.has(skill))
  const ghostCount = ghosts.length
  if (ghostCount === 0) return null

  const plural = ghostCount > 1 ? 's' : ''
  const overflow = ghostCount > GHOST_NAMES_PREVIEW ? `, +${ghostCount - GHOST_NAMES_PREVIEW} more` : ''
  const list = ghosts.slice(0, GHOST_NAMES_PREVIEW).join(', ') + overflow

  let impact: Impact = 'low'
  if (ghostCount >= GHOST_SKILLS_HIGH_THRESHOLD) impact = 'high'
  else if (ghostCount >= GHOST_SKILLS_MEDIUM_THRESHOLD) impact = 'medium'

  const archiveLines = ghosts
    .slice(0, GHOST_CLEANUP_COMMANDS_LIMIT)
    .map(skill => `mv ~/.claude/skills/${skill} ~/.claude/skills/.archived/`)

  return {
    title: `${ghostCount} skill${plural} you never use`,
    explanation: `In ~/.claude/skills/ but not invoked this period: ${list}. Each adds ~${TOKENS_PER_SKILL_DEF} tokens of metadata to every session.`,
    impact,
    tokensSaved: ghostCount * TOKENS_PER_SKILL_DEF,
    fix: {
      type: 'command',
      label: `Archive unused skill${plural}:`,
      text: archiveLines.join('\n'),
    },
  }
}
/**
 * Flags slash-command definitions in `~/.claude/commands` that no user message
 * referenced.
 *
 * A command counts as referenced when a user message contains it in a
 * `<command-name>` tag or as a `/name` token (see COMMAND_PATTERN).
 *
 * @returns `null` when no commands are defined or all of them were referenced.
 */
export async function detectGhostCommands(userMessages: string[]): Promise<WasteFinding | null> {
  const defined = await listMarkdownFiles(join(homedir(), '.claude', 'commands'))
  if (defined.length === 0) return null
  const invoked = new Set<string>()
  for (const msg of userMessages) {
    // matchAll starts from the regex's current lastIndex, so reset the shared global regex.
    COMMAND_PATTERN.lastIndex = 0
    for (const m of msg.matchAll(COMMAND_PATTERN)) {
      // The tag capture is taken verbatim and may carry the leading "/"
      // (e.g. "<command-name>/deploy</command-name>"), while definitions are
      // bare file names; strip it so both forms compare equal.
      const name = (m[1] || m[2] || '').trim().replace(/^\/+/, '')
      if (name) invoked.add(name)
    }
  }
  const ghosts = defined.filter(name => !invoked.has(name))
  if (ghosts.length === 0) return null
  const tokensSaved = ghosts.length * TOKENS_PER_COMMAND_DEF
  const list = ghosts.slice(0, GHOST_NAMES_PREVIEW).join(', ') + (ghosts.length > GHOST_NAMES_PREVIEW ? `, +${ghosts.length - GHOST_NAMES_PREVIEW} more` : '')
  return {
    title: `${ghosts.length} slash command${ghosts.length > 1 ? 's' : ''} you never use`,
    explanation: `In ~/.claude/commands/ but not referenced this period: ${list}. Each adds ~${TOKENS_PER_COMMAND_DEF} tokens of definition per session.`,
    impact: ghosts.length >= GHOST_COMMANDS_MEDIUM_THRESHOLD ? 'medium' : 'low',
    tokensSaved,
    fix: {
      type: 'command',
      label: `Archive unused command${ghosts.length > 1 ? 's' : ''}:`,
      text: ghosts.slice(0, GHOST_CLEANUP_COMMANDS_LIMIT).map(name => `mv ~/.claude/commands/${name}.md ~/.claude/commands/.archived/`).join('\n'),
    },
  }
}
/**
 * Looks for `export BASH_MAX_OUTPUT_LENGTH=<digits>` in the user's shell
 * profiles, in SHELL_PROFILES order.
 *
 * @returns The first value found, or `null` when no profile sets it.
 */
function readShellProfileLimit(): number | null {
  const exportLine = /^\s*export\s+BASH_MAX_OUTPUT_LENGTH\s*=\s*['"]?(\d+)['"]?/m
  for (const profile of SHELL_PROFILES) {
    const profilePath = join(homedir(), profile)
    const text = existsSync(profilePath) ? readSessionFileSync(profilePath) : null
    if (text === null) continue
    const found = text.match(exportLine)
    if (found) return parseInt(found[1], 10)
  }
  return null
}
/**
 * Flags a bash output cap above BASH_RECOMMENDED_LIMIT.
 *
 * The cap is taken from the shell profiles first, then from the
 * BASH_MAX_OUTPUT_LENGTH environment variable, and otherwise assumed to be
 * BASH_DEFAULT_LIMIT. An environment value that is not a number is treated as
 * unset, so it cannot leak NaN into the estimate or the message.
 *
 * @returns `null` when a configured cap is at or below the recommended limit.
 */
export function detectBashBloat(): WasteFinding | null {
  const profileLimit = readShellProfileLimit()
  const envRaw = process.env['BASH_MAX_OUTPUT_LENGTH']
  const envParsed = envRaw ? parseInt(envRaw, 10) : NaN
  const envLimit = Number.isFinite(envParsed) ? envParsed : null
  const configured = profileLimit ?? envLimit
  if (configured !== null && configured <= BASH_RECOMMENDED_LIMIT) return null
  const limit = configured ?? BASH_DEFAULT_LIMIT
  const extraChars = limit - BASH_RECOMMENDED_LIMIT
  const tokensSaved = Math.round(extraChars * BASH_TOKENS_PER_CHAR)
  return {
    title: 'Shrink bash output limit',
    explanation: `Your bash output cap is ${(limit / 1000).toFixed(0)}K chars (${configured !== null ? 'configured' : 'default'}). Most output fits in ${(BASH_RECOMMENDED_LIMIT / 1000).toFixed(0)}K. The extra ~${formatTokens(tokensSaved)} tokens per bash call is trailing noise.`,
    impact: 'medium',
    tokensSaved,
    fix: {
      type: 'paste',
      label: 'Add to ~/.zshrc or ~/.bashrc:',
      text: `export BASH_MAX_OUTPUT_LENGTH=${BASH_RECOMMENDED_LIMIT}`,
    },
  }
}
// ============================================================================
// Scoring
// ============================================================================
/** Penalty points subtracted from the health score per finding, by impact. */
const HEALTH_WEIGHTS: Record<Impact, number> = {
  high: HEALTH_WEIGHT_HIGH,
  medium: HEALTH_WEIGHT_MEDIUM,
  low: HEALTH_WEIGHT_LOW,
}
/**
 * Turns a list of findings into a 0-100 health score and a letter grade.
 *
 * The score is 100 minus the summed penalties, with the total penalty capped
 * at HEALTH_MAX_PENALTY. No findings yields a perfect 100 / 'A'.
 */
export function computeHealth(findings: WasteFinding[]): { score: number; grade: HealthGrade } {
  if (findings.length === 0) return { score: 100, grade: 'A' }
  const penalty = findings.reduce(
    (sum, finding) => sum + (HEALTH_WEIGHTS[finding.impact] ?? 0),
    0,
  )
  const score = Math.max(0, 100 - Math.min(HEALTH_MAX_PENALTY, penalty))
  // Checked from best to worst; the first floor the score reaches wins.
  const ladder: Array<[number, HealthGrade]> = [
    [GRADE_A_MIN, 'A'],
    [GRADE_B_MIN, 'B'],
    [GRADE_C_MIN, 'C'],
    [GRADE_D_MIN, 'D'],
  ]
  let grade: HealthGrade = 'F'
  for (const [floor, letter] of ladder) {
    if (score >= floor) {
      grade = letter
      break
    }
  }
  return { score, grade }
}
/** Relative urgency of each impact level, on a 0-1 scale. */
const URGENCY_WEIGHTS: Record<Impact, number> = { high: 1, medium: 0.5, low: 0.2 }
/**
 * Scores a finding by blending its impact weight with its token estimate.
 * The token estimate is scaled by URGENCY_TOKEN_NORMALIZE and capped at 1.
 */
function urgencyScore(f: WasteFinding): number {
  const tokenShare = Math.min(1, f.tokensSaved / URGENCY_TOKEN_NORMALIZE)
  const impactPart = URGENCY_WEIGHTS[f.impact] * URGENCY_IMPACT_WEIGHT
  const tokenPart = tokenShare * URGENCY_TOKEN_WEIGHT
  return impactPart + tokenPart
}
/** Occurrence counts and window lengths (ms) for the recent window and the baseline before it. */
type TrendInputs = {
  recentCount: number
  recentWindowMs: number
  baselineCount: number
  baselineWindowMs: number
  hasRecentActivity: boolean
}
/**
 * Classifies a problem by comparing its recent rate with its baseline rate.
 *
 * - No baseline occurrences: 'active' (nothing to compare against).
 * - No recent occurrences despite recent activity: 'resolved'.
 * - No recent activity at all: 'active' (absence of data is not improvement).
 * - Recent rate below IMPROVING_THRESHOLD times the baseline rate: 'improving'.
 * - Otherwise: 'active'.
 *
 * Both window lengths are clamped to at least 1 ms so that a zero-length
 * window cannot produce an infinite rate.
 */
export function computeTrend(inputs: TrendInputs): Trend | 'resolved' {
  const { recentCount, recentWindowMs, baselineCount, baselineWindowMs, hasRecentActivity } = inputs
  if (baselineCount === 0) return 'active'
  if (recentCount === 0 && hasRecentActivity) return 'resolved'
  if (!hasRecentActivity) return 'active'
  const baselineRate = baselineCount / Math.max(baselineWindowMs, 1)
  const recentRate = recentCount / Math.max(recentWindowMs, 1)
  if (recentRate < baselineRate * IMPROVING_THRESHOLD) return 'improving'
  return 'active'
}
/**
 * Derives the inputs for computeTrend from session-level counts.
 *
 * The recent window is the last RECENT_WINDOW_MS before now. The baseline
 * window runs from the start of the analysed period (the date range's start,
 * or DEFAULT_TREND_PERIOD_MS ago when no range is given) up to the start of
 * the recent window, and is never shorter than 1 ms.
 *
 * @param recentItemCount Occurrences inside the recent window.
 * @param totalItemCount Occurrences over the whole period (recent ones included).
 * @param dateRange Analysed period, if the caller restricted it.
 * @param hasRecentActivity Forwarded unchanged to computeTrend.
 */
function sessionTrend(
  recentItemCount: number,
  totalItemCount: number,
  dateRange: DateRange | undefined,
  hasRecentActivity: boolean,
): Trend | 'resolved' {
  const nowMs = Date.now()
  const recentWindowStart = nowMs - RECENT_WINDOW_MS

  let periodStart = nowMs - DEFAULT_TREND_PERIOD_MS
  if (dateRange) periodStart = dateRange.start.getTime()

  return computeTrend({
    hasRecentActivity,
    recentCount: recentItemCount,
    recentWindowMs: RECENT_WINDOW_MS,
    // Whatever did not happen recently belongs to the baseline.
    baselineCount: totalItemCount - recentItemCount,
    baselineWindowMs: Math.max(recentWindowStart - periodStart, 1),
  })
}
// ============================================================================
// Cost estimation
// ============================================================================
// Share of total spend attributed to input-side tokens when deriving a per-token rate.
const INPUT_COST_RATIO = 0.7
// Rate reported when there is no cost or token data to derive one from.
const DEFAULT_COST_PER_TOKEN = 0

/**
 * Estimates the USD cost of one input-side token across all sessions.
 *
 * Input-side tokens are input + cache-read + cache-write tokens. The rate is
 * INPUT_COST_RATIO of the total session cost divided by that token count.
 *
 * @param projects Projects whose sessions are aggregated.
 * @returns The estimated rate, or DEFAULT_COST_PER_TOKEN when total cost or total tokens is zero.
 */
function computeInputCostRate(projects: ProjectSummary[]): number {
  let costUSD = 0
  let inputSideTokens = 0

  for (const project of projects) {
    for (const session of project.sessions) {
      costUSD += session.totalCostUSD
      inputSideTokens =
        inputSideTokens +
        session.totalInputTokens +
        session.totalCacheReadTokens +
        session.totalCacheWriteTokens
    }
  }

  const hasData = inputSideTokens !== 0 && costUSD !== 0
  return hasData ? (costUSD * INPUT_COST_RATIO) / inputSideTokens : DEFAULT_COST_PER_TOKEN
}
// ============================================================================
// Main entry points
// ============================================================================
/** A cached optimize result together with the time (ms since epoch) it was stored. */
type CacheEntry = { data: OptimizeResult; ts: number }

// Module-level memo of optimize results, keyed by cacheKey().
const resultCache = new Map<string, CacheEntry>()

/**
 * Builds the memo key for a scan: the date range (or 'all'), the number of
 * projects, and the total API call count across them.
 *
 * NOTE: the fingerprint is coarse. Two different project sets with the same
 * size and the same API-call total map to the same key for a given range.
 */
function cacheKey(projects: ProjectSummary[], dateRange: DateRange | undefined): string {
  let apiCallTotal = 0
  for (const project of projects) apiCallTotal += project.totalApiCalls

  const rangePart = !dateRange
    ? 'all'
    : `${dateRange.start.getTime()}-${dateRange.end.getTime()}`

  return `${rangePart}:${projects.length}:${apiCallTotal}`
}
/**
 * Scans sessions, runs every waste detector, and returns the findings ranked
 * by urgency together with the input cost rate and the health score/grade.
 *
 * Results are memoised in `resultCache` under `cacheKey(projects, dateRange)`
 * and reused while younger than RESULT_CACHE_TTL_MS. An empty `projects` list
 * short-circuits to a perfect score without scanning or touching the cache.
 *
 * @param projects Project summaries for the period being analysed.
 * @param dateRange Optional period restriction, forwarded to the scan and to the detectors that take it.
 */
export async function scanAndDetect(
  projects: ProjectSummary[],
  dateRange?: DateRange,
): Promise<OptimizeResult> {
  if (projects.length === 0) {
    return { findings: [], costRate: 0, healthScore: 100, healthGrade: 'A' }
  }

  const key = cacheKey(projects, dateRange)
  const hit = resultCache.get(key)
  if (hit !== undefined && Date.now() - hit.ts < RESULT_CACHE_TTL_MS) return hit.data

  const costRate = computeInputCostRate(projects)
  const { toolCalls, projectCwds, apiCalls, userMessages } = await scanSessions(dateRange)

  const findings: WasteFinding[] = []
  const collect = (candidate: WasteFinding | null): void => {
    if (candidate) findings.push(candidate)
  }

  // Synchronous detectors, run one after another in this fixed order.
  collect(detectCacheBloat(apiCalls, projects, dateRange))
  collect(detectLowReadEditRatio(toolCalls))
  collect(detectJunkReads(toolCalls, dateRange))
  collect(detectDuplicateReads(toolCalls, dateRange))
  collect(detectUnusedMcp(toolCalls, projects, projectCwds))
  collect(detectBloatedClaudeMd(projectCwds))
  collect(detectBashBloat())

  // Asynchronous "ghost" detectors are started together once the synchronous
  // ones have finished; their findings are appended in this listed order.
  const ghostFindings = await Promise.all([
    detectGhostAgents(toolCalls),
    detectGhostSkills(toolCalls),
    detectGhostCommands(userMessages),
  ])
  ghostFindings.forEach(ghost => {
    if (ghost) findings.push(ghost)
  })

  // Most urgent first.
  findings.sort((left, right) => urgencyScore(right) - urgencyScore(left))

  const health = computeHealth(findings)
  const result: OptimizeResult = {
    findings,
    costRate,
    healthScore: health.score,
    healthGrade: health.grade,
  }
  resultCache.set(key, { data: result, ts: Date.now() })
  return result
}
// ============================================================================
// CLI rendering
// ============================================================================
// Width, in characters, of the horizontal rules in the CLI report; the
// finding-title padding and the explanation wrap width are derived from it.
const PANEL_WIDTH = 62
// U+2500 (box-drawing light horizontal), repeated to draw rules.
const SEP = '\u2500'
// Colour applied to a finding's impact label.
const IMPACT_COLORS: Record<Impact, string> = { high: RED, medium: ORANGE, low: DIM }
// Colour applied to the health grade letter.
const GRADE_COLORS: Record<HealthGrade, string> = { A: GREEN, B: GREEN, C: GOLD, D: ORANGE, F: RED }
/**
 * Greedy word-wrap. Splits `text` on single spaces and packs the pieces into
 * lines of at most `width` characters; a single piece longer than `width` gets
 * a line of its own, unbroken. Every emitted line is prefixed with `indent`,
 * which is not counted against `width`.
 *
 * @returns The lines joined with '\n' ('' when there is nothing to emit).
 */
function wrap(text: string, width: number, indent: string): string {
  const wrapped: string[] = []
  let pending = ''

  text.split(' ').forEach(token => {
    if (pending === '') {
      // Nothing buffered yet: start the line with this piece, whatever its length.
      pending = token
    } else if (pending.length + 1 + token.length > width) {
      // Piece plus its separating space would overflow: flush and start over.
      wrapped.push(indent + pending)
      pending = token
    } else {
      pending = `${pending} ${token}`
    }
  })

  if (pending !== '') wrapped.push(indent + pending)
  return wrapped.join('\n')
}
/**
 * Renders one finding as the lines of its panel section: a title rule
 * (number, title, impact label and an "improving" badge when the trend says
 * so), the wrapped explanation, the potential savings, and the suggested fix.
 *
 * @param n Display number of the finding (the caller passes a 1-based index).
 * @param f The finding to render.
 * @param costRate Estimated USD per token. When it is not positive, no cost
 *   rate is known and the savings line shows tokens only, instead of a
 *   zero-dollar figure.
 * @returns The rendered lines, ending with a blank line.
 */
function renderFinding(n: number, f: WasteFinding, costRate: number): string[] {
  const lines: string[] = []

  const impactLabel = f.impact.charAt(0).toUpperCase() + f.impact.slice(1)
  const trendBadge = f.trend === 'improving' ? ' improving \u2193 ' : ''

  // Only quote a dollar amount when a rate is actually known; this matches
  // the summary line in renderOptimize, which drops its cost text at rate 0.
  const costText = costRate > 0 ? ` (~${formatCost(f.tokensSaved * costRate)})` : ''
  const savings = `~${formatTokens(f.tokensSaved)} tokens${costText}`

  // Fill the space between title and impact label with rule characters when there is room.
  const titlePad = PANEL_WIDTH - f.title.length - impactLabel.length - trendBadge.length - 8
  const pad = titlePad > 0 ? ' ' + SEP.repeat(titlePad) + ' ' : ' '

  lines.push(chalk.hex(DIM)(` ${SEP}${SEP}${SEP} `) +
    chalk.bold(`${n}. ${f.title}`) +
    chalk.hex(DIM)(pad) +
    chalk.hex(IMPACT_COLORS[f.impact])(impactLabel) +
    (trendBadge ? chalk.hex(GREEN)(trendBadge) : '') +
    chalk.hex(DIM)(` ${SEP}${SEP}${SEP}`))
  lines.push('')
  lines.push(wrap(f.explanation, PANEL_WIDTH - 4, ' '))
  lines.push('')
  lines.push(chalk.hex(GOLD)(` Potential savings: ${savings}`))
  lines.push('')

  // Every fix kind prints its label, then its body: file content and commands
  // may span several lines, any other kind is printed as a single line.
  const fix = f.fix
  lines.push(chalk.hex(DIM)(` ${fix.label}`))
  const bodyLines =
    fix.type === 'file-content' ? fix.content.split('\n') :
    fix.type === 'command' ? fix.text.split('\n') :
    [fix.text]
  for (const line of bodyLines) lines.push(chalk.hex(CYAN)(` ${line}`))

  lines.push('')
  return lines
}
/**
 * Builds the full text of the optimize report: a header with the period
 * label, a one-line summary (sessions, calls, spend, health), and then either
 * a "nothing to fix" message or the total potential savings followed by every
 * finding and a closing disclaimer.
 *
 * @param findings Findings to list, in display order.
 * @param costRate Estimated USD per token; cost figures are omitted from the savings line when it is not positive.
 * @param periodLabel Label shown next to the report title.
 * @param periodCost Total spend for the period, used for the spend figure and the savings percentage.
 * @param sessionCount Number of sessions in the period.
 * @param callCount Number of API calls in the period.
 * @param healthScore Health score out of 100.
 * @param healthGrade Letter grade shown (coloured via GRADE_COLORS).
 * @returns The report as a single newline-joined string.
 */
function renderOptimize(
  findings: WasteFinding[],
  costRate: number,
  periodLabel: string,
  periodCost: number,
  sessionCount: number,
  callCount: number,
  healthScore: number,
  healthGrade: HealthGrade,
): string {
  const findingCount = findings.length
  const rule = chalk.hex(DIM)(' ' + SEP.repeat(PANEL_WIDTH))

  let issueSuffix = ''
  if (findingCount > 0) {
    const plural = findingCount > 1 ? 's' : ''
    issueSuffix = `, ${findingCount} issue${plural}`
  }

  const gradeBadge = chalk.bold.hex(GRADE_COLORS[healthGrade])(healthGrade)
  const scoreNote = chalk.dim(` (${healthScore}/100${issueSuffix})`)
  const summaryParts = [
    `${sessionCount} sessions`,
    `${callCount.toLocaleString()} calls`,
    chalk.hex(GOLD)(formatCost(periodCost)),
    `Health: ${gradeBadge}${scoreNote}`,
  ]

  const out: string[] = [
    '',
    ` ${chalk.bold.hex(ORANGE)('CodeBurn config health')}${chalk.dim(' ' + periodLabel)}`,
    rule,
    ' ' + summaryParts.join(chalk.hex(DIM)(' ')),
    '',
  ]

  if (findingCount === 0) {
    out.push(
      chalk.hex(GREEN)(' Nothing to fix. Your setup is lean.'),
      '',
      chalk.dim(' CodeBurn optimize scans your Claude Code sessions and config for'),
      chalk.dim(' token waste: junk directory reads, duplicate file reads, unused'),
      chalk.dim(' agents/skills/MCP servers, bloated CLAUDE.md, and more.'),
      '',
    )
    return out.join('\n')
  }

  let totalTokens = 0
  for (const finding of findings) totalTokens += finding.tokensSaved
  const totalCost = totalTokens * costRate

  let costText = ''
  if (costRate > 0) {
    const pctRaw = periodCost > 0 ? (totalCost / periodCost) * 100 : 0
    // Whole percent from 1% upwards, one decimal below that.
    const pct = pctRaw >= 1 ? pctRaw.toFixed(0) : pctRaw.toFixed(1)
    costText = ` (~${formatCost(totalCost)}, ~${pct}% of spend)`
  }

  out.push(chalk.hex(GREEN)(` Potential savings: ~${formatTokens(totalTokens)} tokens${costText}`))
  out.push('')

  for (const [index, finding] of findings.entries()) {
    out.push(...renderFinding(index + 1, finding, costRate))
  }

  out.push(rule)
  out.push(chalk.dim(' Estimates only.'))
  out.push('')
  return out.join('\n')
}
/**
 * CLI entry point for the optimize report: analyses the given projects and
 * prints the rendered report to stdout. A progress note goes to stderr first,
 * so it does not mix into the report output.
 *
 * @param projects Project summaries for the period; when empty, a "no usage data" notice is printed instead.
 * @param periodLabel Label shown next to the report title.
 * @param dateRange Optional period restriction, forwarded to the analysis.
 */
export async function runOptimize(
  projects: ProjectSummary[],
  periodLabel: string,
  dateRange?: DateRange,
): Promise<void> {
  if (projects.length === 0) {
    console.log(chalk.dim('\n No usage data found for this period.\n'))
    return
  }

  process.stderr.write(chalk.dim(' Analyzing your sessions...\n'))
  const analysis = await scanAndDetect(projects, dateRange)

  // Period totals shown in the report's summary line.
  let sessionCount = 0
  let periodCost = 0
  let callCount = 0
  for (const project of projects) {
    sessionCount += project.sessions.length
    periodCost += project.totalCostUSD
    callCount += project.totalApiCalls
  }

  console.log(
    renderOptimize(
      analysis.findings,
      analysis.costRate,
      periodLabel,
      periodCost,
      sessionCount,
      callCount,
      analysis.healthScore,
      analysis.healthGrade,
    ),
  )
}