From 75d4701bd8d8341da0adb3c98cfdf2998e3fa130 Mon Sep 17 00:00:00 2001 From: Resham Joshi <65915470+iamtoruk@users.noreply.github.com> Date: Wed, 6 May 2026 00:35:41 -0700 Subject: [PATCH] feat(optimize): flag low-worth expensive sessions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a low-worth detector to codeburn optimize that flags expensive sessions with weak delivery signals (no edits, repeated retries, or no one-shot edits) when no git/gh delivery command is observed. Priority order is low-worth → context-bloat → outliers; each later detector excludes sessions named by an earlier one so the same session is never listed in more than one of the three findings. Detection: $2 cost floor, $3 floor for no-edit sessions, 3+ retries; the delivery regex matches git commit/push and gh pr create/merge but excludes commit-tree/commit-graph and dry-run. Three impact tiers consistent with #246. Token-savings uses full session tokens for no-edit sessions and the retry fraction for edit-with-retry sessions. Supersedes #241 with review fixes. Original implementation by @ozymandiashh. --- CHANGELOG.md | 7 ++ README.md | 2 + src/optimize.ts | 201 ++++++++++++++++++++++++++++++- tests/optimize.test.ts | 265 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 471 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28e2c11..9a86c48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,13 @@ and suggests starting fresh with only the current goal, relevant files, failing output, and constraints. Sessions flagged here are excluded from the cost-outlier finding so the same session is not listed twice. +- **Worth-it score detector.** New `optimize` finding flags expensive sessions + with weak delivery signals: no edit turns, repeated retries, or edit work + that never landed in one shot, when no `git`/`gh` delivery command is + observed. Framed as a conservative review candidate, not proof of waste. 
+ Sessions flagged here take priority and are excluded from both the + context-bloat and cost-outlier findings so the same session is not listed + more than once. ### Fixed (CLI) - **Windows Claude project paths.** Claude Code project rollups now prefer diff --git a/README.md b/README.md index 7c96264..8c09893 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,8 @@ Scans your sessions and your `~/.claude/` setup for waste patterns: - Bloated `CLAUDE.md` files (with `@-import` expansion counted) - Cache creation overhead and junk directory reads - Context-heavy sessions where effective input/cache tokens swamp output +- Possibly low-worth expensive sessions with no edit turns or repeated retries + when no `git`/`gh` delivery command is observed Each finding shows the estimated token and dollar savings plus a ready-to-paste fix: a `CLAUDE.md` line, an environment variable, or a `mv` command to archive unused items. Findings are ranked by urgency (impact weighted against observed waste) and rolled up into an A to F setup health grade. Repeat runs classify each finding as new, improving, or resolved against a 48-hour recent window. 
diff --git a/src/optimize.ts b/src/optimize.ts index 82874d6..7974c3f 100644 --- a/src/optimize.ts +++ b/src/optimize.ts @@ -89,6 +89,15 @@ const CONTEXT_BLOAT_HIGH_MIN_CANDIDATES = 10 const CONTEXT_BLOAT_GROWTH_RATIO = 2 const CONTEXT_BLOAT_GROWTH_MAX_GAP_MS = 7 * 24 * 60 * 60 * 1000 const CONTEXT_BLOAT_RATIO_DISPLAY_CAP = 1000 +const WORTH_IT_MIN_COST_USD = 2 +const WORTH_IT_NO_EDIT_MIN_COST_USD = 3 +const WORTH_IT_MIN_RETRIES = 3 +const WORTH_IT_RETRY_WITH_EDIT_MIN_RETRIES = 2 +const WORTH_IT_PREVIEW = 5 +const WORTH_IT_LOW_MAX_CANDIDATES = 2 +const WORTH_IT_LOW_MAX_TOTAL_COST_USD = 10 +const WORTH_IT_HIGH_MIN_CANDIDATES = 10 +const WORTH_IT_HIGH_TOTAL_COST_USD = 50 // ============================================================================ // Scoring constants @@ -1235,6 +1244,179 @@ function formatContextRatio(ratio: number): string { return ratio.toFixed(1) } +// ============================================================================ +// Worth-it / low-worth-session detector helpers +// ============================================================================ + +// Use (\s|$|--) instead of \b after commit/push so `git commit-tree` and +// `git commit-graph` are not treated as deliveries. The `--` clause keeps +// `git commit --amend` matching as a real delivery command. +const DELIVERY_COMMAND_PATTERNS = [ + /(?:^|[;&|]\s*)git\s+(?:commit|push)(?=\s|$|--)(?![^;&|]*--dry-run)/, + /(?:^|[;&|]\s*)gh\s+pr\s+(?:create|merge)(?=\s|$|--)(?![^;&|]*--dry-run)/, +] + +function sessionDeliveryCommand(session: ProjectSummary['sessions'][number]): string | null { + const commands = Object.keys(session.bashBreakdown) + return commands.find(command => DELIVERY_COMMAND_PATTERNS.some(pattern => pattern.test(command))) ?? 
null +} + +function hasCategoryBreakdownData(session: ProjectSummary['sessions'][number]): boolean { + return Object.values(session.categoryBreakdown).some(category => + category.turns > 0 + || category.costUSD > 0 + || category.retries > 0 + || category.editTurns > 0 + || category.oneShotTurns > 0 + ) +} + +function sessionEditTurns(session: ProjectSummary['sessions'][number]): number { + if (hasCategoryBreakdownData(session)) { + return Object.values(session.categoryBreakdown).reduce((sum, c) => sum + c.editTurns, 0) + } + return session.turns.filter(turn => turn.hasEdits).length +} + +function sessionOneShotTurns(session: ProjectSummary['sessions'][number]): number { + if (hasCategoryBreakdownData(session)) { + return Object.values(session.categoryBreakdown).reduce((sum, c) => sum + c.oneShotTurns, 0) + } + return session.turns.filter(turn => turn.hasEdits && turn.retries === 0).length +} + +function sessionRetryCount(session: ProjectSummary['sessions'][number]): number { + if (hasCategoryBreakdownData(session)) { + return Object.values(session.categoryBreakdown).reduce((sum, c) => sum + c.retries, 0) + } + return session.turns.reduce((sum, turn) => sum + turn.retries, 0) +} + +function sessionTotalTurns(session: ProjectSummary['sessions'][number]): number { + if (hasCategoryBreakdownData(session)) { + return Object.values(session.categoryBreakdown).reduce((sum, c) => sum + c.turns, 0) + } + return session.turns.length +} + +// Token-savings estimate for a low-worth candidate. Two regimes: +// - No-edit sessions: full session tokens are at risk (the session produced +// no apparent output to weigh against the spend). +// - Sessions with edits but with retries / no one-shot: only the retry +// fraction is counted as recoverable. Edits may still have been useful; +// we credit the model with that and only flag the retry overhead. +// Ratio is bounded to [0, 1] so retry-heavy sessions with weird turn counts +// can't claim more than the full session token total. 
+function estimateLowWorthRecoverableTokens( + session: ProjectSummary['sessions'][number], + editTurns: number, + retries: number, +): number { + const tokens = sessionTokenTotal(session) + if (editTurns === 0) return tokens + const totalTurns = sessionTotalTurns(session) + if (totalTurns === 0) return 0 + const fraction = Math.min(1, Math.max(0, retries / totalTurns)) + return Math.round(tokens * fraction) +} + +export type LowWorthCandidate = { + project: string + sessionId: string + date: string + cost: number + tokens: number + reasons: string[] +} + +export function findLowWorthCandidates(projects: ProjectSummary[]): LowWorthCandidate[] { + const candidates: LowWorthCandidate[] = [] + + for (const project of projects) { + for (const session of project.sessions) { + if (session.totalCostUSD < WORTH_IT_MIN_COST_USD) continue + if (sessionDeliveryCommand(session)) continue + + const editTurns = sessionEditTurns(session) + const oneShotTurns = sessionOneShotTurns(session) + const retries = sessionRetryCount(session) + const reasons: string[] = [] + + if (editTurns === 0 && session.totalCostUSD >= WORTH_IT_NO_EDIT_MIN_COST_USD) { + reasons.push('no edit turns') + } + if (retries >= WORTH_IT_MIN_RETRIES) { + reasons.push(`${retries} retries`) + } + if ( + editTurns > 0 + && oneShotTurns === 0 + && retries >= WORTH_IT_RETRY_WITH_EDIT_MIN_RETRIES + ) { + reasons.push('no one-shot edit turns') + } + + if (reasons.length === 0) continue + + candidates.push({ + project: project.project, + sessionId: session.sessionId, + date: session.firstTimestamp.slice(0, 10), + cost: session.totalCostUSD, + tokens: estimateLowWorthRecoverableTokens(session, editTurns, retries), + reasons, + }) + } + } + + candidates.sort((a, b) => + b.cost - a.cost + || a.date.localeCompare(b.date) + || a.project.localeCompare(b.project) + || a.sessionId.localeCompare(b.sessionId) + ) + return candidates +} + +export function detectLowWorthSessions(projects: ProjectSummary[]): WasteFinding | null { + 
const candidates = findLowWorthCandidates(projects) + if (candidates.length === 0) return null + + const preview = candidates.slice(0, WORTH_IT_PREVIEW) + const list = preview + .map(s => `${s.project}/${s.sessionId} on ${s.date}: ${formatCost(s.cost)} (${s.reasons.join(', ')})`) + .join('; ') + const extra = candidates.length > preview.length ? `; +${candidates.length - preview.length} more` : '' + // Per-candidate `tokens` is already the recoverable estimate (full session + // for no-edit, retry-fraction for edit-with-retries). Sum across candidates. + const tokensSaved = Math.round(candidates.reduce((sum, s) => sum + s.tokens, 0)) + const totalCost = candidates.reduce((sum, s) => sum + s.cost, 0) + + // Three tiers consistent with detectContextBloat: high at >=10 candidates + // or >=$50 total spend at risk; low at <=2 candidates AND <$10 total; + // medium in between. + let impact: Impact + if (candidates.length >= WORTH_IT_HIGH_MIN_CANDIDATES || totalCost >= WORTH_IT_HIGH_TOTAL_COST_USD) { + impact = 'high' + } else if (candidates.length <= WORTH_IT_LOW_MAX_CANDIDATES && totalCost < WORTH_IT_LOW_MAX_TOTAL_COST_USD) { + impact = 'low' + } else { + impact = 'medium' + } + + return { + title: `${candidates.length} possibly low-worth expensive session${candidates.length === 1 ? '' : 's'}`, + explanation: `Sessions with meaningful spend but weak delivery signals: ${list}${extra}. This is a review candidate, not proof of waste: CodeBurn flags missing edit turns, repeated retries, and sessions without git delivery commands so you can decide whether the work was worth its cost before it becomes a habit.`, + impact, + tokensSaved, + fix: { + type: 'paste', + label: 'Set a delivery checkpoint at the start of the next expensive thread:', + text: 'Before continuing, name the deliverable in one sentence (PR title, file changed, command output you expect). Stop and check with me if (a) you spend more than 10 minutes without an edit, or (b) the same approach fails twice. 
Do not retry past two attempts on any single fix.', + }, + } +} + export type ContextBloatCandidate = { project: string sessionId: string @@ -1302,8 +1484,9 @@ export function findContextBloatCandidates(projects: ProjectSummary[]): ContextB return candidates } -export function detectContextBloat(projects: ProjectSummary[]): WasteFinding | null { +export function detectContextBloat(projects: ProjectSummary[], excludedSessionIds?: ReadonlySet<string>): WasteFinding | null { const candidates = findContextBloatCandidates(projects) + .filter(c => !excludedSessionIds?.has(c.sessionId)) if (candidates.length === 0) return null const preview = candidates.slice(0, CONTEXT_BLOAT_PREVIEW) @@ -1530,7 +1713,16 @@ export async function scanAndDetect( const mcpCoverage = aggregateMcpCoverage(projects) const findings: WasteFinding[] = [] - const contextBloatSessionIds = new Set(findContextBloatCandidates(projects).map(c => c.sessionId)) + // Priority order for the per-session findings: low-worth → context-bloat → + // outliers. Each later detector excludes sessions already named by an + // earlier one so a single session is not listed in three findings. 
+ const lowWorthSessionIds = new Set(findLowWorthCandidates(projects).map(c => c.sessionId)) + const contextBloatVisibleIds = new Set( + findContextBloatCandidates(projects) + .filter(c => !lowWorthSessionIds.has(c.sessionId)) + .map(c => c.sessionId), + ) + const outlierExclusions = new Set([...lowWorthSessionIds, ...contextBloatVisibleIds]) const syncDetectors: Array<() => WasteFinding | null> = [ () => detectCacheBloat(apiCalls, projects, dateRange), () => detectLowReadEditRatio(toolCalls), @@ -1538,8 +1730,9 @@ export async function scanAndDetect( () => detectDuplicateReads(toolCalls, dateRange), () => detectUnusedMcp(toolCalls, projects, projectCwds, mcpCoverage), () => detectMcpToolCoverage(projects, mcpCoverage), - () => detectContextBloat(projects), - () => detectSessionOutliers(projects, contextBloatSessionIds), + () => detectLowWorthSessions(projects), + () => detectContextBloat(projects, lowWorthSessionIds), + () => detectSessionOutliers(projects, outlierExclusions), () => detectBloatedClaudeMd(projectCwds), () => detectBashBloat(), ] diff --git a/tests/optimize.test.ts b/tests/optimize.test.ts index b84eab2..8fb30e8 100644 --- a/tests/optimize.test.ts +++ b/tests/optimize.test.ts @@ -7,6 +7,7 @@ import { detectCacheBloat, detectBloatedClaudeMd, detectContextBloat, + detectLowWorthSessions, detectSessionOutliers, computeHealth, computeTrend, @@ -504,6 +505,270 @@ describe('detectContextBloat', () => { // the second session must still anchor against it. 
expect(finding!.explanation).toContain('5.0x previous session input') }) + + it('honors excludedSessionIds passed by the orchestrator', () => { + const project = projectWithContextSessions([ + contextSession(0, { + totalInputTokens: 90_000, + totalCacheReadTokens: 30_000, + totalOutputTokens: 2_000, + }), + ]) + + const finding = detectContextBloat([project], new Set(['s1'])) + expect(finding).toBeNull() + }) +}) + +type LowWorthTurn = TestSession['turns'][number] + +function lowWorthTurn(overrides: Partial<LowWorthTurn> = {}): LowWorthTurn { + return { + userMessage: 'do the work', + assistantCalls: [], + timestamp: '2026-05-01T10:00:00Z', + sessionId: 's1', + category: 'coding', + retries: 0, + hasEdits: false, + ...overrides, + } +} + +function lowWorthSession(cost: number, i: number, overrides: Partial<TestSession> = {}, project = 'app'): TestSession { + const tokens = Math.round(cost * 1000) + return { + sessionId: `s${i + 1}`, + project, + firstTimestamp: `2026-05-${String(i + 1).padStart(2, '0')}T10:00:00Z`, + lastTimestamp: `2026-05-${String(i + 1).padStart(2, '0')}T10:30:00Z`, + totalCostUSD: cost, + totalInputTokens: tokens, + totalOutputTokens: tokens, + totalCacheReadTokens: 0, + totalCacheWriteTokens: 0, + apiCalls: 1, + turns: [], + modelBreakdown: {}, + toolBreakdown: {}, + mcpBreakdown: {}, + bashBreakdown: {}, + categoryBreakdown: {} as TestSession['categoryBreakdown'], + skillBreakdown: {}, + ...overrides, + } +} + +function projectWithLowWorthSessions(sessions: TestSession[], project = 'app'): ProjectSummary { + return { + project, + projectPath: `/tmp/${project}`, + sessions, + totalCostUSD: sessions.reduce((sum, s) => sum + s.totalCostUSD, 0), + totalApiCalls: sessions.reduce((sum, s) => sum + s.apiCalls, 0), + } +} + +describe('detectLowWorthSessions', () => { + it('returns null for cheap sessions', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(1.99, 0, { turns: [lowWorthTurn({ hasEdits: false })] }), + ]) + 
expect(detectLowWorthSessions([project])).toBeNull() + }) + + it('does not flag the no-edit cost boundary', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(2.99, 0, { turns: [lowWorthTurn({ hasEdits: false })] }), + ]) + expect(detectLowWorthSessions([project])).toBeNull() + }) + + it('flags expensive sessions with no edit turns', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(4, 0, { turns: [lowWorthTurn({ hasEdits: false })] }), + ]) + const finding = detectLowWorthSessions([project]) + expect(finding).not.toBeNull() + expect(finding!.title).toContain('possibly low-worth') + expect(finding!.explanation).toContain('app/s1') + expect(finding!.explanation).toContain('no edit turns') + // sessionTokenTotal = input + output + cache. The lowWorthSession helper + // sets input=output=cost*1000, so the savings ceiling is 2x cost*1000. + expect(finding!.tokensSaved).toBe(8_000) + }) + + it('flags retry-heavy sessions', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(2.5, 0, { + turns: [ + lowWorthTurn({ hasEdits: true, retries: 1 }), + lowWorthTurn({ hasEdits: true, retries: 2 }), + ], + }), + ]) + const finding = detectLowWorthSessions([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('3 retries') + }) + + it('estimates recoverable tokens by retry fraction for sessions with edits', () => { + // 4 turns, 2 retries spread across 2 edits, 0 one-shot edits → trips the + // 'no one-shot edit turns' reason. totalTurns=4, fraction=2/4=0.5, + // sessionTokenTotal=8K, so recoverable savings ceiling is 4K — half the + // session, not the full ceiling that no-edit sessions get. 
+ const project = projectWithLowWorthSessions([ + lowWorthSession(4, 0, { + turns: [ + lowWorthTurn({ hasEdits: true, retries: 1 }), + lowWorthTurn({ hasEdits: true, retries: 1 }), + lowWorthTurn({ hasEdits: false }), + lowWorthTurn({ hasEdits: false }), + ], + }), + ]) + const finding = detectLowWorthSessions([project]) + expect(finding).not.toBeNull() + expect(finding!.tokensSaved).toBe(4_000) + }) + + it('uses full session tokens as the savings ceiling for no-edit sessions', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(4, 0, { turns: [lowWorthTurn({ hasEdits: false })] }), + ]) + const finding = detectLowWorthSessions([project]) + // No edits at all -> entire session is at risk. sessionTokenTotal = 8K. + expect(finding!.tokensSaved).toBe(8_000) + }) + + it('keeps all reasons that apply to the same session', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(4, 0, { + turns: [ + lowWorthTurn({ hasEdits: false, retries: 1 }), + lowWorthTurn({ hasEdits: false, retries: 2 }), + ], + }), + ]) + const finding = detectLowWorthSessions([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('no edit turns') + expect(finding!.explanation).toContain('3 retries') + }) + + it('flags edit sessions with retries but no one-shot edit turns via categoryBreakdown', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(2.25, 0, { + categoryBreakdown: { + coding: { turns: 2, costUSD: 2.25, retries: 2, editTurns: 2, oneShotTurns: 0 }, + } as TestSession['categoryBreakdown'], + }), + ]) + const finding = detectLowWorthSessions([project]) + expect(finding).not.toBeNull() + expect(finding!.explanation).toContain('no one-shot edit turns') + }) + + it('skips sessions with a git delivery command', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(8, 0, { + turns: [lowWorthTurn({ hasEdits: false })], + bashBreakdown: { 'cd /tmp/app && git commit -m "ship fix"': { 
calls: 1 } }, + }), + ]) + expect(detectLowWorthSessions([project])).toBeNull() + }) + + it('skips sessions with gh pr create', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(8, 0, { + turns: [lowWorthTurn({ hasEdits: false })], + bashBreakdown: { 'gh pr create --fill': { calls: 1 } }, + }), + ]) + expect(detectLowWorthSessions([project])).toBeNull() + }) + + it('does not treat read-only git commands as delivery', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(8, 0, { + turns: [lowWorthTurn({ hasEdits: false })], + bashBreakdown: { 'git tag -l': { calls: 1 } }, + }), + ]) + expect(detectLowWorthSessions([project])).not.toBeNull() + }) + + it('does not treat dry-run git commands as delivery', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(8, 0, { + turns: [lowWorthTurn({ hasEdits: false })], + bashBreakdown: { 'git push --dry-run origin main': { calls: 1 } }, + }), + ]) + expect(detectLowWorthSessions([project])).not.toBeNull() + }) + + it('does not treat git commit-tree as a delivery command', () => { + // Regex must match `git commit` only, not `git commit-tree` / + // `git commit-graph`. Without the (?:\s|$|--) lookahead this would be a + // false positive and the session would silently skip detection. 
+ const project = projectWithLowWorthSessions([ + lowWorthSession(8, 0, { + turns: [lowWorthTurn({ hasEdits: false })], + bashBreakdown: { 'git commit-tree HEAD^{tree}': { calls: 1 } }, + }), + ]) + expect(detectLowWorthSessions([project])).not.toBeNull() + }) + + it('still treats `git commit --amend` as a delivery command', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(8, 0, { + turns: [lowWorthTurn({ hasEdits: false })], + bashBreakdown: { 'git commit --amend --no-edit': { calls: 1 } }, + }), + ]) + expect(detectLowWorthSessions([project])).toBeNull() + }) + + it('uses low impact for a single small candidate', () => { + const project = projectWithLowWorthSessions([ + lowWorthSession(4, 0, { turns: [lowWorthTurn({ hasEdits: false })] }), + ]) + const finding = detectLowWorthSessions([project]) + expect(finding!.impact).toBe('low') + }) + + it('uses medium impact between low and high tiers', () => { + const project = projectWithLowWorthSessions( + Array.from({ length: 3 }, (_, i) => lowWorthSession(4, i, { + turns: [lowWorthTurn({ hasEdits: false })], + })), + ) + const finding = detectLowWorthSessions([project]) + expect(finding!.impact).toBe('medium') + }) + + it('uses high impact at 10 or more candidates', () => { + const project = projectWithLowWorthSessions( + Array.from({ length: 10 }, (_, i) => lowWorthSession(3, i, { + turns: [lowWorthTurn({ hasEdits: false })], + })), + ) + const finding = detectLowWorthSessions([project]) + expect(finding!.impact).toBe('high') + }) + + it('summarizes additional candidates after the preview limit', () => { + const project = projectWithLowWorthSessions( + Array.from({ length: 6 }, (_, i) => lowWorthSession(4 + i, i, { + turns: [lowWorthTurn({ hasEdits: false })], + })), + ) + const finding = detectLowWorthSessions([project]) + expect(finding!.explanation).toContain('; +1 more') + }) }) describe('detectSessionOutliers', () => {