feat(optimize): detect session cost outliers

2026-05-17 03:56:45 +00:00 · 2026-05-05 05:25:49 +03:00 · 2026-05-05 05:25:49 +03:00 · d18ba3d2fe
commit d18ba3d2fe
parent f5cbfe28bb
2 changed files with 148 additions and 0 deletions
--- a/src/optimize.ts
+++ b/src/optimize.ts
@ -61,6 +61,10 @@ const GHOST_COMMANDS_MEDIUM_THRESHOLD = 10
 const MCP_NEW_CONFIG_GRACE_MS = 24 * 60 * 60 * 1000
 const BASH_DEFAULT_LIMIT = 30000
 const BASH_RECOMMENDED_LIMIT = 15000
+const MIN_SESSIONS_FOR_OUTLIER = 3 // projects with fewer positive-cost sessions are skipped
+const SESSION_OUTLIER_MULTIPLIER = 2 // a session must cost more than this many times its peers' mean
+const MIN_SESSION_OUTLIER_COST_USD = 1 // sessions cheaper than this (USD) are never reported
+const SESSION_OUTLIER_PREVIEW = 5 // at most this many outliers are listed in the explanation

 // ============================================================================
 // Scoring constants
@ -853,6 +857,77 @@ export function detectBashBloat(): WasteFinding | null {
  }
 }

+/** Adds up a session's input, output, cache-read and cache-write token counts. */
+function sessionTokenTotal(session: ProjectSummary['sessions'][number]): number {
+  const { totalInputTokens, totalOutputTokens, totalCacheReadTokens, totalCacheWriteTokens } = session
+  const inputPlusOutput = totalInputTokens + totalOutputTokens
+  return inputPlusOutput + totalCacheReadTokens + totalCacheWriteTokens
+}
+
+/**
+ * Reports sessions that cost far more than the other sessions of the same project.
+ * Only positive-cost sessions count; each is compared against the mean cost of its peers.
+ * @param projects Project summaries whose sessions are compared project by project.
+ * @returns One finding covering every outlier, or null when there are none.
+ */
+export function detectSessionOutliers(projects: ProjectSummary[]): WasteFinding | null {
+  type Flagged = { project: string; sessionId: string; day: string; cost: number; peerCost: number; ratio: number; extraTokens: number }
+  const flagged: Flagged[] = []
+
+  for (const summary of projects) {
+    const billed = summary.sessions.filter(s => s.totalCostUSD > 0)
+    if (billed.length < MIN_SESSIONS_FOR_OUTLIER) continue
+
+    const peerCount = billed.length - 1
+    const costSum = billed.reduce((acc, s) => acc + s.totalCostUSD, 0)
+    const tokenSum = billed.reduce((acc, s) => acc + sessionTokenTotal(s), 0)
+    for (const candidate of billed) {
+      // Leave-one-out mean: the candidate never contributes to its own baseline.
+      const peerCost = (costSum - candidate.totalCostUSD) / peerCount
+      if (peerCost <= 0) continue
+      const ratio = candidate.totalCostUSD / peerCost
+      if (ratio <= SESSION_OUTLIER_MULTIPLIER || candidate.totalCostUSD < MIN_SESSION_OUTLIER_COST_USD) continue
+
+      const tokens = sessionTokenTotal(candidate)
+      flagged.push({
+        project: summary.project,
+        sessionId: candidate.sessionId,
+        day: candidate.firstTimestamp.slice(0, 10),
+        cost: candidate.totalCostUSD,
+        peerCost,
+        ratio,
+        extraTokens: Math.max(0, tokens - (tokenSum - tokens) / peerCount),
+      })
+    }
+  }
+  if (flagged.length === 0) return null
+
+  flagged.sort((a, b) => b.cost - a.cost)
+  const shown = flagged.slice(0, SESSION_OUTLIER_PREVIEW)
+  const hidden = flagged.length - shown.length
+  const list = shown.map(o => `${o.project}/${o.sessionId} on ${o.day}: ${formatCost(o.cost)} (${o.ratio.toFixed(1)}x avg)`).join('; ')
+  const extra = hidden > 0 ? `; +${hidden} more` : ''
+  // Totals cover every outlier (not just the preview); no outlier contributes a negative excess.
+  let excessTokens = 0
+  let excessCost = 0
+  for (const o of flagged) {
+    excessTokens += o.extraTokens
+    excessCost += Math.max(0, o.cost - o.peerCost)
+  }
+
+  return {
+    title: `${flagged.length} high-cost session outlier${flagged.length === 1 ? '' : 's'}`,
+    explanation: `Sessions costing more than ${SESSION_OUTLIER_MULTIPLIER}x their peer-session average in the same project: ${list}${extra}. These usually come from broad prompts, runaway loops, or context-heavy work that should be split into smaller sessions.`,
+    impact: flagged.length >= 3 || excessCost >= 10 ? 'high' : 'medium',
+    tokensSaved: Math.round(excessTokens),
+    fix: {
+      type: 'paste',
+      label: 'For expensive work, start with a tighter operating constraint:',
+      text: 'Before making changes, summarize the smallest viable plan. Keep context narrow, avoid broad searches, and stop after the first working patch so I can review before continuing.',
+    },
+  }
+}
+
 // ============================================================================
 // Scoring
 // ============================================================================
@ -973,6 +1048,7 @@ export async function scanAndDetect(
    () => detectJunkReads(toolCalls, dateRange),
    () => detectDuplicateReads(toolCalls, dateRange),
    () => detectUnusedMcp(toolCalls, projects, projectCwds),
+    () => detectSessionOutliers(projects),
    () => detectBloatedClaudeMd(projectCwds),
    () => detectBashBloat(),
  ]
--- a/tests/optimize.test.ts
+++ b/tests/optimize.test.ts
@ -6,6 +6,7 @@ import {
  detectLowReadEditRatio,
  detectCacheBloat,
  detectBloatedClaudeMd,
+  detectSessionOutliers,
  computeHealth,
  computeTrend,
  type ToolCall,
@ -22,6 +23,39 @@ function emptyProjects(): ProjectSummary[] {
  return []
 }

+/**
+ * Builds a ProjectSummary fixture containing one session per entry in `costs`.
+ * Session N is named `sN`, dated 2026-05-NN, and gets round(cost * 1000) input and output tokens.
+ */
+function projectWithSessions(costs: number[], project = 'app'): ProjectSummary {
+  const sessions = costs.map((usd, index) => {
+    const day = `2026-05-${String(index + 1).padStart(2, '0')}`
+    const tokenCount = Math.round(usd * 1000)
+    return {
+      sessionId: `s${index + 1}`,
+      project,
+      firstTimestamp: `${day}T10:00:00Z`,
+      lastTimestamp: `${day}T10:30:00Z`,
+      totalCostUSD: usd,
+      totalInputTokens: tokenCount,
+      totalOutputTokens: tokenCount,
+      totalCacheReadTokens: 0,
+      totalCacheWriteTokens: 0,
+      apiCalls: 1,
+      turns: [],
+      modelBreakdown: {},
+      toolBreakdown: {},
+      mcpBreakdown: {},
+      bashBreakdown: {},
+      categoryBreakdown: {} as ProjectSummary['sessions'][number]['categoryBreakdown'],
+      skillBreakdown: {},
+    }
+  })
+  let totalCostUSD = 0
+  for (const usd of costs) totalCostUSD += usd
+  return { project, projectPath: `/tmp/${project}`, sessions, totalCostUSD, totalApiCalls: sessions.length }
+}
+
 describe('detectJunkReads', () => {
  it('returns null below minimum threshold', () => {
    const calls = [
@ -207,6 +241,44 @@ describe('detectBloatedClaudeMd', () => {
  })
 })

+// Covers detectSessionOutliers: baseline size, the 2x boundary, the absolute cost floor, per-project isolation.
+describe('detectSessionOutliers', () => {
+  const detect = (...projects: ProjectSummary[]) => detectSessionOutliers(projects)
+
+  it('returns null when there are too few sessions for a project baseline', () => {
+    expect(detect(projectWithSessions([0.5, 4]))).toBeNull()
+  })
+
+  it('returns null when no session exceeds twice the project average', () => {
+    expect(detect(projectWithSessions([1, 1.2, 1.4, 1.6]))).toBeNull()
+  })
+
+  it('does not flag the exact 2x boundary', () => {
+    // The two peers of the 2.00 session average 1.00, so its ratio is exactly 2.
+    expect(detect(projectWithSessions([1, 1, 2]))).toBeNull()
+  })
+
+  it('flags sessions costing more than twice their project average', () => {
+    const result = detect(projectWithSessions([1, 1, 1, 10]))
+    expect(result).not.toBeNull()
+    expect(result?.title).toContain('high-cost session outlier')
+    expect(result?.explanation).toContain('app/s4')
+    expect(result?.impact).toBe('medium')
+    expect(result?.tokensSaved).toBeGreaterThan(0)
+  })
+
+  it('ignores tiny absolute-cost outliers', () => {
+    expect(detect(projectWithSessions([0.01, 0.01, 0.01, 0.2]))).toBeNull()
+  })
+
+  it('isolates baselines per project', () => {
+    const result = detect(projectWithSessions([8, 9, 10], 'web'), projectWithSessions([1, 1, 1, 12], 'api'))
+    expect(result).not.toBeNull()
+    expect(result?.explanation).toContain('api/s4')
+    expect(result?.explanation).not.toContain('web/')
+  })
+})
+
 describe('computeHealth', () => {
  it('returns A with 100 for no findings', () => {
    const { score, grade } = computeHealth([])