codeburn/tests/optimize.test.ts
AgentSeal a9ca2a1134 feat(optimize): fix tracking via recent vs baseline split
Solves the problem where users who fixed an issue continued to see
the finding for the remainder of the period. Findings now show
visible progress or disappear entirely.

Mechanism (no state file, no new I/O):
- ToolCall and ApiCallMeta gain a `recent` boolean, set when the
  entry's timestamp falls inside a rolling 48-hour window.
- Each session-based detector counts recent vs total occurrences.
- computeTrend classifies each finding:
    active    -- recent rate matches baseline
    improving -- recent rate under half of baseline (green arrow)
    resolved  -- zero recent waste AND confirmed recent activity
- Resolved findings are suppressed. Improving findings render with
  a green "improving down-arrow" badge next to the impact label.
- When no recent activity exists, findings default to active so a
  user who simply paused is not told everything is fixed.

Applies to the four session-based detectors: junk reads, duplicate
reads, low read:edit ratio, cache bloat. The filesystem detectors
(missing .claudeignore, bloated CLAUDE.md, unused MCP, ghost agents
/ skills / commands, bash limit) already self-heal on next run.

5 new tests cover computeTrend edge cases. 126 tests pass.
2026-04-16 06:53:08 -07:00

311 lines
9.8 KiB
TypeScript

import { describe, it, expect } from 'vitest'
import {
detectJunkReads,
detectDuplicateReads,
detectLowReadEditRatio,
detectCacheBloat,
detectBloatedClaudeMd,
detectMissingClaudeignore,
computeHealth,
computeTrend,
type ToolCall,
type ApiCallMeta,
type WasteFinding,
} from '../src/optimize.js'
import type { ProjectSummary } from '../src/types.js'
function call(name: string, input: Record<string, unknown>, sessionId = 's1', project = 'p1'): ToolCall {
return { name, input, sessionId, project }
}
function emptyProjects(): ProjectSummary[] {
return []
}
describe('detectJunkReads', () => {
it('returns null below minimum threshold', () => {
const calls = [
call('Read', { file_path: '/x/node_modules/a.js' }),
call('Read', { file_path: '/x/node_modules/b.js' }),
]
expect(detectJunkReads(calls)).toBeNull()
})
it('flags when threshold is met', () => {
const calls = [
call('Read', { file_path: '/x/node_modules/a.js' }),
call('Read', { file_path: '/x/node_modules/b.js' }),
call('Read', { file_path: '/x/.git/config' }),
]
const finding = detectJunkReads(calls)
expect(finding).not.toBeNull()
expect(finding!.impact).toBe('low')
})
it('scales impact with read count', () => {
const make = (n: number) => Array.from({ length: n }, (_, i) =>
call('Read', { file_path: `/x/node_modules/file-${i}.js` })
)
expect(detectJunkReads(make(25))!.impact).toBe('high')
expect(detectJunkReads(make(10))!.impact).toBe('medium')
})
it('ignores non-junk paths', () => {
const calls = [
call('Read', { file_path: '/x/src/a.ts' }),
call('Read', { file_path: '/x/src/b.ts' }),
call('Read', { file_path: '/x/README.md' }),
]
expect(detectJunkReads(calls)).toBeNull()
})
it('ignores non-read tools', () => {
const calls = [
call('Edit', { file_path: '/x/node_modules/a.js' }),
call('Bash', { command: 'ls node_modules' }),
call('Grep', { pattern: 'test', path: '/x/node_modules' }),
]
expect(detectJunkReads(calls)).toBeNull()
})
it('handles missing file_path gracefully', () => {
const calls = [
call('Read', {}),
call('Read', { file_path: null as unknown as string }),
]
expect(detectJunkReads(calls)).toBeNull()
})
it('builds .claudeignore content from detected + common extras', () => {
const calls = Array.from({ length: 5 }, () => call('Read', { file_path: '/x/node_modules/a.js' }))
const finding = detectJunkReads(calls)!
expect(finding.fix.type).toBe('file-content')
if (finding.fix.type === 'file-content') {
expect(finding.fix.content).toContain('node_modules')
}
})
})
describe('detectDuplicateReads', () => {
it('counts same file read multiple times in same session', () => {
const calls = [
...Array.from({ length: 4 }, () => call('Read', { file_path: '/src/a.ts' }, 's1')),
...Array.from({ length: 4 }, () => call('Read', { file_path: '/src/b.ts' }, 's1')),
]
const finding = detectDuplicateReads(calls)
expect(finding).not.toBeNull()
})
it('does not count across sessions', () => {
const calls = [
call('Read', { file_path: '/src/a.ts' }, 's1'),
call('Read', { file_path: '/src/a.ts' }, 's2'),
call('Read', { file_path: '/src/a.ts' }, 's3'),
]
expect(detectDuplicateReads(calls)).toBeNull()
})
it('excludes junk directory reads', () => {
const calls = Array.from({ length: 10 }, () =>
call('Read', { file_path: '/x/node_modules/foo.js' }, 's1')
)
expect(detectDuplicateReads(calls)).toBeNull()
})
it('returns null for single reads', () => {
const calls = [
call('Read', { file_path: '/src/a.ts' }, 's1'),
call('Read', { file_path: '/src/b.ts' }, 's1'),
]
expect(detectDuplicateReads(calls)).toBeNull()
})
})
describe('detectLowReadEditRatio', () => {
it('returns null below minimum edit count', () => {
const calls = [
call('Edit', {}),
call('Edit', {}),
call('Read', {}),
]
expect(detectLowReadEditRatio(calls)).toBeNull()
})
it('returns null when ratio is healthy', () => {
const calls = [
...Array.from({ length: 40 }, () => call('Read', {})),
...Array.from({ length: 10 }, () => call('Edit', {})),
]
expect(detectLowReadEditRatio(calls)).toBeNull()
})
it('flags when edits outpace reads', () => {
const calls = [
...Array.from({ length: 5 }, () => call('Read', {})),
...Array.from({ length: 10 }, () => call('Edit', {})),
]
const finding = detectLowReadEditRatio(calls)
expect(finding).not.toBeNull()
expect(finding!.impact).toBe('high')
})
it('counts Grep and Glob as reads for ratio', () => {
const calls = [
...Array.from({ length: 40 }, () => call('Grep', {})),
...Array.from({ length: 10 }, () => call('Edit', {})),
]
expect(detectLowReadEditRatio(calls)).toBeNull()
})
it('counts Write as edit', () => {
const calls = [
...Array.from({ length: 15 }, () => call('Read', {})),
...Array.from({ length: 10 }, () => call('Write', {})),
]
const finding = detectLowReadEditRatio(calls)
expect(finding).not.toBeNull()
})
})
describe('detectCacheBloat', () => {
it('returns null below minimum api calls', () => {
const apiCalls: ApiCallMeta[] = [
{ cacheCreationTokens: 80000, version: '2.1.100' },
{ cacheCreationTokens: 80000, version: '2.1.100' },
]
expect(detectCacheBloat(apiCalls, emptyProjects())).toBeNull()
})
it('returns null when median is close to baseline', () => {
const apiCalls: ApiCallMeta[] = Array.from({ length: 20 }, () => ({
cacheCreationTokens: 50000,
version: '2.1.98',
}))
expect(detectCacheBloat(apiCalls, emptyProjects())).toBeNull()
})
it('flags when median exceeds 1.4x baseline', () => {
const apiCalls: ApiCallMeta[] = Array.from({ length: 20 }, () => ({
cacheCreationTokens: 80000,
version: '2.1.100',
}))
const finding = detectCacheBloat(apiCalls, emptyProjects())
expect(finding).not.toBeNull()
})
})
describe('detectBloatedClaudeMd', () => {
it('returns null when no projects have CLAUDE.md', () => {
const result = detectBloatedClaudeMd(new Set(['/nonexistent/path']))
expect(result).toBeNull()
})
it('returns null for empty project set', () => {
const result = detectBloatedClaudeMd(new Set())
expect(result).toBeNull()
})
})
describe('detectMissingClaudeignore', () => {
it('returns null for empty set', () => {
expect(detectMissingClaudeignore(new Set())).toBeNull()
})
it('returns null for non-existent cwds', () => {
expect(detectMissingClaudeignore(new Set(['/does/not/exist']))).toBeNull()
})
})
describe('computeHealth', () => {
it('returns A with 100 for no findings', () => {
const { score, grade } = computeHealth([])
expect(score).toBe(100)
expect(grade).toBe('A')
})
function mockFinding(impact: 'high' | 'medium' | 'low'): WasteFinding {
return {
title: 't', explanation: 'e', impact, tokensSaved: 1000,
fix: { type: 'paste', label: 'l', text: 't' },
}
}
it('one low finding stays at A', () => {
const { score, grade } = computeHealth([mockFinding('low')])
expect(score).toBe(97)
expect(grade).toBe('A')
})
it('two high findings drop to C', () => {
const { score, grade } = computeHealth([mockFinding('high'), mockFinding('high')])
expect(score).toBe(70)
expect(grade).toBe('C')
})
it('caps penalty at 80 to prevent score below 20', () => {
const findings = Array.from({ length: 20 }, () => mockFinding('high'))
const { score } = computeHealth(findings)
expect(score).toBe(20)
})
it('progresses grades predictably', () => {
expect(computeHealth([mockFinding('low')]).grade).toBe('A')
expect(computeHealth([mockFinding('medium')]).grade).toBe('A')
expect(computeHealth([mockFinding('medium'), mockFinding('medium')]).grade).toBe('B')
expect(computeHealth([mockFinding('high'), mockFinding('high'), mockFinding('high')]).grade).toBe('C')
expect(computeHealth([mockFinding('high'), mockFinding('high'), mockFinding('high'), mockFinding('high'), mockFinding('high')]).grade).toBe('F')
})
})
describe('computeTrend', () => {
const window = 48 * 60 * 60 * 1000
const baselineWindow = 5 * 24 * 60 * 60 * 1000
it('returns active when no recent activity detected', () => {
const trend = computeTrend({
recentCount: 0, recentWindowMs: window,
baselineCount: 100, baselineWindowMs: baselineWindow,
hasRecentActivity: false,
})
expect(trend).toBe('active')
})
it('returns resolved when recent activity exists but zero waste in it', () => {
const trend = computeTrend({
recentCount: 0, recentWindowMs: window,
baselineCount: 100, baselineWindowMs: baselineWindow,
hasRecentActivity: true,
})
expect(trend).toBe('resolved')
})
it('returns improving when recent rate is less than half of baseline rate', () => {
const trend = computeTrend({
recentCount: 5, recentWindowMs: window,
baselineCount: 100, baselineWindowMs: baselineWindow,
hasRecentActivity: true,
})
expect(trend).toBe('improving')
})
it('returns active when recent rate matches baseline rate', () => {
const recentRate = 100 / baselineWindow
const recentCount = Math.ceil(recentRate * window)
const trend = computeTrend({
recentCount, recentWindowMs: window,
baselineCount: 100, baselineWindowMs: baselineWindow,
hasRecentActivity: true,
})
expect(trend).toBe('active')
})
it('returns active when baseline is empty (new finding)', () => {
const trend = computeTrend({
recentCount: 10, recentWindowMs: window,
baselineCount: 0, baselineWindowMs: baselineWindow,
hasRecentActivity: true,
})
expect(trend).toBe('active')
})
})