fix: include agent/subagent sessions, fix Codex cache hit and cost calculation

- Remove agent-*.jsonl exclusion filter that was dropping ~46% of API calls
- Scan subagents/ directories for subagent session files
- Normalize Codex token semantics: OpenAI includes cached tokens inside
  input_tokens, so subtract them to match Anthropic's separate reporting
- Fixes cost double-counting and 100% cache hit display for Codex users
This commit is contained in:
AgentSeal 2026-04-14 10:18:14 -07:00
parent ce2c1d2995
commit 51c56d0726
3 changed files with 27 additions and 8 deletions

View file

@ -1,4 +1,4 @@
import { readdir, readFile, stat } from 'fs/promises'
import { readdir, readFile } from 'fs/promises'
import { basename, join } from 'path'
import { calculateCost, getShortModelName } from './models.js'
import { discoverAllSessions, getProvider } from './providers/index.js'
@ -298,15 +298,30 @@ async function parseSessionFile(
return buildSessionSummary(sessionId, project, classified)
}
/**
 * Collects the paths of every `.jsonl` file belonging to one directory.
 *
 * Two locations are searched:
 *  1. entries directly inside `dirPath` whose name ends in `.jsonl`;
 *  2. entries ending in `.jsonl` inside `<dirPath>/<entry>/subagents`, for
 *     every entry of `dirPath` whose name does NOT end in `.jsonl`.
 *
 * Any `readdir` failure (missing path, not a directory, permissions, ...) is
 * swallowed and treated as an empty listing, so regular files and entries
 * without a `subagents` folder simply contribute nothing.
 *
 * @param dirPath - Directory to scan.
 * @returns Joined paths: top-level files first, followed by subagent files
 *   grouped by their parent entry, each group in `readdir` order.
 */
async function collectJsonlFiles(dirPath: string): Promise<string[]> {
  const isJsonl = (name: string): boolean => name.endsWith('.jsonl')
  // Best-effort listing: an unreadable path behaves like an empty directory.
  const listDir = (path: string): Promise<string[]> =>
    readdir(path).catch((): string[] => [])

  const entries = await listDir(dirPath)
  const topLevelFiles = entries.filter(isJsonl).map(name => join(dirPath, name))

  // The per-entry lookups are independent of each other, so start them all
  // at once rather than awaiting one at a time. Promise.all resolves to an
  // array in input order, so the result order matches a sequential scan.
  const subagentFiles = await Promise.all(
    entries
      .filter(name => !isJsonl(name))
      .map(async name => {
        const subagentsPath = join(dirPath, name, 'subagents')
        const names = await listDir(subagentsPath)
        return names.filter(isJsonl).map(sf => join(subagentsPath, sf))
      }),
  )

  return [...topLevelFiles, ...subagentFiles.flat()]
}
async function scanProjectDirs(dirs: Array<{ path: string; name: string }>, seenMsgIds: Set<string>, dateRange?: DateRange): Promise<ProjectSummary[]> {
const projectMap = new Map<string, SessionSummary[]>()
for (const { path: dirPath, name: dirName } of dirs) {
const files = await readdir(dirPath).catch(() => [])
const jsonlFiles = files.filter(f => f.endsWith('.jsonl') && !f.startsWith('agent-'))
const jsonlFiles = await collectJsonlFiles(dirPath)
for (const file of jsonlFiles) {
const session = await parseSessionFile(join(dirPath, file), dirName, seenMsgIds, dateRange)
for (const filePath of jsonlFiles) {
const session = await parseSessionFile(filePath, dirName, seenMsgIds, dateRange)
if (session && session.apiCalls > 0) {
const existing = projectMap.get(dirName) ?? []
existing.push(session)

View file

@ -227,6 +227,10 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
const totalTokens = inputTokens + cachedInputTokens + outputTokens + reasoningTokens
if (totalTokens === 0) continue
// OpenAI includes cached tokens inside input_tokens; Anthropic does not.
// Normalize to Anthropic semantics: inputTokens = non-cached only.
const uncachedInputTokens = Math.max(0, inputTokens - cachedInputTokens)
const model = resolveModel(entry.payload, sessionModel)
const timestamp = entry.timestamp ?? ''
const dedupKey = `codex:${source.path}:${timestamp}:${cumulativeTotal}`
@ -236,7 +240,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
const costUSD = calculateCost(
model,
inputTokens,
uncachedInputTokens,
outputTokens + reasoningTokens,
0,
cachedInputTokens,
@ -246,7 +250,7 @@ function createParser(source: SessionSource, seenKeys: Set<string>): SessionPars
yield {
provider: 'codex',
model,
inputTokens,
inputTokens: uncachedInputTokens,
outputTokens,
cacheCreationInputTokens: 0,
cacheReadInputTokens: cachedInputTokens,

View file

@ -153,7 +153,7 @@ describe('codex provider - JSONL parsing', () => {
const call = calls[0]!
expect(call.provider).toBe('codex')
expect(call.model).toBe('gpt-5.3-codex')
expect(call.inputTokens).toBe(500)
expect(call.inputTokens).toBe(400)
expect(call.cachedInputTokens).toBe(100)
expect(call.cacheReadInputTokens).toBe(100)
expect(call.outputTokens).toBe(200)