fix(parser): use Claude cwd for Windows project paths

Reads the canonical cwd already stored inside Claude session JSONL files and uses it as the project path, then groups sessions by a normalized path key (case + slash insensitive) so Windows projects no longer split into 3+ rows on case/slash variants. Falls back to the legacy slug-derived path when cwd is missing. Closes #217. Supersedes #228 with a fix that preserves the canonical cwd even when mixed with slug-only sessions in the same directory. Original implementation by @ozymandiashh.
This commit is contained in:
Resham Joshi 2026-05-05 23:53:31 -07:00 committed by GitHub
parent be6068b244
commit 6151cf6d73
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 217 additions and 11 deletions

View file

@ -15,6 +15,10 @@
- **Session cost outlier detector.** New `optimize` finding flags sessions costing more than 2x their peer-session average within the same project. Ignores sub-$1 outliers to avoid noise. Requires at least 3 sessions per project for a baseline.
### Fixed (CLI)
- **Windows Claude project paths.** Claude Code project rollups now prefer
the canonical `cwd` stored in session JSONL files instead of reconstructing
paths from lossy directory slugs, and group case/slash variants together.
Closes #217.
- **`all` period semantics unified between CLI and dashboard.** The dashboard treated `--period all` as all-time (epoch start) while the CLI bounded it to the last 6 months. Both now consistently mean "Last 6 months". Period helpers (`Period`, `PERIODS`, `PERIOD_LABELS`, `toPeriod`, `getDateRange`) consolidated into `cli-date.ts`. Use `--from` / `--to` for unbounded historical ranges.
### Fixed (macOS menubar)

View file

@ -26,6 +26,11 @@ function unsanitizePath(dirName: string): string {
return dirName.replace(/-/g, '/')
}
/**
 * Builds the grouping key for a project path.
 *
 * The key ignores surrounding whitespace, slash direction, trailing slashes
 * and letter case, so spellings such as `C:\Foo\Bar` and `c:/foo/bar/`
 * collapse to the same key. A path made only of slashes keeps its slashes
 * rather than collapsing to an empty key.
 *
 * @param projectPath Raw project path as recorded for a session.
 * @returns Lower-cased, forward-slash key without trailing slashes.
 */
function normalizeProjectPathKey(projectPath: string): string {
  const forwardSlashed = projectPath.trim().split('\\').join('/')
  const withoutTrailing = forwardSlashed.replace(/\/+$/, '')
  // Stripping would erase a root-only path ("/"), so fall back to the unstripped form.
  const key = withoutTrailing === '' ? forwardSlashed : withoutTrailing
  return key.toLowerCase()
}
function parseJsonlLine(line: string): JournalEntry | null {
try {
return JSON.parse(line) as JournalEntry
@ -246,6 +251,15 @@ export function extractMcpInventory(entries: JournalEntry[]): string[] {
return Array.from(inventory).sort()
}
/**
 * Finds the working directory recorded in a session's journal entries.
 *
 * Entries are scanned in order; the first `cwd` that is a string and is not
 * blank after trimming wins. Non-string values (missing, null, numbers, ...)
 * and whitespace-only strings are ignored.
 *
 * @param entries Parsed journal entries of one session file.
 * @returns The trimmed cwd, or `undefined` when no entry carries a usable one.
 */
function extractCanonicalCwd(entries: JournalEntry[]): string | undefined {
  for (const { cwd } of entries) {
    const trimmed = typeof cwd === 'string' ? cwd.trim() : ''
    if (trimmed !== '') return trimmed
  }
  return undefined
}
function buildSessionSummary(
sessionId: string,
project: string,
@ -364,7 +378,7 @@ async function parseSessionFile(
project: string,
seenMsgIds: Set<string>,
dateRange?: DateRange,
): Promise<SessionSummary | null> {
): Promise<{ session: SessionSummary; canonicalCwd?: string } | null> {
// Skip files whose mtime is older than the range start. A session file
// can only contain entries up to its last-modified time; if that predates
// the requested range, nothing in this file can match.
@ -413,8 +427,12 @@ async function parseSessionFile(
// and we want to reflect what was loaded even if the user only ran
// turns inside a narrow date window.
const mcpInventory = extractMcpInventory(entries)
const canonicalCwd = extractCanonicalCwd(entries)
return buildSessionSummary(sessionId, project, classified, mcpInventory)
return {
session: buildSessionSummary(sessionId, project, classified, mcpInventory),
...(canonicalCwd ? { canonicalCwd } : {}),
}
}
async function collectJsonlFiles(dirPath: string): Promise<string[]> {
@ -434,26 +452,50 @@ async function collectJsonlFiles(dirPath: string): Promise<string[]> {
}
async function scanProjectDirs(dirs: Array<{ path: string; name: string }>, seenMsgIds: Set<string>, dateRange?: DateRange): Promise<ProjectSummary[]> {
const projectMap = new Map<string, SessionSummary[]>()
const projectMap = new Map<string, { project: string; projectPath: string; sessions: SessionSummary[] }>()
for (const { path: dirPath, name: dirName } of dirs) {
const jsonlFiles = await collectJsonlFiles(dirPath)
for (const filePath of jsonlFiles) {
const session = await parseSessionFile(filePath, dirName, seenMsgIds, dateRange)
if (session && session.apiCalls > 0) {
const existing = projectMap.get(dirName) ?? []
existing.push(session)
projectMap.set(dirName, existing)
const parsed = await parseSessionFile(filePath, dirName, seenMsgIds, dateRange)
if (parsed && parsed.session.apiCalls > 0) {
const projectPath = parsed.canonicalCwd ?? unsanitizePath(dirName)
const projectKey = parsed.canonicalCwd ? normalizeProjectPathKey(parsed.canonicalCwd) : `slug:${dirName}`
const existing = projectMap.get(projectKey)
if (existing) {
existing.sessions.push(parsed.session)
} else {
projectMap.set(projectKey, { project: dirName, projectPath, sessions: [parsed.session] })
}
}
}
}
// If a slug has both cwd-keyed and slug-keyed entries (mixed sessions where
// some carry a canonical cwd and some don't), fold the slug-keyed sessions
// into the cwd-keyed entry so the canonical projectPath is preserved
// regardless of file iteration order.
const cwdKeyByDirName = new Map<string, string>()
for (const [key, entry] of projectMap) {
if (!key.startsWith('slug:') && !cwdKeyByDirName.has(entry.project)) {
cwdKeyByDirName.set(entry.project, key)
}
}
for (const [key, entry] of [...projectMap]) {
if (!key.startsWith('slug:')) continue
const cwdKey = cwdKeyByDirName.get(entry.project)
if (!cwdKey) continue
const target = projectMap.get(cwdKey)!
target.sessions.push(...entry.sessions)
projectMap.delete(key)
}
const projects: ProjectSummary[] = []
for (const [dirName, sessions] of projectMap) {
for (const { project, projectPath, sessions } of projectMap.values()) {
projects.push({
project: dirName,
projectPath: unsanitizePath(dirName),
project,
projectPath,
sessions,
totalCostUSD: sessions.reduce((s, sess) => s + sess.totalCostUSD, 0),
totalApiCalls: sessions.reduce((s, sess) => s + sess.apiCalls, 0),

View file

@ -0,0 +1,160 @@
import { describe, it, expect, beforeEach, afterEach } from 'vitest'
import { mkdtemp, mkdir, writeFile, rm, utimes } from 'fs/promises'
import { join } from 'path'
import { tmpdir } from 'os'
import { parseAllSessions } from '../src/parser.js'
import type { DateRange } from '../src/types.js'
// Scratch directory that stands in for the Claude config root during a test.
let tmpDir: string
// CLAUDE_CONFIG_DIR as it was before the test overrode it (undefined when unset).
let originalConfigDir: string | undefined

// Point CLAUDE_CONFIG_DIR at a fresh temp directory for every test.
beforeEach(async () => {
  tmpDir = await mkdtemp(join(tmpdir(), 'claude-cwd-test-'))
  originalConfigDir = process.env['CLAUDE_CONFIG_DIR']
  process.env['CLAUDE_CONFIG_DIR'] = tmpDir
})

// Restore the previous CLAUDE_CONFIG_DIR (or unset it) and remove the temp directory.
afterEach(async () => {
  if (originalConfigDir !== undefined) {
    process.env['CLAUDE_CONFIG_DIR'] = originalConfigDir
  } else {
    delete process.env['CLAUDE_CONFIG_DIR']
  }
  await rm(tmpDir, { recursive: true, force: true })
})
/**
 * Builds a date range covering one whole UTC calendar day.
 *
 * @param day Date in `YYYY-MM-DD` form.
 * @returns Range from 00:00:00.000Z to 23:59:59.999Z of that day.
 */
function dayRange(day: string): DateRange {
  const start = new Date(`${day}T00:00:00.000Z`)
  const end = new Date(`${day}T23:59:59.999Z`)
  return { start, end }
}
/**
 * Writes a one-line Claude session JSONL fixture under
 * `<tmpDir>/projects/<projectSlug>/<sessionId>.jsonl`.
 *
 * The single entry is an assistant message with token usage and the given
 * `cwd`. The file's atime and mtime are set to `timestamp`.
 *
 * @param projectSlug Directory name under `projects/`.
 * @param sessionId Session id; also used for the file name and message id.
 * @param cwd Working directory recorded on the entry.
 * @param timestamp ISO timestamp for the entry and the file times.
 */
async function writeClaudeSession(projectSlug: string, sessionId: string, cwd: string, timestamp: string): Promise<void> {
  const projectDir = join(tmpDir, 'projects', projectSlug)
  await mkdir(projectDir, { recursive: true })

  const entry = {
    type: 'assistant',
    sessionId,
    timestamp,
    cwd,
    message: {
      id: `msg-${sessionId}`,
      type: 'message',
      role: 'assistant',
      model: 'claude-sonnet-4-5',
      content: [],
      usage: {
        input_tokens: 100,
        output_tokens: 50,
      },
    },
  }

  const sessionFile = join(projectDir, `${sessionId}.jsonl`)
  await writeFile(sessionFile, `${JSON.stringify(entry)}\n`)

  // Align the file times with the entry timestamp.
  const stamp = new Date(timestamp)
  await utimes(sessionFile, stamp, stamp)
}
// End-to-end checks that project rollups returned by parseAllSessions take
// their path from the `cwd` field inside session JSONL files, group equivalent
// Windows spellings together, and fall back to the directory slug otherwise.
// All fixtures live under the temp CLAUDE_CONFIG_DIR and use 2099 timestamps.
describe('Claude cwd project paths', () => {
// A single session whose directory slug is 'c--AI-LAB-OPENCLAW' but whose
// entry records the real Windows path. The reported projectPath must be the
// recorded cwd, not the slug with every '-' turned into '/'.
it('uses the JSONL cwd as the canonical project path instead of the lossy directory slug', async () => {
await writeClaudeSession(
'c--AI-LAB-OPENCLAW',
'windows-session',
'C:\\AI_LAB\\OPENCLAW',
'2099-05-01T12:00:00.000Z',
)
const projects = await parseAllSessions(dayRange('2099-05-01'), 'claude')
expect(projects).toHaveLength(1)
expect(projects[0]!.projectPath).toBe('C:\\AI_LAB\\OPENCLAW')
expect(projects[0]!.projectPath).not.toBe('c//AI/LAB/OPENCLAW')
expect(projects[0]!.totalApiCalls).toBe(1)
})
// Two sessions in two different slug directories whose cwd values differ only
// in drive-letter case, slash direction and a trailing slash. They must be
// reported as one project containing both sessions.
it('groups Windows cwd case and slash variants into one project', async () => {
await writeClaudeSession(
'windows-openclaw-a',
'upper-backslash',
'C:\\AI_LAB\\OPENCLAW',
'2099-05-02T10:00:00.000Z',
)
await writeClaudeSession(
'windows-openclaw-b',
'lower-forward-slash',
'c:/AI_LAB/OPENCLAW/',
'2099-05-02T11:00:00.000Z',
)
const projects = await parseAllSessions(dayRange('2099-05-02'), 'claude')
expect(projects).toHaveLength(1)
expect(projects[0]!.sessions).toHaveLength(2)
expect(projects[0]!.totalApiCalls).toBe(2)
// Sorted before comparing so the assertion does not depend on session order.
expect(projects[0]!.sessions.map(s => s.sessionId).sort()).toEqual([
'lower-forward-slash',
'upper-backslash',
])
})
// One slug directory holding a session without any cwd field and a session
// with one. Both must land in a single project that reports the recorded cwd.
it('prefers the canonical cwd path even when mixed with slug-only sessions in the same directory', async () => {
const slug = 'c--AI-LAB-OPENCLAW'
const projectDir = join(tmpDir, 'projects', slug)
await mkdir(projectDir, { recursive: true })
// Written by hand because writeClaudeSession always includes a cwd field.
// NOTE(review): the 'a-' / 'b-' file-name prefixes look intended to make the
// cwd-less file come first in directory iteration — confirm against the scanner.
const noCwdPath = join(projectDir, 'a-no-cwd.jsonl')
await writeFile(noCwdPath, JSON.stringify({
type: 'assistant',
sessionId: 'no-cwd',
timestamp: '2099-05-03T10:00:00.000Z',
message: {
id: 'msg-no-cwd', type: 'message', role: 'assistant',
model: 'claude-sonnet-4-5', content: [],
usage: { input_tokens: 100, output_tokens: 50 },
},
}) + '\n')
// Set the file times to the entry timestamp, as writeClaudeSession does.
await utimes(noCwdPath, new Date('2099-05-03T10:00:00.000Z'), new Date('2099-05-03T10:00:00.000Z'))
await writeClaudeSession(slug, 'b-with-cwd', 'C:\\AI_LAB\\OPENCLAW', '2099-05-03T11:00:00.000Z')
const projects = await parseAllSessions(dayRange('2099-05-03'), 'claude')
expect(projects).toHaveLength(1)
expect(projects[0]!.sessions).toHaveLength(2)
expect(projects[0]!.projectPath).toBe('C:\\AI_LAB\\OPENCLAW')
expect(projects[0]!.projectPath).not.toBe('c//AI/LAB/OPENCLAW')
})
// Four sessions in one slug directory, none with a usable cwd (null, empty,
// whitespace-only, absent). They must form one project whose path is derived
// from the slug 'fallback-slug'.
it('falls back to the slug-derived path when cwd is null, missing, or empty', async () => {
const slug = 'fallback-slug'
const projectDir = join(tmpDir, 'projects', slug)
await mkdir(projectDir, { recursive: true })
// Local fixture writer: like writeClaudeSession, but accepts any cwd value
// and omits the field entirely when `cwdField` is undefined.
async function writeWith(name: string, sessionId: string, cwdField: unknown, ts: string) {
const filePath = join(projectDir, `${name}.jsonl`)
const obj: Record<string, unknown> = {
type: 'assistant', sessionId, timestamp: ts,
message: {
id: `msg-${sessionId}`, type: 'message', role: 'assistant',
model: 'claude-sonnet-4-5', content: [],
usage: { input_tokens: 100, output_tokens: 50 },
},
}
if (cwdField !== undefined) obj.cwd = cwdField
await writeFile(filePath, JSON.stringify(obj) + '\n')
await utimes(filePath, new Date(ts), new Date(ts))
}
await writeWith('null-cwd', 's-null', null, '2099-05-04T10:00:00.000Z')
await writeWith('empty-cwd', 's-empty', '', '2099-05-04T10:30:00.000Z')
await writeWith('whitespace-cwd', 's-ws', ' ', '2099-05-04T11:00:00.000Z')
await writeWith('missing-cwd', 's-miss', undefined, '2099-05-04T11:30:00.000Z')
const projects = await parseAllSessions(dayRange('2099-05-04'), 'claude')
expect(projects).toHaveLength(1)
expect(projects[0]!.sessions).toHaveLength(4)
expect(projects[0]!.projectPath).toBe('fallback/slug')
})
})