Harden menubar refresh recovery

This commit is contained in:
iamtoruk 2026-05-11 20:44:06 -07:00
parent f058f36dbd
commit b4b28becc8
6 changed files with 170 additions and 42 deletions

View file

@ -51,6 +51,7 @@ final class AppStore {
private var cache: [PayloadCacheKey: CachedPayload] = [:]
private var cacheDate: String = ""
private var switchTask: Task<Void, Never>?
private var payloadRefreshGeneration: UInt64 = 0
/// Tracks the last successful fetch timestamp per key for stuck-loading
/// diagnostics. NOT used for cache-freshness logic `CachedPayload.fetchedAt`
/// is authoritative there. This map persists across cache wipes (day
@ -87,6 +88,20 @@ final class AppStore {
cache[currentKey] != nil
}
var hasStaleLoading: Bool {
let now = Date()
return loadingStartedAtByKey.values.contains {
now.timeIntervalSince($0) > loadingWatchdogSeconds
}
}
var needsInteractivePayloadRefresh: Bool {
let todayKey = PayloadCacheKey(period: .today, provider: .all)
return cache[currentKey]?.isFresh != true ||
cache[todayKey]?.isFresh != true ||
hasStaleLoading
}
/// True if any cached payload reports at least one provider. Used to keep the
/// AgentTabStrip visible across period/provider switches even when the current
/// key's payload is briefly empty (e.g. immediately after a `switchTo` and
@ -135,6 +150,7 @@ final class AppStore {
private var inFlightKeys: Set<PayloadCacheKey> = []
func resetLoadingState() {
payloadRefreshGeneration &+= 1
loadingCountsByKey.removeAll()
loadingStartedAtByKey.removeAll()
inFlightKeys.removeAll()
@ -161,6 +177,7 @@ final class AppStore {
}
guard !staleEntries.isEmpty else { return false }
payloadRefreshGeneration &+= 1
for (key, started) in staleEntries {
NSLog("CodeBurn: loading stuck for %ds on %@/%@ — auto-clearing",
Int(now.timeIntervalSince(started)), key.period.rawValue, key.provider.rawValue)
@ -209,6 +226,7 @@ final class AppStore {
invalidateStaleDayCache()
let key = currentKey
let cacheDateAtStart = cacheDate
let generationAtStart = payloadRefreshGeneration
if !force, cache[key]?.isFresh == true { return }
if !force, inFlightKeys.contains(key) { return }
inFlightKeys.insert(key)
@ -237,6 +255,10 @@ final class AppStore {
}
do {
let fresh = try await DataClient.fetch(period: key.period, provider: key.provider, includeOptimize: includeOptimize)
if generationAtStart != payloadRefreshGeneration {
NSLog("CodeBurn: dropping fetch result for \(key.period.rawValue)/\(key.provider.rawValue) — refresh pipeline reset mid-fetch")
return
}
if Task.isCancelled {
// Distinguish cancellation (user switched tabs mid-fetch) from
// the silent-no-result path. Without this log, a cancelled
@ -263,6 +285,7 @@ final class AppStore {
do {
let fallback = try await DataClient.fetch(period: key.period, provider: key.provider, includeOptimize: false)
guard !Task.isCancelled else { return }
if generationAtStart != payloadRefreshGeneration { return }
if cacheDate != cacheDateAtStart { return }
cache[key] = CachedPayload(payload: fallback, fetchedAt: Date())
lastSuccessByKey[key] = Date()
@ -288,8 +311,13 @@ final class AppStore {
func refreshQuietly(period: Period) async {
invalidateStaleDayCache()
let cacheDateAtStart = cacheDate
let generationAtStart = payloadRefreshGeneration
do {
let fresh = try await DataClient.fetch(period: period, provider: .all, includeOptimize: false)
if generationAtStart != payloadRefreshGeneration {
NSLog("CodeBurn: dropping quiet fetch result for \(period.rawValue) — refresh pipeline reset mid-fetch")
return
}
// Same day-rollover guard as refresh(): drop yesterday's payload if
// the calendar rolled over during the fetch.
if cacheDate != cacheDateAtStart { return }

View file

@ -6,6 +6,8 @@ private let refreshIntervalSeconds: UInt64 = 30
private let nanosPerSecond: UInt64 = 1_000_000_000
private let refreshIntervalNanos: UInt64 = refreshIntervalSeconds * nanosPerSecond
private let forceRefreshWatchdogSeconds: TimeInterval = 90
private let refreshLoopWatchdogSeconds: TimeInterval = 90
private let refreshRateLimitSeconds: TimeInterval = 5
private let interactiveQuotaRefreshFloorSeconds: TimeInterval = 30
private let statusItemWidth: CGFloat = NSStatusItem.variableLength
private let popoverWidth: CGFloat = 360
@ -42,6 +44,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
private var forceRefreshGeneration: UInt64 = 0
private var manualRefreshTask: Task<Void, Never>?
private var manualRefreshGeneration: UInt64 = 0
private var refreshLoopHeartbeatAt: Date = .distantPast
func applicationWillFinishLaunching(_ notification: Notification) {
// Set accessory policy before the app's focus chain forms. On macOS Tahoe
@ -94,30 +97,21 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
queue: .main
) { [weak self] _ in
Task { @MainActor in
self?.forceRefreshTask?.cancel()
self?.forceRefreshTask = nil
self?.forceRefreshStartedAt = nil
self?.forceRefreshGeneration &+= 1
self?.manualRefreshTask?.cancel()
self?.manualRefreshTask = nil
self?.manualRefreshGeneration &+= 1
self?.store.resetLoadingState()
self?.refreshLoopTask?.cancel()
self?.refreshLoopTask = nil
self?.prepareRefreshPipelineForSleep()
}
}
// didWakeNotification + screensDidWakeNotification can both fire on
// the same wake. forceRefresh has a 5-second rate-limit gate so the
// duplicate is squashed there. Restart the refresh loop too, since
// we cancelled it on willSleep.
// the same wake. forceRefreshTask squashes overlap; both notifications
// still bypass the short manual-click rate limit so a just-before-sleep
// refresh cannot block wake recovery.
NSWorkspace.shared.notificationCenter.addObserver(
forName: NSWorkspace.didWakeNotification,
object: nil,
queue: .main
) { [weak self] _ in
Task { @MainActor in
self?.recoverRefreshPipelineAfterInterruption(resetLoading: true)
self?.recoverRefreshPipelineAfterInterruption(resetLoading: true, reason: "wake")
}
}
@ -127,7 +121,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
queue: .main
) { [weak self] _ in
Task { @MainActor in
self?.recoverRefreshPipelineAfterInterruption(resetLoading: true)
self?.recoverRefreshPipelineAfterInterruption(resetLoading: true, reason: "screen wake")
}
}
}
@ -139,21 +133,50 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
queue: .main
) { [weak self] _ in
Task { @MainActor in
self?.recoverRefreshPipelineAfterInterruption(resetLoading: false)
self?.recoverRefreshPipelineAfterInterruption(resetLoading: false, reason: "launch agent")
}
}
}
private func recoverRefreshPipelineAfterInterruption(resetLoading: Bool) {
private func prepareRefreshPipelineForSleep() {
forceRefreshTask?.cancel()
forceRefreshTask = nil
forceRefreshStartedAt = nil
forceRefreshGeneration &+= 1
manualRefreshTask?.cancel()
manualRefreshTask = nil
manualRefreshGeneration &+= 1
store.resetLoadingState()
refreshLoopTask?.cancel()
refreshLoopTask = nil
refreshLoopHeartbeatAt = .distantPast
lastRefreshTime = .distantPast
}
private func recoverRefreshPipelineAfterInterruption(resetLoading: Bool, clearCache: Bool = false, reason: String) {
if resetLoading {
store.resetLoadingState()
forceRefreshTask?.cancel()
forceRefreshTask = nil
forceRefreshStartedAt = nil
forceRefreshGeneration &+= 1
manualRefreshTask?.cancel()
manualRefreshTask = nil
manualRefreshGeneration &+= 1
store.resetRefreshState(clearCache: clearCache)
} else {
_ = store.clearStaleLoadingIfNeeded()
}
if refreshLoopTask == nil {
let now = Date()
let loopAge = now.timeIntervalSince(refreshLoopHeartbeatAt)
if refreshLoopTask == nil || loopAge > refreshLoopWatchdogSeconds {
if refreshLoopTask != nil {
NSLog("CodeBurn: refresh loop stale for %ds after %@ — restarting", Int(loopAge), reason)
}
refreshLoopTask?.cancel()
refreshLoopTask = nil
startRefreshLoop()
}
forceRefresh()
forceRefresh(bypassRateLimit: true, forceQuota: true)
}
private func installLaunchAgentIfNeeded() {
@ -250,11 +273,13 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
return false
}
private func forceRefresh() {
private func forceRefresh(bypassRateLimit: Bool = false, forceQuota: Bool = false) {
let now = Date()
_ = clearStaleForceRefreshIfNeeded(now: now)
guard forceRefreshTask == nil else { return }
guard now.timeIntervalSince(lastRefreshTime) > 5 else { return }
if !bypassRateLimit {
guard now.timeIntervalSince(lastRefreshTime) > refreshRateLimitSeconds else { return }
}
lastRefreshTime = now
forceRefreshStartedAt = now
forceRefreshGeneration &+= 1
@ -262,9 +287,11 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
forceRefreshTask = Task {
async let main: Void = store.refresh(includeOptimize: false, force: true, showLoading: true)
async let today: Void = store.refreshQuietly(period: .today)
async let quotas: Bool = refreshLiveQuotaProgressIfDue()
_ = await (main, today, quotas)
async let quotas: Bool = refreshLiveQuotaProgressIfDue(force: forceQuota)
if store.selectedPeriod != .today || store.selectedProvider != .all {
await store.refreshQuietly(period: .today)
}
_ = await main
refreshStatusButton()
await MainActor.run { [weak self] in
guard let self, self.forceRefreshGeneration == generation else { return }
@ -272,6 +299,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
self.forceRefreshStartedAt = nil
self.lastRefreshTime = Date()
}
_ = await quotas
}
}
@ -344,8 +372,17 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
}
}
private func refreshPayloadForPopoverOpen() {
guard store.needsInteractivePayloadRefresh else { return }
recoverRefreshPipelineAfterInterruption(
resetLoading: store.hasStaleLoading,
reason: "popover open"
)
}
private func startRefreshLoop() {
refreshLoopTask?.cancel()
refreshLoopHeartbeatAt = Date()
refreshLoopTask = Task { [weak self] in
// Provider refreshes only run when the user has explicitly connected.
// Each refresh is a no-op until its corresponding bootstrap flag is set.
@ -354,6 +391,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
}
while !Task.isCancelled {
guard let self else { return }
self.refreshLoopHeartbeatAt = Date()
let clearedStaleForceRefresh = self.clearStaleForceRefreshIfNeeded()
let clearedStaleLoading = self.store.clearStaleLoadingIfNeeded()
// Skip the loop's tick if a wake / manual / distributed-
@ -617,6 +655,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
window.collectionBehavior.insert(.canJoinAllSpaces)
window.makeKeyAndOrderFront(nil)
}
refreshPayloadForPopoverOpen()
refreshLiveQuotaProgressForPopoverOpen()
}
}

View file

@ -5,6 +5,7 @@ import { calculateCost, getShortModelName } from './models.js'
import { discoverAllSessions, getProvider } from './providers/index.js'
import { flushCodexCache } from './codex-cache.js'
import { flushAntigravityCache } from './providers/antigravity.js'
import { isSqliteBusyError } from './sqlite.js'
import type { ParsedProviderCall } from './providers/types.js'
import type {
AssistantMessageContent,
@ -541,6 +542,19 @@ function providerCallToTurn(call: ParsedProviderCall): ParsedTurn {
}
}
const warnedProviderReadFailures = new Set<string>()
function warnProviderReadFailureOnce(providerName: string, err: unknown): void {
const key = `${providerName}:sqlite-busy`
if (warnedProviderReadFailures.has(key)) return
warnedProviderReadFailures.add(key)
if (isSqliteBusyError(err)) {
process.stderr.write(
`codeburn: skipped ${providerName} data because its SQLite database is temporarily locked; will retry on the next refresh.\n`
)
}
}
async function parseProviderSources(
providerName: string,
sources: Array<{ path: string; project: string }>,
@ -565,23 +579,31 @@ async function parseProviderSources(
seenKeys,
)
for await (const call of parser.parse()) {
if (dateRange) {
if (!call.timestamp) continue
const ts = new Date(call.timestamp)
if (ts < dateRange.start || ts > dateRange.end) continue
}
try {
for await (const call of parser.parse()) {
if (dateRange) {
if (!call.timestamp) continue
const ts = new Date(call.timestamp)
if (ts < dateRange.start || ts > dateRange.end) continue
}
const turn = providerCallToTurn(call)
const classified = classifyTurn(turn)
const key = `${providerName}:${call.sessionId}:${source.project}`
const turn = providerCallToTurn(call)
const classified = classifyTurn(turn)
const key = `${providerName}:${call.sessionId}:${source.project}`
const existing = sessionMap.get(key)
if (existing) {
existing.turns.push(classified)
} else {
sessionMap.set(key, { project: source.project, turns: [classified] })
const existing = sessionMap.get(key)
if (existing) {
existing.turns.push(classified)
} else {
sessionMap.set(key, { project: source.project, turns: [classified] })
}
}
} catch (err) {
if (isSqliteBusyError(err)) {
warnProviderReadFailureOnce(providerName, err)
continue
}
throw err
}
}
} finally {

View file

@ -4,7 +4,7 @@ import { homedir } from 'os'
import { calculateCost, getShortModelName } from '../models.js'
import { extractBashCommands } from '../bash-utils.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, type SqliteDatabase } from '../sqlite.js'
import { isSqliteAvailable, getSqliteLoadError, openDatabase, blobToText, isSqliteBusyError, type SqliteDatabase } from '../sqlite.js'
import type {
Provider,
SessionSource,
@ -107,7 +107,8 @@ function validateSchemaDetailed(db: SqliteDatabase): SchemaCheckResult {
for (const table of required) {
try {
db.query<{ cnt: number }>(`SELECT COUNT(*) as cnt FROM ${table} LIMIT 1`)
} catch {
} catch (err) {
if (isSqliteBusyError(err)) throw err
missing.push(table)
}
}

View file

@ -16,6 +16,7 @@ export type SqliteDatabase = {
type DatabaseSyncCtor = new (path: string, options?: { readOnly?: boolean }) => {
prepare(sql: string): { all(...params: unknown[]): Row[] }
exec?(sql: string): void
close(): void
}
@ -97,12 +98,35 @@ export function getSqliteLoadError(): string {
return loadError ?? 'SQLite driver not available'
}
export function isSqliteBusyError(err: unknown): boolean {
const e = err as { code?: unknown; errcode?: unknown; errstr?: unknown; message?: unknown } | null
const code = typeof e?.code === 'string' ? e.code : ''
const errcode = typeof e?.errcode === 'number' ? e.errcode : null
const message = [
typeof e?.message === 'string' ? e.message : '',
typeof e?.errstr === 'string' ? e.errstr : '',
].join(' ')
return (
errcode === 5 ||
errcode === 6 ||
code === 'SQLITE_BUSY' ||
code === 'SQLITE_LOCKED' ||
/\bSQLITE_(BUSY|LOCKED)\b|database (?:is |table is )?locked/i.test(message)
)
}
export function openDatabase(path: string): SqliteDatabase {
if (!loadDriver() || DatabaseSync === null) {
throw new Error(getSqliteLoadError())
}
const db = new DatabaseSync(path, { readOnly: true })
try {
db.exec?.('PRAGMA busy_timeout = 1000')
} catch {
// Best effort. Some Node sqlite builds may not expose exec on DatabaseSync.
}
return {
query<T extends Row = Row>(sql: string, params: unknown[] = []): T[] {

View file

@ -1,5 +1,5 @@
import { describe, it, expect } from 'vitest'
import { blobToText } from '../src/sqlite.js'
import { blobToText, isSqliteBusyError } from '../src/sqlite.js'
describe('blobToText', () => {
it('returns empty string for null', () => {
@ -37,3 +37,17 @@ describe('blobToText', () => {
expect(blobToText(new Uint8Array(0))).toBe('')
})
})
describe('isSqliteBusyError', () => {
it('detects node:sqlite busy errors by errcode', () => {
expect(isSqliteBusyError({ code: 'ERR_SQLITE_ERROR', errcode: 5, errstr: 'database is locked' })).toBe(true)
})
it('detects sqlite locked messages', () => {
expect(isSqliteBusyError(new Error('SQLITE_LOCKED: database table is locked'))).toBe(true)
})
it('ignores unrelated sqlite errors', () => {
expect(isSqliteBusyError(new Error('no such table: session'))).toBe(false)
})
})