Merge fix/menubar-loading-watchdog

This commit is contained in:
iamtoruk 2026-05-10 04:06:23 -07:00
commit 31e6f967f3
4 changed files with 152 additions and 51 deletions

View file

@ -25,9 +25,14 @@ final class AppStore {
}
var showingAccentPicker: Bool = false
var currency: String = "USD"
var isLoading: Bool { loadingCount > 0 }
private var loadingCount: Int = 0
var lastError: String?
/// True while any cache key has at least one outstanding load.
var isLoading: Bool { loadingCountsByKey.values.contains { $0 > 0 } }
/// True while the key identified by `currentKey` has at least one outstanding load.
var isCurrentKeyLoading: Bool { loadingCountsByKey[currentKey, default: 0] > 0 }
/// True once `currentKey` has been recorded in `attemptedKeys`.
var hasAttemptedCurrentKeyLoad: Bool { attemptedKeys.contains(currentKey) }
/// The error message recorded for `currentKey`, or nil when none is stored.
var lastError: String? { lastErrorByKey[currentKey] }
// Outstanding load count per key; incremented by beginLoading(for:) and
// decremented (or removed at zero) by finishLoading(for:).
private var loadingCountsByKey: [PayloadCacheKey: Int] = [:]
// Time at which a key's count last went from zero to one; read by
// clearStaleLoadingIfNeeded() to detect loads that never finished.
private var loadingStartedAtByKey: [PayloadCacheKey: Date] = [:]
// Keys for which a load has been started at least once.
private var attemptedKeys: Set<PayloadCacheKey> = []
// Most recent error message per key; entries are set to nil when cleared.
private var lastErrorByKey: [PayloadCacheKey: String] = [:]
var subscription: SubscriptionUsage?
var subscriptionError: String?
var subscriptionLoadState: SubscriptionLoadState = ClaudeCredentialStore.isBootstrapCompleted ? .loading : .notBootstrapped
@ -130,10 +135,51 @@ final class AppStore {
private var inFlightKeys: Set<PayloadCacheKey> = []
/// Discards all loading bookkeeping: zeroes `loadingCount`, empties the
/// per-key loading counters and start timestamps, and forgets which keys
/// have a fetch in flight. Cached payloads and recorded errors are not touched.
func resetLoadingState() {
loadingCount = 0
loadingCountsByKey.removeAll()
loadingStartedAtByKey.removeAll()
inFlightKeys.removeAll()
}
private let loadingWatchdogSeconds: TimeInterval = 60
/// Watchdog for per-key loading state.
///
/// Any key whose load started more than `loadingWatchdogSeconds` ago is
/// treated as stuck: the stall is logged, the key's loading count and start
/// timestamp are dropped, and the key is removed from `inFlightKeys`. When
/// the key has no cached payload, a user-facing message is stored in
/// `lastErrorByKey` for it.
///
/// - Returns: `true` when at least one stale key was cleared, otherwise `false`.
@discardableResult
func clearStaleLoadingIfNeeded() -> Bool {
    let now = Date()
    let staleEntries = loadingStartedAtByKey.filter {
        now.timeIntervalSince($0.value) > loadingWatchdogSeconds
    }
    guard !staleEntries.isEmpty else { return false }

    for (key, started) in staleEntries {
        // Swift `Int` is 64 bits wide on current Apple platforms, so the
        // matching printf-style specifier is `%ld`, not `%d`.
        NSLog("CodeBurn: loading stuck for %lds on %@/%@ — auto-clearing",
              Int(now.timeIntervalSince(started)), key.period.rawValue, key.provider.rawValue)
        loadingCountsByKey[key] = nil
        loadingStartedAtByKey[key] = nil
        inFlightKeys.remove(key)
        // Only record an error when there is no payload cached for the key.
        if cache[key] == nil {
            lastErrorByKey[key] = "Refresh took longer than expected. CodeBurn will keep retrying in the background."
        }
        // NOTE(review): a load that is still running after being cleared here
        // may later run its own cleanup for the same key — confirm that cannot
        // disturb a newer load started in between.
    }
    return true
}
/// Registers one more outstanding load for `key`.
///
/// The start timestamp is stamped only when the key had no outstanding
/// loads, so overlapping loads keep the time of the earliest one.
private func beginLoading(for key: PayloadCacheKey) {
    let outstanding = loadingCountsByKey[key] ?? 0
    if outstanding == 0 {
        loadingStartedAtByKey[key] = Date()
    }
    loadingCountsByKey[key] = outstanding + 1
}
/// Releases one outstanding load for `key`.
///
/// Does nothing when the key has no positive count. When the last
/// outstanding load finishes, both the count and the start timestamp for
/// the key are removed.
private func finishLoading(for key: PayloadCacheKey) {
    let outstanding = loadingCountsByKey[key] ?? 0
    guard outstanding > 0 else { return }

    let remaining = outstanding - 1
    guard remaining > 0 else {
        // That was the final load for this key: drop all bookkeeping for it.
        loadingCountsByKey.removeValue(forKey: key)
        loadingStartedAtByKey.removeValue(forKey: key)
        return
    }
    loadingCountsByKey[key] = remaining
}
private func invalidateStaleDayCache() {
let formatter = DateFormatter()
formatter.dateFormat = "yyyy-MM-dd"
@ -155,9 +201,11 @@ final class AppStore {
if !force, cache[key]?.isFresh == true { return }
if !force, inFlightKeys.contains(key) { return }
inFlightKeys.insert(key)
attemptedKeys.insert(key)
lastErrorByKey[key] = nil
let didShowLoading = showLoading || cache[key] == nil
if didShowLoading {
loadingCount += 1
beginLoading(for: key)
}
// Diagnostic anchor: if this key has been empty for a long time (the
// popover would currently be showing "Loading..."), log how stale the
@ -172,7 +220,9 @@ final class AppStore {
}
defer {
inFlightKeys.remove(key)
if didShowLoading { loadingCount = max(loadingCount - 1, 0) }
if didShowLoading {
finishLoading(for: key)
}
}
do {
let fresh = try await DataClient.fetch(period: key.period, provider: key.provider, includeOptimize: includeOptimize)
@ -194,7 +244,7 @@ final class AppStore {
}
cache[key] = CachedPayload(payload: fresh, fetchedAt: Date())
lastSuccessByKey[key] = Date()
lastError = nil
lastErrorByKey[key] = nil
} catch {
if Task.isCancelled { return }
NSLog("CodeBurn: fetch failed for \(key.period.rawValue)/\(key.provider.rawValue): \(error)")
@ -205,14 +255,14 @@ final class AppStore {
if cacheDate != cacheDateAtStart { return }
cache[key] = CachedPayload(payload: fallback, fetchedAt: Date())
lastSuccessByKey[key] = Date()
lastError = nil
lastErrorByKey[key] = nil
return
} catch {
if Task.isCancelled { return }
NSLog("CodeBurn: fallback fetch also failed: \(error)")
}
}
lastError = String(describing: error)
lastErrorByKey[key] = String(describing: error)
}
let allKey = PayloadCacheKey(period: selectedPeriod, provider: .all)
@ -232,7 +282,10 @@ final class AppStore {
// Same day-rollover guard as refresh(): drop yesterday's payload if
// the calendar rolled over during the fetch.
if cacheDate != cacheDateAtStart { return }
cache[PayloadCacheKey(period: period, provider: .all)] = CachedPayload(payload: fresh, fetchedAt: Date())
let key = PayloadCacheKey(period: period, provider: .all)
cache[key] = CachedPayload(payload: fresh, fetchedAt: Date())
lastSuccessByKey[key] = Date()
lastErrorByKey[key] = nil
} catch {
NSLog("CodeBurn: quiet refresh failed for \(period.rawValue): \(error)")
}

View file

@ -5,6 +5,7 @@ import Observation
// Refresh interval, in seconds.
private let refreshIntervalSeconds: UInt64 = 30
// Nanoseconds in one second, used to convert the interval below.
private let nanosPerSecond: UInt64 = 1_000_000_000
// The refresh interval expressed in nanoseconds.
private let refreshIntervalNanos: UInt64 = refreshIntervalSeconds * nanosPerSecond
// A force refresh that has been running longer than this many seconds is
// considered stuck by clearStaleForceRefreshIfNeeded(now:).
private let forceRefreshWatchdogSeconds: TimeInterval = 90
// Lets the status item size itself to its content.
private let statusItemWidth: CGFloat = NSStatusItem.variableLength
// Popover dimensions.
private let popoverWidth: CGFloat = 360
private let popoverHeight: CGFloat = 660
@ -36,6 +37,8 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
// Pending refresh work item; its scheduling is outside this excerpt — TODO confirm usage.
private var pendingRefreshWork: DispatchWorkItem?
// Handle to the refresh loop task; cancelled and set to nil elsewhere in this class.
private var refreshLoopTask: Task<Void, Never>?
// The force refresh currently running, or nil when none is tracked.
private var forceRefreshTask: Task<Void, Never>?
// When the tracked force refresh was started; compared against the watchdog threshold.
private var forceRefreshStartedAt: Date?
// Bumped (with wrapping add) whenever a force refresh is started or abandoned, so a
// finishing task can tell whether it is still the current one.
private var forceRefreshGeneration: UInt64 = 0
func applicationWillFinishLaunching(_ notification: Notification) {
// Set accessory policy before the app's focus chain forms. On macOS Tahoe
@ -90,6 +93,8 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
Task { @MainActor in
self?.forceRefreshTask?.cancel()
self?.forceRefreshTask = nil
self?.forceRefreshStartedAt = nil
self?.forceRefreshGeneration &+= 1
self?.refreshLoopTask?.cancel()
self?.refreshLoopTask = nil
}
@ -208,17 +213,42 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
private var lastRefreshTime: Date = .distantPast
/// Abandons a force refresh that has been running for longer than
/// `forceRefreshWatchdogSeconds`.
///
/// When a tracked force refresh has exceeded the threshold, the stall is
/// logged, the task is cancelled and forgotten, the generation counter is
/// bumped so the abandoned task's completion handler becomes a no-op, and
/// the store's loading state is reset.
///
/// - Parameter now: The reference time used to measure elapsed time.
/// - Returns: `true` when a stuck force refresh was cleared, otherwise `false`.
@discardableResult
private func clearStaleForceRefreshIfNeeded(now: Date = Date()) -> Bool {
    // Nothing to do unless a force refresh is both timestamped and tracked.
    guard let started = forceRefreshStartedAt, forceRefreshTask != nil else { return false }

    let elapsed = now.timeIntervalSince(started)
    guard elapsed > forceRefreshWatchdogSeconds else { return false }

    // Swift `Int` is 64 bits wide on current Apple platforms, so the
    // matching printf-style specifier is `%ld`, not `%d`.
    NSLog("CodeBurn: force refresh stuck for %lds — cancelling and restarting", Int(elapsed))
    forceRefreshTask?.cancel()
    forceRefreshTask = nil
    forceRefreshStartedAt = nil
    forceRefreshGeneration &+= 1
    store.resetLoadingState()
    return true
}
/// Starts a user-visible refresh, replacing any force refresh already running.
///
/// A stuck previous force refresh is cleared first. Calls arriving within
/// five seconds of `lastRefreshTime` are ignored. Otherwise the start time
/// and a new generation number are recorded, the previous task is cancelled,
/// and a new task refreshes the store (with loading shown) in parallel with
/// a quiet refresh of the `.today` period, then updates the status button.
private func forceRefresh() {
    let now = Date()
    clearStaleForceRefreshIfNeeded(now: now)

    // Throttle: ignore requests that arrive too soon after the last refresh.
    guard now.timeIntervalSince(lastRefreshTime) > 5 else { return }

    lastRefreshTime = now
    forceRefreshStartedAt = now
    forceRefreshGeneration &+= 1
    let launchGeneration = forceRefreshGeneration

    forceRefreshTask?.cancel()
    forceRefreshTask = Task {
        async let primary: Void = store.refresh(includeOptimize: false, force: true, showLoading: true)
        async let todayOnly: Void = store.refreshQuietly(period: .today)
        _ = await (primary, todayOnly)
        refreshStatusButton()
        await MainActor.run { [weak self] in
            guard let self else { return }
            // Only the most recently launched refresh may clear the tracking
            // state; an older or abandoned one must leave it untouched.
            guard self.forceRefreshGeneration == launchGeneration else { return }
            self.forceRefreshTask = nil
            self.forceRefreshStartedAt = nil
            self.lastRefreshTime = Date()
        }
    }
}
@ -259,12 +289,14 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate {
}
while !Task.isCancelled {
guard let self else { return }
let clearedStaleForceRefresh = self.clearStaleForceRefreshIfNeeded()
let clearedStaleLoading = self.store.clearStaleLoadingIfNeeded()
// Skip the loop's tick if a wake / manual / distributed-
// notification refresh just ran. Without this gate, every
// wake produced two refreshes (forceRefresh from the wake
// observer plus the loop's natural tick).
let sinceLast = Date().timeIntervalSince(self.lastRefreshTime)
if sinceLast >= 5 {
if self.forceRefreshTask == nil && (clearedStaleForceRefresh || clearedStaleLoading || sinceLast >= 5) {
if self.store.selectedPeriod != .today || self.store.selectedProvider != .all {
async let quiet: Void = self.store.refreshQuietly(period: .today)
async let main: Void = self.store.refresh(includeOptimize: false, force: true)

View file

@ -61,41 +61,27 @@ struct DataClient {
throw DataClientError.spawn(error.localizedDescription)
}
// Wall-clock timeout: if the CLI hangs (parser stuck, disk stall), kill it.
// Log when this fires so a recurring stuck-popover state has an actual
// diagnostic. Historically, users saw "Loading..." forever with no signal
// about what failed; the only way to debug was to read process state at
// the wrong time. The log line names the subcommand so we can correlate
// with a specific period/provider combination.
let timeoutTask = Task.detached(priority: .utility) {
try? await Task.sleep(nanoseconds: spawnTimeoutSeconds * 1_000_000_000)
if process.isRunning {
NSLog("CodeBurn: CLI subprocess timed out after %llus for %@ — terminating",
spawnTimeoutSeconds, subcommand.joined(separator: " "))
process.terminate()
terminateWithEscalation(process)
}
}
defer { timeoutTask.cancel() }
// If the caller cancels its Task (rapid period/provider tab clicks
// cancel switchTask in AppStore), terminate the in-flight subprocess.
// Without this the cancelled Task returns immediately but the spawned
// CLI keeps running to completion, piling up zombie codeburn processes
// on rapid UI interactions. We hold a strong reference to the Process
// in the cancellation handler so the closure can find it even if the
// surrounding scope has gone async.
let outHandle = outPipe.fileHandleForReading
let errHandle = errPipe.fileHandleForReading
let (out, err) = await withTaskCancellationHandler {
// Drain both pipes concurrently so a large stderr can't deadlock stdout
// (the child blocks on write once the pipe buffer fills). `drain`
// also enforces a byte cap.
async let stdoutData = drain(outPipe.fileHandleForReading, limit: maxPayloadBytes)
async let stderrData = drain(errPipe.fileHandleForReading, limit: maxStderrBytes)
async let stdoutData = drain(outHandle, limit: maxPayloadBytes)
async let stderrData = drain(errHandle, limit: maxStderrBytes)
return await (stdoutData, stderrData)
} onCancel: {
if process.isRunning {
process.terminate()
}
terminateWithEscalation(process)
}
try? outHandle.close()
try? errHandle.close()
process.waitUntilExit()
if out.count >= maxPayloadBytes {
@ -106,22 +92,45 @@ struct DataClient {
return ProcessResult(stdout: out, stderr: stderrString, exitCode: process.terminationStatus)
}
/// Pulls bytes off a pipe until EOF or `limit`. Intentionally uses `availableData`, which
/// returns empty on EOF -- no blocking once the child exits.
private static func terminateWithEscalation(_ process: Process) {
    // Stops `process` in two stages: ask it to terminate now, then, half a
    // second later, send SIGKILL if it is still running. Does nothing when
    // the process is not running at the time of the call.
    if !process.isRunning { return }
    process.terminate()

    // Capture the pid up front; the delayed closure re-checks `isRunning`
    // before signalling so an already-exited child is left alone.
    let childPID = process.processIdentifier
    DispatchQueue.global(qos: .utility).asyncAfter(deadline: .now() + 0.5) {
        guard process.isRunning else { return }
        kill(childPID, SIGKILL)
    }
}
private static func drain(_ handle: FileHandle, limit: Int) async -> Data {
await Task.detached(priority: .utility) {
var buffer = Data()
while buffer.count < limit {
let chunk = handle.availableData
if chunk.isEmpty { break }
let remaining = limit - buffer.count
if chunk.count > remaining {
buffer.append(chunk.prefix(remaining))
break
}
buffer.append(chunk)
let fd = handle.fileDescriptor
let flags = Darwin.fcntl(fd, F_GETFL)
if flags >= 0 {
_ = Darwin.fcntl(fd, F_SETFL, flags | O_NONBLOCK)
} else {
NSLog("CodeBurn: fcntl F_GETFL failed on fd %d, drain may block", fd)
}
var buffer = Data()
var chunk = [UInt8](repeating: 0, count: 65_536)
while buffer.count < limit && !Task.isCancelled {
let toRead = min(chunk.count, limit - buffer.count)
let n = chunk.withUnsafeMutableBufferPointer { ptr in
Darwin.read(fd, ptr.baseAddress!, toRead)
}
return buffer
}.value
if n > 0 {
buffer.append(contentsOf: chunk.prefix(n))
} else if n == 0 {
break
} else if errno == EAGAIN || errno == EWOULDBLOCK {
try? await Task.sleep(nanoseconds: 5_000_000)
} else if errno == EINTR {
continue
} else {
NSLog("CodeBurn: drain read() failed on fd %d: errno %d", fd, errno)
break
}
}
return buffer
}
}

View file

@ -47,7 +47,10 @@ struct MenuBarContent: View {
// error, etc.), surface a retry card instead of leaving the
// user stuck on a perpetual "Loading..." spinner.
if !store.hasCachedData {
if let err = store.lastError, !store.isLoading {
if store.isCurrentKeyLoading || !store.hasAttemptedCurrentKeyLoad {
BurnLoadingOverlay(periodLabel: store.selectedPeriod.rawValue)
.transition(.opacity)
} else if let err = store.lastError {
FetchErrorOverlay(
error: err,
periodLabel: store.selectedPeriod.rawValue,
@ -55,7 +58,11 @@ struct MenuBarContent: View {
)
.transition(.opacity)
} else {
BurnLoadingOverlay(periodLabel: store.selectedPeriod.rawValue)
FetchErrorOverlay(
error: "The last refresh stopped before returning data. CodeBurn will keep retrying, or you can retry now.",
periodLabel: store.selectedPeriod.rawValue,
retry: { Task { await store.refresh(includeOptimize: false, force: true, showLoading: true) } }
)
.transition(.opacity)
}
}