From 66316aba38adea609a7d9635fe572d33aa95bc2d Mon Sep 17 00:00:00 2001 From: iamtoruk Date: Sat, 9 May 2026 14:27:48 -0700 Subject: [PATCH] Fix menubar stuck loading with non-blocking pipe I/O and watchdog Replace blocking availableData drain with non-blocking POSIX read that respects Task cancellation. Handle EINTR from child SIGCHLD, close pipe fds after drain to prevent deadlock on oversized output, and escalate SIGTERM to SIGKILL after 0.5s grace period. Add 60-second loading watchdog as safety net that auto-clears stuck state on each refresh loop tick. Fixes #282 --- mac/Sources/CodeBurnMenubar/AppStore.swift | 19 ++++- mac/Sources/CodeBurnMenubar/CodeBurnApp.swift | 1 + .../CodeBurnMenubar/Data/DataClient.swift | 83 ++++++++++--------- 3 files changed, 65 insertions(+), 38 deletions(-) diff --git a/mac/Sources/CodeBurnMenubar/AppStore.swift b/mac/Sources/CodeBurnMenubar/AppStore.swift index 1d7ad6a..f0c65a6 100644 --- a/mac/Sources/CodeBurnMenubar/AppStore.swift +++ b/mac/Sources/CodeBurnMenubar/AppStore.swift @@ -27,6 +27,7 @@ final class AppStore { var currency: String = "USD" var isLoading: Bool { loadingCount > 0 } private var loadingCount: Int = 0 + private var loadingStartedAt: Date? var lastError: String? var subscription: SubscriptionUsage? var subscriptionError: String? @@ -131,9 +132,21 @@ final class AppStore { func resetLoadingState() { loadingCount = 0 + loadingStartedAt = nil inFlightKeys.removeAll() } + private let loadingWatchdogSeconds: TimeInterval = 60 + + @discardableResult + func clearStaleLoadingIfNeeded() -> Bool { + guard isLoading, let started = loadingStartedAt, + Date().timeIntervalSince(started) > loadingWatchdogSeconds else { return false } + NSLog("CodeBurn: loading stuck for %ds — auto-clearing", Int(Date().timeIntervalSince(started))) + resetLoadingState() + return true + } + private func invalidateStaleDayCache() { let formatter = DateFormatter() formatter.dateFormat = "yyyy-MM-dd" @@ -157,6 +170,7 @@ final class AppStore { inFlightKeys.insert(key) let didShowLoading = showLoading || cache[key] == nil if didShowLoading { + if loadingCount == 0 { loadingStartedAt = Date() } loadingCount += 1 } // Diagnostic anchor: if this key has been empty for a long time (the @@ -172,7 +186,10 @@ final class AppStore { } defer { inFlightKeys.remove(key) - if didShowLoading { loadingCount = max(loadingCount - 1, 0) } + if didShowLoading { + loadingCount = max(loadingCount - 1, 0) + if loadingCount == 0 { loadingStartedAt = nil } + } } do { let fresh = try await DataClient.fetch(period: key.period, provider: key.provider, includeOptimize: includeOptimize) diff --git a/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift b/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift index 899f273..f7e57a0 100644 --- a/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift +++ b/mac/Sources/CodeBurnMenubar/CodeBurnApp.swift @@ -259,6 +259,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate, NSPopoverDelegate { } while !Task.isCancelled { guard let self else { return } + self.store.clearStaleLoadingIfNeeded() // Skip the loop's tick if a wake / manual / distributed- // notification refresh just ran. Without this gate, every // wake produced two refreshes (forceRefresh from the wake diff --git a/mac/Sources/CodeBurnMenubar/Data/DataClient.swift b/mac/Sources/CodeBurnMenubar/Data/DataClient.swift index e18a03a..4b0083c 100644 --- a/mac/Sources/CodeBurnMenubar/Data/DataClient.swift +++ b/mac/Sources/CodeBurnMenubar/Data/DataClient.swift @@ -61,41 +61,27 @@ struct DataClient { throw DataClientError.spawn(error.localizedDescription) } - // Wall-clock timeout: if the CLI hangs (parser stuck, disk stall), kill it. - // Log when this fires so a recurring stuck-popover state has an actual - // diagnostic — historically users saw "Loading..." forever with no signal - // about what failed; the only way to debug was to read process state at - // the wrong time. The log line names the subcommand so we can correlate - // with a specific period/provider combination. let timeoutTask = Task.detached(priority: .utility) { try? await Task.sleep(nanoseconds: spawnTimeoutSeconds * 1_000_000_000) if process.isRunning { NSLog("CodeBurn: CLI subprocess timed out after %llus for %@ — terminating", spawnTimeoutSeconds, subcommand.joined(separator: " ")) - process.terminate() + terminateWithEscalation(process) } } defer { timeoutTask.cancel() } - // If the caller cancels its Task (rapid period/provider tab clicks - // cancel switchTask in AppStore), terminate the in-flight subprocess. - // Without this the cancelled Task returns immediately but the spawned - // CLI keeps running to completion, piling up zombie codeburn processes - // on rapid UI interactions. We hold a strong reference to the Process - // in the cancellation handler so the closure can find it even if the - // surrounding scope has gone async. + let outHandle = outPipe.fileHandleForReading + let errHandle = errPipe.fileHandleForReading let (out, err) = await withTaskCancellationHandler { - // Drain both pipes concurrently so a large stderr can't deadlock stdout - // (the child blocks on write once the pipe buffer fills). `drain` - // also enforces a byte cap. - async let stdoutData = drain(outPipe.fileHandleForReading, limit: maxPayloadBytes) - async let stderrData = drain(errPipe.fileHandleForReading, limit: maxStderrBytes) + async let stdoutData = drain(outHandle, limit: maxPayloadBytes) + async let stderrData = drain(errHandle, limit: maxStderrBytes) return await (stdoutData, stderrData) } onCancel: { - if process.isRunning { - process.terminate() - } + terminateWithEscalation(process) } + try? outHandle.close() + try? errHandle.close() process.waitUntilExit() if out.count >= maxPayloadBytes { @@ -106,22 +92,45 @@ struct DataClient { return ProcessResult(stdout: out, stderr: stderrString, exitCode: process.terminationStatus) } - /// Pulls bytes off a pipe until EOF or `limit`. Intentionally uses `availableData`, which - /// returns empty on EOF -- no blocking once the child exits. + private static func terminateWithEscalation(_ process: Process) { + guard process.isRunning else { return } + process.terminate() + let pid = process.processIdentifier + DispatchQueue.global(qos: .utility).asyncAfter(deadline: .now() + 0.5) { + if process.isRunning { kill(pid, SIGKILL) } + } + } + private static func drain(_ handle: FileHandle, limit: Int) async -> Data { - await Task.detached(priority: .utility) { - var buffer = Data() - while buffer.count < limit { - let chunk = handle.availableData - if chunk.isEmpty { break } - let remaining = limit - buffer.count - if chunk.count > remaining { - buffer.append(chunk.prefix(remaining)) - break - } - buffer.append(chunk) + let fd = handle.fileDescriptor + let flags = Darwin.fcntl(fd, F_GETFL) + if flags >= 0 { + _ = Darwin.fcntl(fd, F_SETFL, flags | O_NONBLOCK) + } else { + NSLog("CodeBurn: fcntl F_GETFL failed on fd %d, drain may block", fd) + } + + var buffer = Data() + var chunk = [UInt8](repeating: 0, count: 65_536) + + while buffer.count < limit && !Task.isCancelled { + let toRead = min(chunk.count, limit - buffer.count) + let n = chunk.withUnsafeMutableBufferPointer { ptr in + Darwin.read(fd, ptr.baseAddress!, toRead) } - return buffer - }.value + if n > 0 { + buffer.append(contentsOf: chunk.prefix(n)) + } else if n == 0 { + break + } else if errno == EAGAIN || errno == EWOULDBLOCK { + try? await Task.sleep(nanoseconds: 5_000_000) + } else if errno == EINTR { + continue + } else { + NSLog("CodeBurn: drain read() failed on fd %d: errno %d", fd, errno) + break + } + } + return buffer } }