From 7f201bdf6ff3a50e8d97920013e9ec0d3f21ba01 Mon Sep 17 00:00:00 2001 From: rUv Date: Sat, 25 Apr 2026 20:03:03 -0400 Subject: [PATCH] fix(tracker): exclude Lost tracks from bridge output (#420, ADR-082) (#426) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `tracker_bridge::tracker_to_person_detections` documented itself as filtering to `is_alive()` but never actually filtered — it forwarded every non-Terminated track to the WebSocket stream. With 3 ESP32-S3 nodes × ~10 Hz CSI, transient detections that fell outside the Mahalanobis gate created a steady stream of new Tentative tracks that aged through Active and into Lost. Lost tracks are kept in the tracker for `reid_window` (~3 s) so re-identification can match them when a similar detection reappears, but they are NOT currently observed and must not render as live skeletons. Up to ~90 ghost skeletons could accumulate at any moment, hence the 22-24 phantoms users saw while `estimated_persons` correctly reported 1. Add `PoseTracker::confirmed_tracks()` that returns only `Tentative ∪ Active` and rewire the bridge to use it. `Lost` tracks remain in the tracker for re-ID; they just no longer ship to the UI. `active_tracks()` is left unchanged for the AETHER re-ID consumers (ADR-024). Regression test `test_lost_tracks_excluded_from_bridge_output` drives a track to Active, lapses for `loss_misses + 1` ticks to push it to Lost, and asserts `tracker_update` returns an empty Vec while the Lost track is still present in `all_tracks()` (re-ID still works). Validated: - cargo test --workspace --no-default-features → 1,539 passed, 0 failed - ESP32-S3 on COM7 still streaming live CSI (cb #32800) --- CHANGELOG.md | 11 ++ ...82-pose-tracker-confirmed-output-filter.md | 185 ++++++++++++++++++ .../src/tracker_bridge.rs | 79 +++++++- .../wifi-densepose-signal/src/ruvsense/mod.rs | 2 +- .../src/ruvsense/pose_tracker.rs | 16 ++ 5 files changed, 289 insertions(+), 4 deletions(-) create mode 100644 docs/adr/ADR-082-pose-tracker-confirmed-output-filter.md diff --git a/CHANGELOG.md b/CHANGELOG.md index b9e5ea88..a6dc16ba 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Fixed +- **Ghost skeletons in live UI with multi-node ESP32 setups** (#420, ADR-082) — + `tracker_bridge::tracker_to_person_detections` documented itself as filtering + to `is_alive()` tracks but in fact passed every non-Terminated track to the + WebSocket stream. `Lost` tracks — kept inside `reid_window` for + re-identification but not currently observed — were rendering as phantom + skeletons, accumulating to 22-24 with 3 nodes × 10 Hz CSI while + `estimated_persons` correctly reported 1. Added + `PoseTracker::confirmed_tracks()` (Tentative + Active only) and rewired the + bridge to use it. Lost tracks remain in the tracker for re-ID; they just + no longer ship to the UI. Regression test: + `test_lost_tracks_excluded_from_bridge_output`. - **Rust workspace build with `--no-default-features` on Windows** (#366, #415) — `wifi-densepose-mat`, `wifi-densepose-sensing-server`, and `wifi-densepose-train` all depended on `wifi-densepose-signal` with default features enabled, which diff --git a/docs/adr/ADR-082-pose-tracker-confirmed-output-filter.md b/docs/adr/ADR-082-pose-tracker-confirmed-output-filter.md new file mode 100644 index 00000000..645d1ad7 --- /dev/null +++ b/docs/adr/ADR-082-pose-tracker-confirmed-output-filter.md @@ -0,0 +1,185 @@ +# ADR-082: Pose Tracker Confirmed-Track Output Filter + +| Field | Value | +|-------------|-----------------------------------------------------------------------| +| **Status** | Accepted — implemented in commit landing this ADR | +| **Date** | 2026-04-25 | +| **Authors** | ruv | +| **Issue** | [#420 — "24 ghost people in the UI with 3× ESP32-S3 nodes"](https://github.com/ruvnet/RuView/issues/420) | +| **Depends** | ADR-026 (track lifecycle), ADR-024 (AETHER re-ID embeddings) | + +## Context + +Multiple users running the Rust sensing server with 3 ESP32-S3 nodes have +reported the same symptom: the live UI renders 22–24 phantom skeletons that +flicker at high rate, while `GET /api/v1/sensing/latest` correctly reports +`estimated_persons: 1`. The problem is reproducible across both Docker and +native deployments and is independent of the firmware MGMT-only mitigation +shipped for #396. + +The two-number contradiction (1 in the snapshot, ~24 in the WebSocket stream) +narrows the bug to the path that produces `update.persons`. That path is +`tracker_bridge::tracker_update` → `tracker_bridge::tracker_to_person_detections` +→ WebSocket frame. + +### Pose tracker lifecycle (per ADR-026) + +`signal::ruvsense::pose_tracker::TrackLifecycleState` has four states: + +``` +Tentative -> Active -> Lost -> Terminated +``` + +The state machine and its predicates: + +| State | `is_alive()` | `accepts_updates()` | Meaning | +|--------------|--------------|---------------------|---------| +| `Tentative` | true | true | New detection, < 2 confirmed hits | +| `Active` | true | true | Confirmed track, currently observed | +| `Lost` | **true** | false | Confirmed track, missed `loss_misses` updates, still inside `reid_window` | +| `Terminated` | false | false | Removed on next `prune_terminated()` | + +`PoseTracker::active_tracks()` filters by `is_alive()`, which means it returns +`Tentative ∪ Active ∪ Lost` — every track that has not yet been Terminated. + +### Root cause + +`crates/wifi-densepose-sensing-server/src/tracker_bridge.rs` exposes the +tracker output to the WebSocket stream via: + +```rust +/// Convert active PoseTracker tracks back into server-side PersonDetection values. +/// +/// Only tracks whose lifecycle `is_alive()` are included. +pub fn tracker_to_person_detections(tracker: &PoseTracker) -> Vec { + tracker + .active_tracks() + .into_iter() + .map(|track| { /* ... */ }) + .collect() +} +``` + +The doc comment is correct as a description of `is_alive()`, but `is_alive()` +is the wrong gate for *rendering*. `Lost` tracks have not received a +measurement in `loss_misses` ticks; they are kept around only so the +re-identification machinery can attempt to match them when a similar +detection reappears within `reid_window`. They are not currently observed and +must not appear as live skeletons in the UI. + +With 3 ESP32-S3 nodes streaming CSI at ~10 Hz each, `derive_pose_from_sensing` +emits a per-node detection every tick. Detections that fall outside the +Mahalanobis gate (cost ≥ 9.0) cannot match an existing track, so a new +`Tentative` track is created and the previous one ages into `Lost`. With +`reid_window ≈ 30` ticks (~3 s at 10 Hz), up to 30 ticks × 3 nodes ≈ 90 +phantom Lost tracks can co-exist before any of them reach `Terminated`. +The actually-observed-now person is one of them; the other ~22–89 are ghosts. + +The snapshot endpoint `/api/v1/sensing/latest` reads `estimated_persons` from +the multistatic eigenvalue counter (`signal::ruvsense::field_model`), which +operates on the CSI data directly and reports 1. The WebSocket stream reads +`update.persons`, which is the unfiltered `is_alive()` set — hence the +22-vs-1 mismatch. + +This is a documentation/implementation discrepancy in `tracker_bridge`, not a +flaw in the lifecycle state machine itself. + +## Decision + +Introduce a **confirmed-track filter** at the bridge boundary that returns +only tracks the UI is meant to render: + +* `Active` — confirmed and currently observed; always render. +* `Tentative` — confirmed for the *current* tick (created or matched this + cycle); render so first-frame visibility latency stays at one tick. +* `Lost` — **never** render. They exist only to support re-ID over the + `reid_window` and have, by definition, not been observed for at least + `loss_misses` ticks. +* `Terminated` — never render (already excluded by `is_alive()`). + +### Naming + +Add `PoseTracker::confirmed_tracks()` — the name reflects "tracks the system +is currently confirming a person is present at this position." Keep +`active_tracks()` unchanged so callers that legitimately need the re-ID set +(re-identification, soft-confidence overlays, debug UIs) still have it. + +The bridge’s public surface stays the same; only the internal accessor +swaps. WebSocket consumers see the corrected `update.persons` automatically. + +### Why include `Tentative` + +A walking person’s first detection lands in `Tentative` until two consecutive +hits arrive (~0.1 s at 10 Hz). Excluding `Tentative` makes the UI +under-render by one tick on every entry; the gain (filtering out spurious +single-detection ghosts) is real but small relative to the much larger Lost +problem and isn’t worth the visible latency. If single-tick ghosts become +the dominant complaint after this ADR ships, escalate to `Active`-only and +revisit `birth_hits` calibration. + +## Consequences + +### Positive + +* `update.persons.length` matches `estimated_persons` within ±1 (Tentative + vs. Active hand-off frame) under steady state. #420 closed. +* No change to the lifecycle state machine, no change to `reid_window` or + `loss_misses`, no change to the WebSocket schema. Pure filter at egress. +* `PoseTracker::active_tracks()` keeps its semantics for re-ID consumers; + this avoids breaking ADR-024 (AETHER) call sites. + +### Negative / risks + +* Existing test `test_tracker_update_stable_ids` exercises three sequential + identical-person updates and asserts the ID is stable across all three. + Filtering Lost out doesn’t affect it (the track stays in `Tentative` → + `Active`, never Lost during the test). Confirmed by reading the test; + no regression expected. +* Single-tick `Tentative` exposure means very-spurious one-frame detections + *can* still flicker briefly. Acceptable trade-off as discussed above. + +### Neutral + +* `prune_terminated()` and the existing transition logic + (`predict_all` → `mark_lost` → `terminate`) are unchanged. + +## Implementation + +1. **`signal::ruvsense::pose_tracker`** — add: + ```rust + /// Tracks the UI is meant to render: Tentative + Active. + /// Excludes Lost (re-ID candidates) and Terminated. + pub fn confirmed_tracks(&self) -> Vec<&PoseTrack> { + self.tracks + .iter() + .filter(|t| matches!( + t.lifecycle, + TrackLifecycleState::Tentative | TrackLifecycleState::Active + )) + .collect() + } + ``` +2. **`sensing-server::tracker_bridge`** — change + `tracker_to_person_detections` to call `tracker.confirmed_tracks()` and + update the doc comment to describe the new contract. +3. **Regression test** in `tracker_bridge.rs::tests`: + * Drive a track to `Active` over two updates. + * Submit empty detections for `loss_misses + 1` predict cycles to push + the track to `Lost`. + * Assert `tracker_update(... empty ...)` returns an empty `Vec`. +4. **Validation**: workspace tests + ESP32-S3 on COM7 streaming round-trip. + +## Validation + +* `cargo test --workspace --no-default-features` — must stay green + (≥ 1,538 passed, 0 failed; new regression test adds one). +* Live verification on ESP32 setup: WebSocket `update.persons.length` + must equal `estimated_persons` ± 1 in steady state. + +## Related + +* ADR-026 — Track lifecycle state machine (this ADR doesn’t change it) +* ADR-024 — AETHER re-ID embeddings (uses `active_tracks()`, unchanged) +* PR #425 — Workspace `--no-default-features` build fix (unrelated, just + the prior PR on this branch line) +* Issue #420 — original report diff --git a/rust-port/wifi-densepose-rs/crates/wifi-densepose-sensing-server/src/tracker_bridge.rs b/rust-port/wifi-densepose-rs/crates/wifi-densepose-sensing-server/src/tracker_bridge.rs index b66d0fcf..97a67f4e 100644 --- a/rust-port/wifi-densepose-rs/crates/wifi-densepose-sensing-server/src/tracker_bridge.rs +++ b/rust-port/wifi-densepose-rs/crates/wifi-densepose-sensing-server/src/tracker_bridge.rs @@ -92,12 +92,15 @@ fn detections_to_tracker_keypoints(persons: &[PersonDetection]) -> Vec<[[f32; 3] .collect() } -/// Convert active PoseTracker tracks back into server-side PersonDetection values. +/// Convert confirmed PoseTracker tracks back into server-side PersonDetection values. /// -/// Only tracks whose lifecycle `is_alive()` are included. +/// Returns only tracks the UI is meant to render right now (Tentative + Active). +/// `Lost` tracks — kept around inside `reid_window` for re-identification but +/// not currently observed — are excluded so they don't ship to the WebSocket +/// stream as ghost skeletons. See ADR-082 and #420. pub fn tracker_to_person_detections(tracker: &PoseTracker) -> Vec { tracker - .active_tracks() + .confirmed_tracks() .into_iter() .map(|track| { let id = track.id.0 as u32; @@ -406,4 +409,74 @@ mod tests { assert_eq!(id1, id2, "Track ID should be stable across updates"); assert_eq!(id2, id3, "Track ID should be stable across updates"); } + + /// Regression test for #420 (ADR-082): tracks that have transitioned to + /// `Lost` must NOT appear in `tracker_update`'s returned PersonDetection + /// vector, even though they remain in the tracker for re-identification. + #[test] + fn test_lost_tracks_excluded_from_bridge_output() { + use wifi_densepose_signal::ruvsense::{TrackerConfig, TrackLifecycleState}; + + // Tight config so the test doesn't have to spin for hundreds of ticks. + let cfg = TrackerConfig { + loss_misses: 3, + reid_window: 100, // intentionally large — we want Lost, not Terminated + ..TrackerConfig::default() + }; + let mut tracker = PoseTracker::with_config(cfg); + let mut last_instant: Option = None; + + let person = make_person( + 0, + vec![ + make_keypoint("nose", 1.0, 2.0, 0.0), + make_keypoint("left_shoulder", 0.8, 2.5, 0.0), + make_keypoint("right_shoulder", 1.2, 2.5, 0.0), + make_keypoint("left_hip", 0.9, 3.5, 0.0), + make_keypoint("right_hip", 1.1, 3.5, 0.0), + ], + ); + + // Drive the track to Active (≥2 consecutive hits). + let r1 = tracker_update(&mut tracker, &mut last_instant, vec![person.clone()]); + let r2 = tracker_update(&mut tracker, &mut last_instant, vec![person.clone()]); + assert_eq!(r1.len(), 1); + assert_eq!(r2.len(), 1); + + // Submit empty detections enough times to push the track into Lost. + // Each empty call increments time_since_update via predict_all(). + for _ in 0..6 { + let _ = tracker_update(&mut tracker, &mut last_instant, vec![]); + } + + // Pre-condition: a track exists internally and is in Lost state. + let has_lost = tracker + .all_tracks() + .iter() + .any(|t| t.lifecycle == TrackLifecycleState::Lost); + assert!( + has_lost, + "Test setup invariant violated: expected the track to be Lost \ + after {} empty updates with loss_misses=3", + 6 + ); + + // The fix: `tracker_update` must NOT return any phantom detections + // for the Lost track when there are no current detections. + let after_lost = tracker_update(&mut tracker, &mut last_instant, vec![]); + assert_eq!( + after_lost.len(), + 0, + "Lost tracks must not appear in bridge output (ADR-082, #420). \ + Got {} phantom detection(s).", + after_lost.len() + ); + + // Sanity: the Lost track is still tracked internally (for re-ID), it + // just shouldn't ship to the UI. + assert!( + tracker.all_tracks().iter().any(|t| t.lifecycle == TrackLifecycleState::Lost), + "Lost track must remain in tracker for re-identification window" + ); + } } diff --git a/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/mod.rs b/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/mod.rs index 88c65b5d..bd488ad1 100644 --- a/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/mod.rs +++ b/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/mod.rs @@ -63,7 +63,7 @@ pub use multistatic::FusedSensingFrame; pub use phase_align::{PhaseAligner, PhaseAlignError}; pub use pose_tracker::{ CompressedPoseHistory, KeypointState, PoseTrack, SkeletonConstraints, - TemporalKeypointAttention, TrackLifecycleState, + TemporalKeypointAttention, TrackLifecycleState, TrackerConfig, }; /// Number of keypoints in a full-body pose skeleton (COCO-17). diff --git a/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/pose_tracker.rs b/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/pose_tracker.rs index ac6ea669..a93f82d4 100644 --- a/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/pose_tracker.rs +++ b/rust-port/wifi-densepose-rs/crates/wifi-densepose-signal/src/ruvsense/pose_tracker.rs @@ -492,6 +492,22 @@ impl PoseTracker { .collect() } + /// Tracks the UI is meant to render: Tentative + Active. + /// + /// Excludes `Lost` (re-ID candidates that haven't been observed for + /// `loss_misses` ticks) and `Terminated`. Use this at any boundary that + /// emits "currently visible" pose state — for example, the WebSocket + /// stream sent to the live UI. See ADR-082. + pub fn confirmed_tracks(&self) -> Vec<&PoseTrack> { + self.tracks + .iter() + .filter(|t| matches!( + t.lifecycle, + TrackLifecycleState::Tentative | TrackLifecycleState::Active + )) + .collect() + } + /// Return all tracks including terminated ones. pub fn all_tracks(&self) -> &[PoseTrack] { &self.tracks