From c604ca1150dcd4b1bd092dc3ee32ca80d97739cc Mon Sep 17 00:00:00 2001 From: rUv Date: Mon, 11 May 2026 23:49:00 -0400 Subject: [PATCH] feat(train): TrainingConfig subcarrier-layout presets + real MmFiDataset loader test (#537) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the remaining doable items from the 2026-05-11 training-pipeline audit: #6 (CSI format default = 56-sc / 1 NIC) + #7 (multi-band 168-sc mesh not in config): new `TrainingConfig::for_subcarriers(native, target)` plus named presets `mmfi()` (114→56), `ht40_192()` (≈192-sc ESP32 HT40 → 56) and `multiband_168()` (168-sc ADR-078 multi-band mesh → 56). Non-MM-Fi CSI shapes are now first-class instead of requiring manual `native_subcarriers` / `num_subcarriers` overrides; the field docs list the supported source counts and the multi-NIC mapping (a 2–3-node mesh currently rides on `n_rx` until a dedicated node dimension lands). Model input width stays `num_subcarriers`; the presets only vary the resampling input. #4 (proof.rs uses synthetic data): reframed — a deterministic proof *must* use a reproducible source, so `verify-training` correctly stays on `SyntheticCsiDataset`. The real gap was that nothing exercised the on-disk `MmFiDataset` path. New `tests/test_real_loader.rs` writes synthetic CSI to `.npy` files in the `MmFiDataset::discover` layout, loads it back, and checks the resulting `CsiSample` — covering the no-interp case, the subcarrier-interpolation branch, and the empty-root case. Adds `ndarray` / `ndarray-npy` as dev-deps for the fixture writing. cargo check + cargo test -p wifi-densepose-train --no-default-features: clean, all existing tests green, 3 new loader tests + the updated config doctest pass. Purely additive — no model-shape change, no tch-module change. 
--- CHANGELOG.md | 1 + v2/crates/wifi-densepose-train/Cargo.toml | 5 ++ v2/crates/wifi-densepose-train/src/config.rs | 66 ++++++++++++-- .../tests/test_real_loader.rs | 86 +++++++++++++++++++ 4 files changed, 153 insertions(+), 5 deletions(-) create mode 100644 v2/crates/wifi-densepose-train/tests/test_real_loader.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index a62f1915..e12d1756 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - **`wifi-densepose-train`: `signal_features` module — wires `wifi-densepose-signal` into the training pipeline.** `wifi-densepose-signal` was previously a phantom dependency of `wifi-densepose-train` (listed in `Cargo.toml`, never imported). New `wifi_densepose_train::signal_features::extract_signal_features` (and `CsiSample::signal_features()`) run a windowed CSI observation's centre frame through `wifi_densepose_signal::features::FeatureExtractor`, producing a fixed-length (`FEATURE_LEN = 12`) amplitude/phase/PSD feature vector — the hook for a future vitals / multi-task supervision head (breathing- and heart-rate-band power are read off the PSD summary). The vector is produced on demand and not yet fed back into the loss. Surfaced by the 2026-05-11 training-pipeline audit (findings #1 "vitals features absent from training" and #2 "`wifi-densepose-signal` ghost dep"). +- **`wifi-densepose-train`: `TrainingConfig` subcarrier-layout presets + a real-loader integration test.** New `TrainingConfig::for_subcarriers(native, target)` plus named presets `mmfi()` (114→56, identical to the default), `ht40_192()` (≈192-sc ESP32 HT40 → 56) and `multiband_168()` (168-sc ADR-078 multi-band mesh → 56), so non-MM-Fi CSI shapes are first-class instead of requiring manual `native_subcarriers`/`num_subcarriers` overrides; field docs now list the supported source counts and the multi-NIC mapping. 
New `tests/test_real_loader.rs` round-trips synthetic CSI through `.npy` files → `MmFiDataset::discover`/`get` (including the subcarrier-interpolation branch and the empty-root case) — exercising the on-disk loader path the deterministic `verify-training` proof intentionally bypasses. Addresses training-pipeline audit findings #6 (56-sc/1-NIC config default) and #7 (multi-band mesh not in config); the #4 concern ("proof uses synthetic data") is reframed — the proof *should* use a reproducible source, and this test covers the real loader it skips. ### Fixed - **HuggingFace `MODEL_CARD.md`: marked the PIR/BME280 environmental-sensor ground-truth path as planned, not implemented** (training-pipeline audit finding #3) — the card presented PIR/BME280 weak-label fine-tuning as a current capability; there is no env-sensor ingestion in the training pipeline today. diff --git a/v2/crates/wifi-densepose-train/Cargo.toml b/v2/crates/wifi-densepose-train/Cargo.toml index ac0fa37d..52b6235e 100644 --- a/v2/crates/wifi-densepose-train/Cargo.toml +++ b/v2/crates/wifi-densepose-train/Cargo.toml @@ -85,6 +85,11 @@ criterion.workspace = true proptest.workspace = true tempfile = "3.10" approx = "0.5" +# Used by tests/test_real_loader.rs to write .npy fixtures that exercise the +# real MmFiDataset disk-loading path (the deterministic proof uses the +# in-memory SyntheticCsiDataset, which bypasses .npy parsing). +ndarray.workspace = true +ndarray-npy.workspace = true [[bench]] name = "training_bench" diff --git a/v2/crates/wifi-densepose-train/src/config.rs b/v2/crates/wifi-densepose-train/src/config.rs index 8e27d19c..b82c44b3 100644 --- a/v2/crates/wifi-densepose-train/src/config.rs +++ b/v2/crates/wifi-densepose-train/src/config.rs @@ -15,6 +15,15 @@ //! //! assert_eq!(cfg.num_subcarriers, 56); //! assert_eq!(cfg.num_keypoints, 17); +//! +//! // Adapt for a non-MM-Fi source — e.g. an ESP32 HT40 capture (~192 raw +//! // subcarriers) or the ADR-078 multi-band mesh (168). 
The model still sees +//! // `num_subcarriers`; the loader resamples the native count down to it. +//! let ht40 = TrainingConfig::ht40_192(); +//! assert_eq!(ht40.native_subcarriers, 192); +//! assert!(ht40.needs_subcarrier_interp()); +//! let mesh = TrainingConfig::for_subcarriers(168, 56); +//! assert_eq!(mesh.native_subcarriers, 168); //! ``` use serde::{Deserialize, Serialize}; @@ -36,16 +45,26 @@ pub struct TrainingConfig { // ----------------------------------------------------------------------- // Data / Signal // ----------------------------------------------------------------------- - /// Number of subcarriers after interpolation (system target). + /// Number of subcarriers after interpolation (the *model's* input width). /// /// The model always sees this many subcarriers regardless of the raw - /// hardware output. Default: **56**. + /// hardware output; [`crate::subcarrier::interpolate_subcarriers`] resamples + /// `native_subcarriers` → `num_subcarriers` when they differ. Default: **56**. pub num_subcarriers: usize, - /// Number of subcarriers in the raw dataset before interpolation. + /// Number of subcarriers in the *raw* dataset, before interpolation. /// - /// MM-Fi provides 114 subcarriers; set this to 56 when the dataset - /// already matches the target count. Default: **114**. + /// Common sources: MM-Fi = 114, ESP32 HT20 = 56, ESP32 HT40 ≈ 192 (or 114), + /// multi-band mesh = 168 (ADR-078). When it equals [`Self::num_subcarriers`] + /// no interpolation happens ([`Self::needs_subcarrier_interp`]). For the + /// non-MM-Fi shapes prefer the preset constructors + /// ([`Self::for_subcarriers`], [`Self::ht40_192`], [`Self::multiband_168`]) + /// over overriding both fields by hand. Default: **114**. + /// + /// **Multi-NIC note:** a 2–3-node CSI mesh currently maps onto the existing + /// `[T, n_tx, n_rx, n_sc]` layout by treating the nodes' receive chains as + /// extra `n_rx` (i.e. 
`num_antennas_rx = nodes × per_node_rx`); a dedicated + /// node dimension is a separate dataset-loader change. pub native_subcarriers: usize, /// Number of transmit antennas. Default: **3**. @@ -238,6 +257,43 @@ impl TrainingConfig { Ok(()) } + /// Build a config for a dataset whose raw CSI has `native` subcarriers, + /// resampling to `target` (the model's input width) before training. + /// + /// All other fields take their [`Default`] values. Prefer this over + /// overriding `native_subcarriers` / `num_subcarriers` directly so the + /// relationship between the dataset's shape and the model's is explicit. + #[must_use] + pub fn for_subcarriers(native: usize, target: usize) -> Self { + Self { + native_subcarriers: native, + num_subcarriers: target, + ..Self::default() + } + } + + /// Preset for the MM-Fi dataset (114 raw subcarriers → 56). Identical to + /// [`Self::default()`]; provided as a named counterpart to the other + /// presets. + #[must_use] + pub fn mmfi() -> Self { + Self::default() + } + + /// Preset for ESP32 HT40 captures (≈192 raw subcarriers → 56). Use + /// [`Self::for_subcarriers`] if your capture reports a different native + /// count (some HT40 firmwares yield 114). + #[must_use] + pub fn ht40_192() -> Self { + Self::for_subcarriers(192, 56) + } + + /// Preset for the ADR-078 multi-band mesh (168 raw subcarriers → 56). + #[must_use] + pub fn multiband_168() -> Self { + Self::for_subcarriers(168, 56) + } + /// Returns `true` when the native dataset subcarrier count differs from the /// model's target count and interpolation is therefore required. pub fn needs_subcarrier_interp(&self) -> bool { diff --git a/v2/crates/wifi-densepose-train/tests/test_real_loader.rs b/v2/crates/wifi-densepose-train/tests/test_real_loader.rs new file mode 100644 index 00000000..64594922 --- /dev/null +++ b/v2/crates/wifi-densepose-train/tests/test_real_loader.rs @@ -0,0 +1,86 @@ +//! Integration test for the *real* on-disk dataset loader ([`MmFiDataset`]). +//! 
+//! The deterministic training proof (`verify-training`) runs on the in-memory +//! `SyntheticCsiDataset`, which never touches `.npy` files — by design (a +//! reproducible source is the whole point of the proof). This test covers the +//! path the proof bypasses: it writes synthetic CSI to `.npy` files in the +//! directory layout [`MmFiDataset::discover`] expects, loads it back, and +//! checks the resulting [`CsiSample`] — including the subcarrier-interpolation +//! branch. + +use ndarray::{Array3, Array4}; +use ndarray_npy::write_npy; +use tempfile::TempDir; +use wifi_densepose_train::dataset::{CsiDataset, MmFiDataset}; + +/// Write one deterministic `S01/A01` recording (no RNG) under `root`, with +/// `n_t` frames, `[n_tx, n_rx]` antennas and `n_sc` subcarriers. +fn write_recording(root: &std::path::Path, n_t: usize, n_tx: usize, n_rx: usize, n_sc: usize) { + let dir = root.join("S01").join("A01"); + std::fs::create_dir_all(&dir).expect("create S01/A01"); + + let amplitude = Array4::<f32>::from_shape_fn((n_t, n_tx, n_rx, n_sc), |(t, tx, rx, sc)| { + 0.5 + 0.4 * (((t * 7 + tx * 3 + rx * 2 + sc) % 17) as f32 / 17.0) + }); + let phase = Array4::<f32>::from_shape_fn((n_t, n_tx, n_rx, n_sc), |(t, tx, rx, sc)| { + ((t + tx + rx + sc) as f32 * 0.05).sin() + }); + let mut kp = Array3::<f32>::zeros((n_t, 17, 3)); + for t in 0..n_t { + for j in 0..17 { + kp[[t, j, 0]] = ((j as f32 + 1.0) / 18.0).clamp(0.0, 1.0); // x + kp[[t, j, 1]] = (((j * 3 + t) % 18) as f32 / 18.0).clamp(0.0, 1.0); // y + kp[[t, j, 2]] = 2.0; // COCO "visible" + } + } + write_npy(dir.join("wifi_csi.npy"), &amplitude).expect("write wifi_csi.npy"); + write_npy(dir.join("wifi_csi_phase.npy"), &phase).expect("write wifi_csi_phase.npy"); + write_npy(dir.join("gt_keypoints.npy"), &kp).expect("write gt_keypoints.npy"); +} + +/// Round-trip: write `.npy`, discover, load — no interpolation (native == target). 
+#[test] +fn mmfi_loads_real_npy_without_interpolation() { + let tmp = TempDir::new().expect("tempdir"); + write_recording(tmp.path(), 8, 3, 3, 56); + + let ds = MmFiDataset::discover(tmp.path(), 8, 56, 17).expect("discover the recording"); + assert!(ds.len() >= 1, "must discover at least one sample, got {}", ds.len()); + + let sample = ds.get(0).expect("sample 0"); + assert_eq!(sample.amplitude.shape(), &[8, 3, 3, 56], "amplitude shape"); + assert_eq!(sample.phase.shape(), &[8, 3, 3, 56], "phase shape"); + assert_eq!(sample.keypoints.shape(), &[17, 2], "keypoints shape"); + assert_eq!(sample.keypoint_visibility.shape(), &[17], "visibility shape"); + assert!(sample.amplitude.iter().all(|v| v.is_finite()), "amplitude must be finite"); + assert!(sample.phase.iter().all(|v| v.is_finite()), "phase must be finite"); + assert!(sample.keypoints.iter().all(|v| v.is_finite()), "keypoints must be finite"); +} + +/// The loader resamples the subcarrier axis when the requested target differs +/// from the dataset's native count. +#[test] +fn mmfi_resamples_subcarriers_on_load() { + let tmp = TempDir::new().expect("tempdir"); + write_recording(tmp.path(), 8, 3, 3, 56); + + // target (28) < native (56) — the loader must interpolate down. + let ds = MmFiDataset::discover(tmp.path(), 8, 28, 17).expect("discover"); + let sample = ds.get(0).expect("sample 0"); + assert_eq!( + sample.amplitude.shape(), + &[8, 3, 3, 28], + "amplitude must be resampled to the requested 28 subcarriers" + ); + assert_eq!(sample.phase.shape(), &[8, 3, 3, 28], "phase must be resampled too"); + assert!(sample.amplitude.iter().all(|v| v.is_finite()), "resampled amplitude must be finite"); +} + +/// An empty root directory yields an empty dataset (no panic, no spurious +/// samples) — the same loader code path, just with nothing to discover. 
+#[test] +fn mmfi_empty_root_is_empty() { + let tmp = TempDir::new().expect("tempdir"); + let ds = MmFiDataset::discover(tmp.path(), 8, 56, 17).expect("discover empty root"); + assert_eq!(ds.len(), 0, "empty root must produce an empty dataset"); +}