mirror of
https://github.com/ruvnet/RuView.git
synced 2026-05-17 04:19:13 +00:00
feat(train): TrainingConfig subcarrier-layout presets + real MmFiDataset loader test (#537)
Closes the remaining doable items from the 2026-05-11 training-pipeline audit: #6 (CSI format default = 56-sc / 1 NIC) + #7 (multi-band 168-sc mesh not in config): new `TrainingConfig::for_subcarriers(native, target)` plus named presets `mmfi()` (114→56), `ht40_192()` (≈192-sc ESP32 HT40 → 56) and `multiband_168()` (168-sc ADR-078 multi-band mesh → 56). Non-MM-Fi CSI shapes are now first-class instead of requiring manual `native_subcarriers` / `num_subcarriers` overrides; the field docs list the supported source counts and the multi-NIC mapping (a 2–3-node mesh currently rides on `n_rx` until a dedicated node dimension lands). Model input width stays `num_subcarriers`; the presets only vary the resampling input. #4 (proof.rs uses synthetic data): reframed — a deterministic proof *must* use a reproducible source, so `verify-training` correctly stays on `SyntheticCsiDataset`. The real gap was that nothing exercised the on-disk `MmFiDataset` path. New `tests/test_real_loader.rs` writes synthetic CSI to `.npy` files in the `MmFiDataset::discover` layout, loads it back, and checks the resulting `CsiSample` — covering the no-interp case, the subcarrier-interpolation branch, and the empty-root case. Adds `ndarray` / `ndarray-npy` as dev-deps for the fixture writing. cargo check + cargo test -p wifi-densepose-train --no-default-features: clean, all existing tests green, 3 new loader tests + the updated config doctest pass. Purely additive — no model-shape change, no tch-module change.
This commit is contained in:
parent
eaedfded6f
commit
c604ca1150
4 changed files with 153 additions and 5 deletions
|
|
@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|||
|
||||
### Added
|
||||
- **`wifi-densepose-train`: `signal_features` module — wires `wifi-densepose-signal` into the training pipeline.** `wifi-densepose-signal` was previously a phantom dependency of `wifi-densepose-train` (listed in `Cargo.toml`, never imported). New `wifi_densepose_train::signal_features::extract_signal_features` (and `CsiSample::signal_features()`) run a windowed CSI observation's centre frame through `wifi_densepose_signal::features::FeatureExtractor`, producing a fixed-length (`FEATURE_LEN = 12`) amplitude/phase/PSD feature vector — the hook for a future vitals / multi-task supervision head (breathing- and heart-rate-band power are read off the PSD summary). The vector is produced on demand and not yet fed back into the loss. Surfaced by the 2026-05-11 training-pipeline audit (findings #1 "vitals features absent from training" and #2 "`wifi-densepose-signal` ghost dep").
|
||||
- **`wifi-densepose-train`: `TrainingConfig` subcarrier-layout presets + a real-loader integration test.** New `TrainingConfig::for_subcarriers(native, target)` plus named presets `mmfi()` (114-sc MM-Fi → 56, identical to the default), `ht40_192()` (≈192-sc ESP32 HT40 → 56) and `multiband_168()` (168-sc ADR-078 multi-band mesh → 56), so non-MM-Fi CSI shapes are first-class instead of requiring manual `native_subcarriers`/`num_subcarriers` overrides; field docs now list the supported source counts and the multi-NIC mapping. New `tests/test_real_loader.rs` round-trips synthetic CSI through `.npy` files → `MmFiDataset::discover`/`get` (including the subcarrier-interpolation branch and the empty-root case) — exercising the on-disk loader path the deterministic `verify-training` proof intentionally bypasses. Addresses training-pipeline audit findings #6 (56-sc/1-NIC config default) and #7 (multi-band mesh not in config); the #4 concern ("proof uses synthetic data") is reframed — the proof *should* use a reproducible source, and this test covers the real loader it skips.
|
||||
|
||||
### Fixed
|
||||
- **HuggingFace `MODEL_CARD.md`: marked the PIR/BME280 environmental-sensor ground-truth path as planned, not implemented** (training-pipeline audit finding #3) — the card presented PIR/BME280 weak-label fine-tuning as a current capability; there is no env-sensor ingestion in the training pipeline today.
|
||||
|
|
|
|||
|
|
@ -85,6 +85,11 @@ criterion.workspace = true
|
|||
proptest.workspace = true
|
||||
tempfile = "3.10"
|
||||
approx = "0.5"
|
||||
# Used by tests/test_real_loader.rs to write .npy fixtures that exercise the
|
||||
# real MmFiDataset disk-loading path (the deterministic proof uses the
|
||||
# in-memory SyntheticCsiDataset, which bypasses .npy parsing).
|
||||
ndarray.workspace = true
|
||||
ndarray-npy.workspace = true
|
||||
|
||||
[[bench]]
|
||||
name = "training_bench"
|
||||
|
|
|
|||
|
|
@ -15,6 +15,15 @@
|
|||
//!
|
||||
//! assert_eq!(cfg.num_subcarriers, 56);
|
||||
//! assert_eq!(cfg.num_keypoints, 17);
|
||||
//!
|
||||
//! // Adapt for a non-MM-Fi source — e.g. an ESP32 HT40 capture (~192 raw
|
||||
//! // subcarriers) or the ADR-078 multi-band mesh (168). The model still sees
|
||||
//! // `num_subcarriers`; the loader resamples the native count down to it.
|
||||
//! let ht40 = TrainingConfig::ht40_192();
|
||||
//! assert_eq!(ht40.native_subcarriers, 192);
|
||||
//! assert!(ht40.needs_subcarrier_interp());
|
||||
//! let mesh = TrainingConfig::for_subcarriers(168, 56);
|
||||
//! assert_eq!(mesh.native_subcarriers, 168);
|
||||
//! ```
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -36,16 +45,26 @@ pub struct TrainingConfig {
|
|||
// -----------------------------------------------------------------------
|
||||
// Data / Signal
|
||||
// -----------------------------------------------------------------------
|
||||
/// Number of subcarriers after interpolation (system target).
|
||||
/// Number of subcarriers after interpolation (the *model's* input width).
|
||||
///
|
||||
/// The model always sees this many subcarriers regardless of the raw
|
||||
/// hardware output. Default: **56**.
|
||||
/// hardware output; [`crate::subcarrier::interpolate_subcarriers`] resamples
|
||||
/// `native_subcarriers` → `num_subcarriers` when they differ. Default: **56**.
|
||||
pub num_subcarriers: usize,
|
||||
|
||||
/// Number of subcarriers in the raw dataset before interpolation.
|
||||
/// Number of subcarriers in the *raw* dataset, before interpolation.
|
||||
///
|
||||
/// MM-Fi provides 114 subcarriers; set this to 56 when the dataset
|
||||
/// already matches the target count. Default: **114**.
|
||||
/// Common sources: MM-Fi = 114, ESP32 HT20 = 56, ESP32 HT40 ≈ 192 (or 114),
|
||||
/// multi-band mesh = 168 (ADR-078). When it equals [`Self::num_subcarriers`]
|
||||
/// no interpolation happens ([`Self::needs_subcarrier_interp`]). For the
|
||||
/// non-MM-Fi shapes prefer the preset constructors
|
||||
/// ([`Self::for_subcarriers`], [`Self::ht40_192`], [`Self::multiband_168`])
|
||||
/// over overriding both fields by hand. Default: **114**.
|
||||
///
|
||||
/// **Multi-NIC note:** a 2–3-node CSI mesh currently maps onto the existing
|
||||
/// `[T, n_tx, n_rx, n_sc]` layout by treating the nodes' receive chains as
|
||||
/// extra `n_rx` (i.e. `num_antennas_rx = nodes × per_node_rx`); a dedicated
|
||||
/// node dimension is a separate dataset-loader change.
|
||||
pub native_subcarriers: usize,
|
||||
|
||||
/// Number of transmit antennas. Default: **3**.
|
||||
|
|
@ -238,6 +257,43 @@ impl TrainingConfig {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Build a config for a dataset whose raw CSI has `native` subcarriers,
|
||||
/// resampling to `target` (the model's input width) before training.
|
||||
///
|
||||
/// All other fields take their [`Default`] values. Prefer this over
|
||||
/// overriding `native_subcarriers` / `num_subcarriers` directly so the
|
||||
/// relationship between the dataset's shape and the model's is explicit.
|
||||
#[must_use]
|
||||
pub fn for_subcarriers(native: usize, target: usize) -> Self {
|
||||
Self {
|
||||
native_subcarriers: native,
|
||||
num_subcarriers: target,
|
||||
..Self::default()
|
||||
}
|
||||
}
|
||||
|
||||
/// Preset for the MM-Fi dataset (114 raw subcarriers → 56). Identical to
|
||||
/// [`Self::default()`]; provided as a named counterpart to the other
|
||||
/// presets.
|
||||
#[must_use]
|
||||
pub fn mmfi() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Preset for ESP32 HT40 captures (≈192 raw subcarriers → 56). Use
|
||||
/// [`Self::for_subcarriers`] if your capture reports a different native
|
||||
/// count (some HT40 firmwares yield 114).
|
||||
#[must_use]
|
||||
pub fn ht40_192() -> Self {
|
||||
Self::for_subcarriers(192, 56)
|
||||
}
|
||||
|
||||
/// Preset for the ADR-078 multi-band mesh (168 raw subcarriers → 56).
|
||||
#[must_use]
|
||||
pub fn multiband_168() -> Self {
|
||||
Self::for_subcarriers(168, 56)
|
||||
}
|
||||
|
||||
/// Returns `true` when the native dataset subcarrier count differs from the
|
||||
/// model's target count and interpolation is therefore required.
|
||||
pub fn needs_subcarrier_interp(&self) -> bool {
|
||||
|
|
|
|||
86
v2/crates/wifi-densepose-train/tests/test_real_loader.rs
Normal file
86
v2/crates/wifi-densepose-train/tests/test_real_loader.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
//! Integration test for the *real* on-disk dataset loader ([`MmFiDataset`]).
|
||||
//!
|
||||
//! The deterministic training proof (`verify-training`) runs on the in-memory
|
||||
//! `SyntheticCsiDataset`, which never touches `.npy` files — by design (a
|
||||
//! reproducible source is the whole point of the proof). This test covers the
|
||||
//! path the proof bypasses: it writes synthetic CSI to `.npy` files in the
|
||||
//! directory layout [`MmFiDataset::discover`] expects, loads it back, and
|
||||
//! checks the resulting [`CsiSample`] — including the subcarrier-interpolation
|
||||
//! branch.
|
||||
|
||||
use ndarray::{Array3, Array4};
|
||||
use ndarray_npy::write_npy;
|
||||
use tempfile::TempDir;
|
||||
use wifi_densepose_train::dataset::{CsiDataset, MmFiDataset};
|
||||
|
||||
/// Write one deterministic `S01/A01` recording (no RNG) under `root`, with
|
||||
/// `n_t` frames, `[n_tx, n_rx]` antennas and `n_sc` subcarriers.
|
||||
fn write_recording(root: &std::path::Path, n_t: usize, n_tx: usize, n_rx: usize, n_sc: usize) {
|
||||
let dir = root.join("S01").join("A01");
|
||||
std::fs::create_dir_all(&dir).expect("create S01/A01");
|
||||
|
||||
let amplitude = Array4::<f32>::from_shape_fn((n_t, n_tx, n_rx, n_sc), |(t, tx, rx, sc)| {
|
||||
0.5 + 0.4 * (((t * 7 + tx * 3 + rx * 2 + sc) % 17) as f32 / 17.0)
|
||||
});
|
||||
let phase = Array4::<f32>::from_shape_fn((n_t, n_tx, n_rx, n_sc), |(t, tx, rx, sc)| {
|
||||
((t + tx + rx + sc) as f32 * 0.05).sin()
|
||||
});
|
||||
let mut kp = Array3::<f32>::zeros((n_t, 17, 3));
|
||||
for t in 0..n_t {
|
||||
for j in 0..17 {
|
||||
kp[[t, j, 0]] = ((j as f32 + 1.0) / 18.0).clamp(0.0, 1.0); // x
|
||||
kp[[t, j, 1]] = (((j * 3 + t) % 18) as f32 / 18.0).clamp(0.0, 1.0); // y
|
||||
kp[[t, j, 2]] = 2.0; // COCO "visible"
|
||||
}
|
||||
}
|
||||
write_npy(dir.join("wifi_csi.npy"), &litude).expect("write wifi_csi.npy");
|
||||
write_npy(dir.join("wifi_csi_phase.npy"), &phase).expect("write wifi_csi_phase.npy");
|
||||
write_npy(dir.join("gt_keypoints.npy"), &kp).expect("write gt_keypoints.npy");
|
||||
}
|
||||
|
||||
/// Round-trip with native == target: write the `.npy` fixtures, discover them,
/// load the first sample, and check its shapes and finiteness. No subcarrier
/// interpolation is involved because the recording is written with the same
/// subcarrier count (56) that is requested from the loader.
#[test]
fn mmfi_loads_real_npy_without_interpolation() {
    let fixture_dir = TempDir::new().expect("tempdir");
    let root = fixture_dir.path();
    write_recording(root, 8, 3, 3, 56);

    let dataset = MmFiDataset::discover(root, 8, 56, 17).expect("discover the recording");
    let discovered = dataset.len();
    assert!(discovered >= 1, "must discover at least one sample, got {}", discovered);

    let first = dataset.get(0).expect("sample 0");

    // Shapes: CSI tensors keep the written layout; keypoints are split into
    // coordinates and a per-joint visibility vector.
    let csi_shape: [usize; 4] = [8, 3, 3, 56];
    assert_eq!(first.amplitude.shape(), &csi_shape, "amplitude shape");
    assert_eq!(first.phase.shape(), &csi_shape, "phase shape");
    assert_eq!(first.keypoints.shape(), &[17, 2], "keypoints shape");
    assert_eq!(first.keypoint_visibility.shape(), &[17], "visibility shape");

    // Values: nothing may have turned into NaN or infinity on the way through.
    assert!(
        first.amplitude.iter().all(|value| value.is_finite()),
        "amplitude must be finite"
    );
    assert!(
        first.phase.iter().all(|value| value.is_finite()),
        "phase must be finite"
    );
    assert!(
        first.keypoints.iter().all(|value| value.is_finite()),
        "keypoints must be finite"
    );
}
|
||||
|
||||
/// When the requested subcarrier count differs from the count stored on disk,
/// the loader must resample the subcarrier axis of both amplitude and phase.
#[test]
fn mmfi_resamples_subcarriers_on_load() {
    let fixture_dir = TempDir::new().expect("tempdir");
    let root = fixture_dir.path();
    write_recording(root, 8, 3, 3, 56);

    // target (28) < native (56) — the loader must interpolate down.
    let dataset = MmFiDataset::discover(root, 8, 28, 17).expect("discover");
    let first = dataset.get(0).expect("sample 0");

    let resampled_shape: [usize; 4] = [8, 3, 3, 28];
    assert_eq!(
        first.amplitude.shape(),
        &resampled_shape,
        "amplitude must be resampled to the requested 28 subcarriers"
    );
    assert_eq!(
        first.phase.shape(),
        &resampled_shape,
        "phase must be resampled too"
    );
    assert!(
        first.amplitude.iter().all(|value| value.is_finite()),
        "resampled amplitude must be finite"
    );
}
|
||||
|
||||
/// Discovering in a root directory that contains no recordings must succeed
/// and yield a dataset of length zero — no panic and no spurious samples.
#[test]
fn mmfi_empty_root_is_empty() {
    let empty_dir = TempDir::new().expect("tempdir");
    let root = empty_dir.path();

    let dataset = MmFiDataset::discover(root, 8, 56, 17).expect("discover empty root");
    let sample_count = dataset.len();

    assert_eq!(sample_count, 0, "empty root must produce an empty dataset");
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue