diff --git a/examples/connectome-fly/src/analysis/mod.rs b/examples/connectome-fly/src/analysis/mod.rs index 2c547859..75dc76b3 100644 --- a/examples/connectome-fly/src/analysis/mod.rs +++ b/examples/connectome-fly/src/analysis/mod.rs @@ -17,6 +17,7 @@ pub mod gpu; pub mod motif; pub mod partition; +pub mod rate_encoder; pub mod structural; pub mod types; @@ -95,6 +96,19 @@ impl Analysis { &self.cfg, &self.sdpa, &self.w_q, &self.w_k, &self.w_v, conn, spikes, k, ) } + + /// Same as [`Self::retrieve_motifs`] but uses the rate-histogram + /// encoder (see `analysis::rate_encoder`) instead of SDPA. Exposed + /// for the ADR-154 §17 item 10 encoder-vs-substrate A/B + /// diagnostic; prefer the SDPA path for production use. + pub fn retrieve_motifs_rate( + &self, + conn: &Connectome, + spikes: &[Spike], + k: usize, + ) -> (MotifIndex, Vec) { + rate_encoder::rate_histogram_retrieve_motifs(&self.cfg, conn, spikes, k) + } } #[cfg(test)] diff --git a/examples/connectome-fly/src/analysis/motif.rs b/examples/connectome-fly/src/analysis/motif.rs index 2b56a291..961ae2c9 100644 --- a/examples/connectome-fly/src/analysis/motif.rs +++ b/examples/connectome-fly/src/analysis/motif.rs @@ -56,12 +56,12 @@ pub(crate) fn retrieve_motifs( (index, hits) } -struct WindowMeta { - spike_count: u32, - dominant_class_idx: u8, +pub(super) struct WindowMeta { + pub(super) spike_count: u32, + pub(super) dominant_class_idx: u8, } -fn build_raster( +pub(super) fn build_raster( conn: &Connectome, spikes: &[Spike], t_start: f32, diff --git a/examples/connectome-fly/src/analysis/rate_encoder.rs b/examples/connectome-fly/src/analysis/rate_encoder.rs new file mode 100644 index 00000000..788a5647 --- /dev/null +++ b/examples/connectome-fly/src/analysis/rate_encoder.rs @@ -0,0 +1,151 @@ +//! Rate-histogram motif encoder — alternative to the SDPA path in +//! `motif.rs`. Designed as a *controlled A/B baseline* for the AC-2 +//! encoder-vs-substrate diagnosis in ADR-154 §17 item 10. +//! +//! Design intent: +//! +//! - The shipped SDPA + deterministic-low-rank-projection encoder is +//! protocol-blind on the expanded 8-protocol labeled corpus +//! (precision@5 ≈ random). Three remediations plateau at ≤ 0.60. +//! The ADR calls for pinning the bottleneck: encoder, substrate, or +//! labels. +//! - This module implements the *encoder* axis: a trivial +//! row-major flatten of the normalised raster produced by +//! `motif::build_raster`. No projection, no attention, no additional +//! normalisation. Every bin of every class is preserved verbatim. +//! - If this cheap baseline scores *higher* than SDPA on the same +//! 8-protocol labeled corpus, SDPA is actively hurting. If it scores +//! the same or lower, the substrate — not the encoder — is the +//! bottleneck. +//! +//! The encoder is deterministic (no RNG, no state) and uses exactly +//! one allocation (the output vector). + +use crate::connectome::Connectome; +use crate::lif::Spike; + +use super::motif::build_raster; +use super::types::{AnalysisConfig, MotifHit, MotifIndex, MotifWindow}; + +/// Flatten a raster `[n_rows][n_cols]` into a row-major `Vec`. +/// +/// The output length is `n_rows * n_cols` and `out[r * n_cols + c] == +/// raster[r][c]`. No normalisation beyond what the caller already +/// applied — we preserve the row-normalised form emitted by +/// `motif::build_raster` verbatim so the A/B comparison isolates "what +/// does SDPA add beyond the raster itself". +/// +/// Empty rasters return an empty vector. +pub fn rate_histogram_encode(raster: &[Vec]) -> Vec { + if raster.is_empty() { + return Vec::new(); + } + let n_cols = raster[0].len(); + // Guard against ragged rasters (shouldn't occur from build_raster but + // we validate anyway — the public API surface treats this as input). + for row in raster.iter() { + debug_assert_eq!( + row.len(), + n_cols, + "rate_histogram_encode: ragged raster (row len differs from first)" + ); + } + let n_rows = raster.len(); + let mut out = Vec::with_capacity(n_rows * n_cols); + for row in raster { + // Explicit raw-bin-count copy; `extend_from_slice` compiles to a + // `memcpy` on contiguous data. No projection, no attention. + out.extend_from_slice(row); + } + out +} + +/// Build motif embeddings over sliding windows using the rate-histogram +/// encoder and index them. Mirrors `motif::retrieve_motifs` so the two +/// paths can be swapped at call sites without other changes. Returns +/// the index plus the top-k repeated motifs. +/// +/// The sliding-window schedule, the in-memory kNN index, and the +/// dominant-class accounting are *identical* to the SDPA path — the +/// only difference is the per-window embedding function. This is the +/// A/B invariant the diagnostic test relies on. +pub fn rate_histogram_retrieve_motifs( + cfg: &AnalysisConfig, + conn: &Connectome, + spikes: &[Spike], + k: usize, +) -> (MotifIndex, Vec) { + let mut index = MotifIndex::new(cfg.index_capacity); + if spikes.is_empty() { + return (index, Vec::new()); + } + let t_end = spikes.last().map(|s| s.t_ms).unwrap_or(0.0); + let win = cfg.motif_window_ms; + let bins = cfg.motif_bins; + let step = win / 2.0; + let mut t = 0.0; + while t + win <= t_end + step { + let (raster, meta) = build_raster(conn, spikes, t, win, bins); + if meta.spike_count == 0 { + t += step; + continue; + } + let vec = rate_histogram_encode(&raster); + index.insert( + vec, + MotifWindow { + t_center_ms: t + win * 0.5, + spike_count: meta.spike_count, + dominant_class_idx: meta.dominant_class_idx, + }, + ); + t += step; + } + let hits = index.top_k(k); + (index, hits) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn encode_empty_raster_returns_empty_vec() { + let raster: Vec> = Vec::new(); + let v = rate_histogram_encode(&raster); + assert!(v.is_empty()); + } + + #[test] + fn encode_is_row_major_and_preserves_values() { + // 3 rows × 4 cols — pick distinct values so we can spot + // row-vs-column-major mistakes. + let raster: Vec> = vec![ + vec![1.0, 2.0, 3.0, 4.0], + vec![5.0, 6.0, 7.0, 8.0], + vec![9.0, 10.0, 11.0, 12.0], + ]; + let flat = rate_histogram_encode(&raster); + assert_eq!(flat.len(), 12); + for r in 0..3 { + for c in 0..4 { + assert_eq!(flat[r * 4 + c], raster[r][c]); + } + } + } + + #[test] + fn encode_is_deterministic_across_runs() { + let raster: Vec> = vec![ + vec![0.1, 0.2, 0.3], + vec![0.4, 0.5, 0.6], + vec![0.7, 0.8, 0.9], + ]; + let a = rate_histogram_encode(&raster); + let b = rate_histogram_encode(&raster); + assert_eq!(a.len(), b.len()); + for (x, y) in a.iter().zip(b.iter()) { + assert_eq!(x.to_bits(), y.to_bits(), "bit-level determinism required"); + } + } +} diff --git a/examples/connectome-fly/tests/ac_2_encoder_comparison.rs b/examples/connectome-fly/tests/ac_2_encoder_comparison.rs new file mode 100644 index 00000000..0e79270b --- /dev/null +++ b/examples/connectome-fly/tests/ac_2_encoder_comparison.rs @@ -0,0 +1,349 @@ +#![allow(clippy::needless_range_loop)] +//! ADR-154 §17 item 10 follow-up — encoder-vs-substrate diagnostic. +//! +//! The shipped SDPA + deterministic-low-rank-projection motif encoder +//! was measured protocol-blind on this substrate: expanded-corpus AC-2 +//! at 8 protocols landed at `precision@5 = 0.117` (random = 0.125). The +//! ADR names three axes to fix this — different encoder, different +//! substrate, different labels — and asks that the cheapest axis +//! (encoder) be investigated first with a controlled A/B. +//! +//! This test is that A/B. It runs the same 8-protocol labeled corpus +//! through BOTH encoders and reports precision@5 side-by-side. The +//! test is **publish-only**: it does not gate on absolute precision +//! numbers. It fails only on non-deterministic output, malformed +//! vectors, or an empty corpus — the AC-2 precision numbers go into +//! the ADR §17 table, not into a regression gate. +//! +//! Interpretation rubric (fill in the commit message from the +//! printed verdict): +//! +//! - rate > SDPA by a meaningful margin (≥ 0.05) → SDPA is actively +//! hurting on this substrate. +//! - rate ≈ SDPA (within 0.05) → encoder is NOT the bottleneck; try +//! substrate or labels next. +//! - rate < SDPA → rate histogram is actively worse; SDPA at least +//! preserves some protocol-specific signal. + +use connectome_fly::{ + Analysis, AnalysisConfig, Connectome, ConnectomeConfig, Engine, EngineConfig, MotifIndex, + NeuronId, Observer, Spike, Stimulus, +}; + +// ----------------------------------------------------------------- +// 8-protocol corpus +// ----------------------------------------------------------------- + +/// One stimulus protocol in the 8-protocol labeled corpus. +/// +/// Axes (ADR-154 §17 item 10 mirrors this): +/// - `sensory_subset` — 0 = first half of sensory neurons, 1 = second half. +/// - `freq_hz` — pulse-train rate. +/// - `amplitude_pa` — per-pulse charge. +/// - `duration_ms` — pulse-train window width. +#[derive(Clone, Copy, Debug)] +struct Protocol { + id: u8, + sensory_subset: u8, + freq_hz: f32, + amplitude_pa: f32, + duration_ms: f32, +} + +/// Build an 8-protocol corpus spanning the four axes called out in +/// ADR-154 §17 item 10. The eight points are an asymmetric partial +/// factorial — not all 2⁴ combinations (the budget is 8 protocols) — +/// chosen so every axis varies at least once against the P0 baseline. +fn eight_protocols() -> [Protocol; 8] { + [ + Protocol { + id: 0, + sensory_subset: 0, + freq_hz: 60.0, + amplitude_pa: 80.0, + duration_ms: 200.0, + }, + Protocol { + id: 1, + sensory_subset: 0, + freq_hz: 60.0, + amplitude_pa: 80.0, + duration_ms: 300.0, + }, + Protocol { + id: 2, + sensory_subset: 0, + freq_hz: 60.0, + amplitude_pa: 130.0, + duration_ms: 200.0, + }, + Protocol { + id: 3, + sensory_subset: 0, + freq_hz: 120.0, + amplitude_pa: 80.0, + duration_ms: 200.0, + }, + Protocol { + id: 4, + sensory_subset: 1, + freq_hz: 60.0, + amplitude_pa: 80.0, + duration_ms: 200.0, + }, + Protocol { + id: 5, + sensory_subset: 1, + freq_hz: 120.0, + amplitude_pa: 130.0, + duration_ms: 200.0, + }, + Protocol { + id: 6, + sensory_subset: 0, + freq_hz: 120.0, + amplitude_pa: 130.0, + duration_ms: 300.0, + }, + Protocol { + id: 7, + sensory_subset: 1, + freq_hz: 60.0, + amplitude_pa: 130.0, + duration_ms: 300.0, + }, + ] +} + +/// Build the current-injection schedule for one protocol. +fn stimulus_for(conn: &Connectome, p: &Protocol) -> Stimulus { + let sensory = conn.sensory_neurons(); + let half = sensory.len() / 2; + let subset: Vec = if p.sensory_subset == 0 { + sensory[..half].to_vec() + } else { + sensory[half..].to_vec() + }; + Stimulus::pulse_train(&subset, 20.0, p.duration_ms, p.amplitude_pa, p.freq_hz) +} + +/// Run one protocol through a fresh LIF engine and return all spikes +/// plus the simulation end-time. +fn run_protocol(conn: &Connectome, p: &Protocol) -> (f32, Vec) { + let stim = stimulus_for(conn, p); + let mut eng = Engine::new(conn, EngineConfig::default()); + let mut obs = Observer::new(conn.num_neurons()); + let t_end = 20.0 + p.duration_ms + 80.0; + eng.run_with(&stim, &mut obs, t_end); + (t_end, obs.spikes().to_vec()) +} + +/// One labeled motif vector: the encoder output plus the protocol id +/// it was produced under. +#[derive(Clone, Debug)] +struct LabeledVec { + vector: Vec, + protocol_id: u8, +} + +/// Run all 8 protocols and collect labeled motif vectors from the +/// given encoder (SDPA via `retrieve_motifs`, rate histogram via +/// `retrieve_motifs_rate`). `encoder_fn` takes `(connectome, spikes)` +/// and returns the populated motif index; the caller decides which +/// `Analysis` method to call. +fn collect_labeled_vectors( + conn: &Connectome, + protocols: &[Protocol], + mut encoder_fn: F, +) -> Vec +where + F: FnMut(&Connectome, &[Spike]) -> MotifIndex, +{ + let mut labeled: Vec = Vec::new(); + for p in protocols { + let (_t_end, spikes) = run_protocol(conn, p); + if spikes.is_empty() { + continue; + } + let index = encoder_fn(conn, &spikes); + for v in index.vectors() { + labeled.push(LabeledVec { + vector: v.clone(), + protocol_id: p.id, + }); + } + } + labeled +} + +// ----------------------------------------------------------------- +// Precision@k +// ----------------------------------------------------------------- + +#[inline] +fn l2(a: &[f32], b: &[f32]) -> f32 { + let mut s = 0.0_f32; + let n = a.len().min(b.len()); + for i in 0..n { + let d = a[i] - b[i]; + s += d * d; + } + s.sqrt() +} + +/// Labeled precision@k: for each labeled vector, brute-force find its +/// top-k nearest neighbours in the labeled corpus (excluding itself), +/// count how many share its label. Returns the mean across the corpus. +fn precision_at_k(corpus: &[LabeledVec], k: usize) -> f32 { + if corpus.len() < 2 || k == 0 { + return 0.0; + } + let mut total = 0.0_f32; + for (qi, q) in corpus.iter().enumerate() { + // Score every other vector; keep top-k smallest distances. + let mut pairs: Vec<(f32, u8)> = Vec::with_capacity(corpus.len() - 1); + for (ci, c) in corpus.iter().enumerate() { + if ci == qi { + continue; + } + pairs.push((l2(&q.vector, &c.vector), c.protocol_id)); + } + pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal)); + let take = k.min(pairs.len()); + let hits = pairs[..take] + .iter() + .filter(|(_, lbl)| *lbl == q.protocol_id) + .count(); + total += hits as f32 / take as f32; + } + total / corpus.len() as f32 +} + +// ----------------------------------------------------------------- +// Main A/B diagnostic +// ----------------------------------------------------------------- + +#[test] +fn ac_2_encoder_comparison_sdpa_vs_rate_histogram() { + // Same connectome for both encoders — isolates the encoder as the + // only variable. + let conn = Connectome::generate(&ConnectomeConfig::default()); + let protocols = eight_protocols(); + + // Motif-window config matches the expanded-corpus AC-2 test + // described in ADR-154 §17 item 10: 20 ms windows, 10 bins. The + // index is large enough to hold every window from all 8 protocols + // (≈ 8 × 20 = 160 at 200 ms, more at 300 ms). + let cfg = AnalysisConfig { + motif_window_ms: 20.0, + motif_bins: 10, + index_capacity: 1024, + ..AnalysisConfig::default() + }; + let an = Analysis::new(cfg.clone()); + + // ---- SDPA path (shipped) ---- + let sdpa_corpus = collect_labeled_vectors(&conn, &protocols, |c, sp| { + let (index, _hits) = an.retrieve_motifs(c, sp, 5); + index + }); + + // ---- Rate-histogram path (this commit) ---- + let rate_corpus = collect_labeled_vectors(&conn, &protocols, |c, sp| { + let (index, _hits) = an.retrieve_motifs_rate(c, sp, 5); + index + }); + + // ---- Hard asserts: diagnostic sanity, NOT precision floor ---- + assert!( + !sdpa_corpus.is_empty(), + "SDPA corpus is empty — LIF engine or SDPA path failed" + ); + assert!( + !rate_corpus.is_empty(), + "rate-histogram corpus is empty — LIF engine or rate path failed" + ); + // Both encoders see the same windows — they must produce the same + // count of labeled vectors. If this differs the A/B is invalid + // (one path is dropping or inserting windows the other isn't). + assert_eq!( + sdpa_corpus.len(), + rate_corpus.len(), + "corpus size mismatch ({} SDPA vs {} rate) — one encoder is \ + filtering differently, invalidating the A/B", + sdpa_corpus.len(), + rate_corpus.len() + ); + // Each protocol must be represented — otherwise the 1/8 random + // baseline is not the right floor. + let mut counts = [0_u32; 8]; + for v in &sdpa_corpus { + counts[v.protocol_id as usize] += 1; + } + let distinct = counts.iter().filter(|c| **c > 0).count(); + assert!( + distinct >= 2, + "corpus collapsed to {distinct} distinct protocols out of 8 \ + — random baseline not comparable" + ); + + // Determinism check: re-run the rate path and confirm bit-identical + // vectors. (SDPA relies on an external crate whose internal ordering + // we don't gate here; the rate path has no RNG so MUST be exact.) + let rate_corpus_b = collect_labeled_vectors(&conn, &protocols, |c, sp| { + let (index, _hits) = an.retrieve_motifs_rate(c, sp, 5); + index + }); + assert_eq!(rate_corpus.len(), rate_corpus_b.len()); + for (a, b) in rate_corpus.iter().zip(rate_corpus_b.iter()) { + assert_eq!(a.vector.len(), b.vector.len(), "rate: vector length drift"); + for (x, y) in a.vector.iter().zip(b.vector.iter()) { + assert_eq!( + x.to_bits(), + y.to_bits(), + "rate encoder is non-deterministic" + ); + } + } + + // Malformed-vector guard: every rate vector should have length + // 15 * motif_bins (15 classes in the connectome). + let expected_dim = 15 * cfg.motif_bins; + for v in &rate_corpus { + assert_eq!( + v.vector.len(), + expected_dim, + "rate vector has dim {} expected {expected_dim}", + v.vector.len() + ); + } + + // ---- Soft measurement: precision@5 ---- + let k = 5; + let sdpa_p = precision_at_k(&sdpa_corpus, k); + let rate_p = precision_at_k(&rate_corpus, k); + let delta = rate_p - sdpa_p; + let random_baseline = 1.0 / 8.0; + + // Verdict marker for the ADR §17 follow-up row. + let marker = if delta > 0.05 { + "PASS (rate > SDPA — SDPA is actively hurting)" + } else if delta < -0.05 { + "MISS (rate < SDPA — rate histogram is actively worse)" + } else { + "TIE (rate ≈ SDPA — encoder is NOT the bottleneck; try substrate or labels)" + }; + + eprintln!( + "ac-2-encoder-comparison:\n\ + corpus_size = {} windows\n\ + distinct_protocols = {}/8\n\ + SDPA precision@{k} = {sdpa_p:.3}\n\ + rate precision@{k} = {rate_p:.3}\n\ + delta (rate - SDPA) = {delta:+.3}\n\ + random baseline (1/8) = {random_baseline:.3}\n\ + verdict = {marker}", + sdpa_corpus.len(), + distinct, + ); +}