mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-27 17:23:34 +00:00
feat(analysis): rate-histogram motif encoder + A/B vs SDPA — ADR-154 §17 item 10 follow-up
Adds src/analysis/rate_encoder.rs + tests/ac_2_encoder_comparison.rs. Controlled A/B diagnostic on the 8-protocol labeled corpus that disproved SDPA in ADR §17 item 10.

Measured precision@5:
SDPA (shipped): 0.072
rate histogram (this path): 0.079
delta: +0.007

Verdict: encoder is NOT the bottleneck. Both encoders sit below the 1/8 = 0.125 random baseline on the 8-protocol corpus (SDPA 0.072 and rate histogram 0.079), with the two scores within +0.007 of each other. Swapping the encoder from SDPA + deterministic-low-rank projection to a trivial row-major flatten of the normalised raster did not materially move the number. By ADR §17 item 10's three-axis framing (encoder / substrate / labels), this rules out the encoder axis: remaining levers are substrate (real FlyWire ingest) or labels (raster-regime rather than stimulus-protocol).

Max file 349 LOC (tests/ac_2_encoder_comparison.rs). New LOC 500 (rate_encoder 151 + test 349).

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
70003115df
commit
d06e80fe20
4 changed files with 518 additions and 4 deletions
|
|
@ -17,6 +17,7 @@
|
|||
pub mod gpu;
|
||||
pub mod motif;
|
||||
pub mod partition;
|
||||
pub mod rate_encoder;
|
||||
pub mod structural;
|
||||
pub mod types;
|
||||
|
||||
|
|
@ -95,6 +96,19 @@ impl Analysis {
|
|||
&self.cfg, &self.sdpa, &self.w_q, &self.w_k, &self.w_v, conn, spikes, k,
|
||||
)
|
||||
}
|
||||
|
||||
/// Same as [`Self::retrieve_motifs`] but uses the rate-histogram
|
||||
/// encoder (see `analysis::rate_encoder`) instead of SDPA. Exposed
|
||||
/// for the ADR-154 §17 item 10 encoder-vs-substrate A/B
|
||||
/// diagnostic; prefer the SDPA path for production use.
|
||||
pub fn retrieve_motifs_rate(
|
||||
&self,
|
||||
conn: &Connectome,
|
||||
spikes: &[Spike],
|
||||
k: usize,
|
||||
) -> (MotifIndex, Vec<MotifHit>) {
|
||||
rate_encoder::rate_histogram_retrieve_motifs(&self.cfg, conn, spikes, k)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -56,12 +56,12 @@ pub(crate) fn retrieve_motifs(
|
|||
(index, hits)
|
||||
}
|
||||
|
||||
struct WindowMeta {
|
||||
spike_count: u32,
|
||||
dominant_class_idx: u8,
|
||||
pub(super) struct WindowMeta {
|
||||
pub(super) spike_count: u32,
|
||||
pub(super) dominant_class_idx: u8,
|
||||
}
|
||||
|
||||
fn build_raster(
|
||||
pub(super) fn build_raster(
|
||||
conn: &Connectome,
|
||||
spikes: &[Spike],
|
||||
t_start: f32,
|
||||
|
|
|
|||
151
examples/connectome-fly/src/analysis/rate_encoder.rs
Normal file
151
examples/connectome-fly/src/analysis/rate_encoder.rs
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
//! Rate-histogram motif encoder — alternative to the SDPA path in
|
||||
//! `motif.rs`. Designed as a *controlled A/B baseline* for the AC-2
|
||||
//! encoder-vs-substrate diagnosis in ADR-154 §17 item 10.
|
||||
//!
|
||||
//! Design intent:
|
||||
//!
|
||||
//! - The shipped SDPA + deterministic-low-rank-projection encoder is
|
||||
//! protocol-blind on the expanded 8-protocol labeled corpus
|
||||
//! (precision@5 ≈ random). Three remediations plateau at ≤ 0.60.
|
||||
//! The ADR calls for pinning the bottleneck: encoder, substrate, or
|
||||
//! labels.
|
||||
//! - This module implements the *encoder* axis: a trivial
|
||||
//! row-major flatten of the normalised raster produced by
|
||||
//! `motif::build_raster`. No projection, no attention, no additional
|
||||
//! normalisation. Every bin of every class is preserved verbatim.
|
||||
//! - If this cheap baseline scores *higher* than SDPA on the same
|
||||
//! 8-protocol labeled corpus, SDPA is actively hurting. If it scores
|
||||
//! the same or lower, the substrate or the labels — not the encoder — is the
|
||||
//! bottleneck.
|
||||
//!
|
||||
//! The encoder is deterministic (no RNG, no state) and uses exactly
|
||||
//! one allocation (the output vector).
|
||||
|
||||
use crate::connectome::Connectome;
|
||||
use crate::lif::Spike;
|
||||
|
||||
use super::motif::build_raster;
|
||||
use super::types::{AnalysisConfig, MotifHit, MotifIndex, MotifWindow};
|
||||
|
||||
/// Flatten a raster `[n_rows][n_cols]` into a row-major `Vec<f32>`.
///
/// The output has length `n_rows * n_cols` and satisfies
/// `out[r * n_cols + c] == raster[r][c]`. Values are copied verbatim: no
/// normalisation is applied beyond whatever the caller already did, so the
/// row-normalised form emitted by `motif::build_raster` is preserved and the
/// A/B comparison isolates "what does SDPA add beyond the raster itself".
///
/// An empty raster yields an empty vector.
///
/// # Panics
///
/// Panics if any row's length differs from the first row's. The check runs
/// in every build profile: a ragged raster would otherwise produce an
/// embedding whose length and indexing silently break the contract above.
pub fn rate_histogram_encode(raster: &[Vec<f32>]) -> Vec<f32> {
    let n_cols = match raster.first() {
        Some(first) => first.len(),
        None => return Vec::new(),
    };
    // Single allocation: the capacity is exact for a rectangular raster.
    let mut out = Vec::with_capacity(raster.len() * n_cols);
    for row in raster {
        // Rasters from `build_raster` should always be rectangular, but this
        // is a public entry point, so the shape is treated as untrusted.
        assert_eq!(
            row.len(),
            n_cols,
            "rate_histogram_encode: ragged raster (row len differs from first)"
        );
        // Verbatim copy of the row's bins. No projection, no attention.
        out.extend_from_slice(row);
    }
    out
}
|
||||
|
||||
/// Build motif embeddings over sliding windows using the rate-histogram
|
||||
/// encoder and index them. Mirrors `motif::retrieve_motifs` so the two
|
||||
/// paths can be swapped at call sites without other changes. Returns
|
||||
/// the index plus the top-k repeated motifs.
|
||||
///
|
||||
/// The sliding-window schedule, the in-memory kNN index, and the
|
||||
/// dominant-class accounting are *identical* to the SDPA path — the
|
||||
/// only difference is the per-window embedding function. This is the
|
||||
/// A/B invariant the diagnostic test relies on.
|
||||
pub fn rate_histogram_retrieve_motifs(
|
||||
cfg: &AnalysisConfig,
|
||||
conn: &Connectome,
|
||||
spikes: &[Spike],
|
||||
k: usize,
|
||||
) -> (MotifIndex, Vec<MotifHit>) {
|
||||
let mut index = MotifIndex::new(cfg.index_capacity);
|
||||
if spikes.is_empty() {
|
||||
return (index, Vec::new());
|
||||
}
|
||||
let t_end = spikes.last().map(|s| s.t_ms).unwrap_or(0.0);
|
||||
let win = cfg.motif_window_ms;
|
||||
let bins = cfg.motif_bins;
|
||||
let step = win / 2.0;
|
||||
let mut t = 0.0;
|
||||
while t + win <= t_end + step {
|
||||
let (raster, meta) = build_raster(conn, spikes, t, win, bins);
|
||||
if meta.spike_count == 0 {
|
||||
t += step;
|
||||
continue;
|
||||
}
|
||||
let vec = rate_histogram_encode(&raster);
|
||||
index.insert(
|
||||
vec,
|
||||
MotifWindow {
|
||||
t_center_ms: t + win * 0.5,
|
||||
spike_count: meta.spike_count,
|
||||
dominant_class_idx: meta.dominant_class_idx,
|
||||
},
|
||||
);
|
||||
t += step;
|
||||
}
|
||||
let hits = index.top_k(k);
|
||||
(index, hits)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// An empty raster flattens to an empty embedding.
    #[test]
    fn encode_empty_raster_returns_empty_vec() {
        let no_rows: Vec<Vec<f32>> = Vec::new();
        assert!(rate_histogram_encode(&no_rows).is_empty());
    }

    /// Every cell lands at `r * n_cols + c` with its value untouched.
    #[test]
    fn encode_is_row_major_and_preserves_values() {
        // 3 rows × 4 cols of distinct values, so a column-major flatten
        // would be caught immediately.
        let raster: Vec<Vec<f32>> = vec![
            vec![1.0, 2.0, 3.0, 4.0],
            vec![5.0, 6.0, 7.0, 8.0],
            vec![9.0, 10.0, 11.0, 12.0],
        ];
        let flat = rate_histogram_encode(&raster);
        assert_eq!(flat.len(), 12);
        for (r, row) in raster.iter().enumerate() {
            for (c, &cell) in row.iter().enumerate() {
                assert_eq!(flat[r * 4 + c], cell);
            }
        }
    }

    /// Two encodings of the same raster agree bit for bit.
    #[test]
    fn encode_is_deterministic_across_runs() {
        let raster: Vec<Vec<f32>> = vec![
            vec![0.1, 0.2, 0.3],
            vec![0.4, 0.5, 0.6],
            vec![0.7, 0.8, 0.9],
        ];
        let first = rate_histogram_encode(&raster);
        let second = rate_histogram_encode(&raster);
        assert_eq!(first.len(), second.len());
        let first_bits = first.iter().map(|v| v.to_bits());
        let second_bits = second.iter().map(|v| v.to_bits());
        for (x, y) in first_bits.zip(second_bits) {
            assert_eq!(x, y, "bit-level determinism required");
        }
    }
}
|
||||
349
examples/connectome-fly/tests/ac_2_encoder_comparison.rs
Normal file
349
examples/connectome-fly/tests/ac_2_encoder_comparison.rs
Normal file
|
|
@ -0,0 +1,349 @@
|
|||
#![allow(clippy::needless_range_loop)]
|
||||
//! ADR-154 §17 item 10 follow-up — encoder-vs-substrate diagnostic.
|
||||
//!
|
||||
//! The shipped SDPA + deterministic-low-rank-projection motif encoder
|
||||
//! was measured protocol-blind on this substrate: expanded-corpus AC-2
|
||||
//! at 8 protocols landed at `precision@5 = 0.117` (random = 0.125). The
|
||||
//! ADR names three axes to fix this — different encoder, different
|
||||
//! substrate, different labels — and asks that the cheapest axis
|
||||
//! (encoder) be investigated first with a controlled A/B.
|
||||
//!
|
||||
//! This test is that A/B. It runs the same 8-protocol labeled corpus
|
||||
//! through BOTH encoders and reports precision@5 side-by-side. The
|
||||
//! test is **publish-only**: it does not gate on absolute precision
|
||||
//! numbers. It fails only on non-deterministic output, malformed
|
||||
//! vectors, or an empty corpus — the AC-2 precision numbers go into
|
||||
//! the ADR §17 table, not into a regression gate.
|
||||
//!
|
||||
//! Interpretation rubric (fill in the commit message from the
|
||||
//! printed verdict):
|
||||
//!
|
||||
//! - rate > SDPA by a meaningful margin (≥ 0.05) → SDPA is actively
|
||||
//! hurting on this substrate.
|
||||
//! - rate ≈ SDPA (within 0.05) → encoder is NOT the bottleneck; try
|
||||
//! substrate or labels next.
|
||||
//! - rate < SDPA by the same margin (≥ 0.05) → rate histogram is actively worse; SDPA at least
|
||||
//! preserves some protocol-specific signal.
|
||||
|
||||
use connectome_fly::{
|
||||
Analysis, AnalysisConfig, Connectome, ConnectomeConfig, Engine, EngineConfig, MotifIndex,
|
||||
NeuronId, Observer, Spike, Stimulus,
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// 8-protocol corpus
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
/// A single stimulus protocol from the 8-protocol labeled corpus.
///
/// The four stimulus axes below mirror ADR-154 §17 item 10.
#[derive(Clone, Copy, Debug)]
struct Protocol {
    /// Label attached to every motif vector produced under this protocol.
    id: u8,
    /// Sensory population to drive: 0 = first half of the sensory
    /// neurons, 1 = second half.
    sensory_subset: u8,
    /// Pulse-train rate.
    freq_hz: f32,
    /// Per-pulse charge.
    amplitude_pa: f32,
    /// Pulse-train window width.
    duration_ms: f32,
}
|
||||
|
||||
/// Build an 8-protocol corpus spanning the four axes called out in
|
||||
/// ADR-154 §17 item 10. The eight points are an asymmetric partial
|
||||
/// factorial — not all 2⁴ combinations (the budget is 8 protocols) —
|
||||
/// chosen so every axis varies at least once against the P0 baseline.
|
||||
fn eight_protocols() -> [Protocol; 8] {
|
||||
[
|
||||
Protocol {
|
||||
id: 0,
|
||||
sensory_subset: 0,
|
||||
freq_hz: 60.0,
|
||||
amplitude_pa: 80.0,
|
||||
duration_ms: 200.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 1,
|
||||
sensory_subset: 0,
|
||||
freq_hz: 60.0,
|
||||
amplitude_pa: 80.0,
|
||||
duration_ms: 300.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 2,
|
||||
sensory_subset: 0,
|
||||
freq_hz: 60.0,
|
||||
amplitude_pa: 130.0,
|
||||
duration_ms: 200.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 3,
|
||||
sensory_subset: 0,
|
||||
freq_hz: 120.0,
|
||||
amplitude_pa: 80.0,
|
||||
duration_ms: 200.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 4,
|
||||
sensory_subset: 1,
|
||||
freq_hz: 60.0,
|
||||
amplitude_pa: 80.0,
|
||||
duration_ms: 200.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 5,
|
||||
sensory_subset: 1,
|
||||
freq_hz: 120.0,
|
||||
amplitude_pa: 130.0,
|
||||
duration_ms: 200.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 6,
|
||||
sensory_subset: 0,
|
||||
freq_hz: 120.0,
|
||||
amplitude_pa: 130.0,
|
||||
duration_ms: 300.0,
|
||||
},
|
||||
Protocol {
|
||||
id: 7,
|
||||
sensory_subset: 1,
|
||||
freq_hz: 60.0,
|
||||
amplitude_pa: 130.0,
|
||||
duration_ms: 300.0,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
||||
/// Build the current-injection schedule for one protocol.
|
||||
fn stimulus_for(conn: &Connectome, p: &Protocol) -> Stimulus {
|
||||
let sensory = conn.sensory_neurons();
|
||||
let half = sensory.len() / 2;
|
||||
let subset: Vec<NeuronId> = if p.sensory_subset == 0 {
|
||||
sensory[..half].to_vec()
|
||||
} else {
|
||||
sensory[half..].to_vec()
|
||||
};
|
||||
Stimulus::pulse_train(&subset, 20.0, p.duration_ms, p.amplitude_pa, p.freq_hz)
|
||||
}
|
||||
|
||||
/// Run one protocol through a fresh LIF engine and return all spikes
|
||||
/// plus the simulation end-time.
|
||||
fn run_protocol(conn: &Connectome, p: &Protocol) -> (f32, Vec<Spike>) {
|
||||
let stim = stimulus_for(conn, p);
|
||||
let mut eng = Engine::new(conn, EngineConfig::default());
|
||||
let mut obs = Observer::new(conn.num_neurons());
|
||||
let t_end = 20.0 + p.duration_ms + 80.0;
|
||||
eng.run_with(&stim, &mut obs, t_end);
|
||||
(t_end, obs.spikes().to_vec())
|
||||
}
|
||||
|
||||
/// A motif embedding tagged with the protocol that produced it.
#[derive(Clone, Debug)]
struct LabeledVec {
    /// Encoder output for one motif window.
    vector: Vec<f32>,
    /// `Protocol::id` of the protocol the window was recorded under.
    protocol_id: u8,
}
|
||||
|
||||
/// Run all 8 protocols and collect labeled motif vectors from the
|
||||
/// given encoder (SDPA via `retrieve_motifs`, rate histogram via
|
||||
/// `retrieve_motifs_rate`). `encoder_fn` takes `(connectome, spikes)`
|
||||
/// and returns the populated motif index; the caller decides which
|
||||
/// `Analysis` method to call.
|
||||
fn collect_labeled_vectors<F>(
|
||||
conn: &Connectome,
|
||||
protocols: &[Protocol],
|
||||
mut encoder_fn: F,
|
||||
) -> Vec<LabeledVec>
|
||||
where
|
||||
F: FnMut(&Connectome, &[Spike]) -> MotifIndex,
|
||||
{
|
||||
let mut labeled: Vec<LabeledVec> = Vec::new();
|
||||
for p in protocols {
|
||||
let (_t_end, spikes) = run_protocol(conn, p);
|
||||
if spikes.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let index = encoder_fn(conn, &spikes);
|
||||
for v in index.vectors() {
|
||||
labeled.push(LabeledVec {
|
||||
vector: v.clone(),
|
||||
protocol_id: p.id,
|
||||
});
|
||||
}
|
||||
}
|
||||
labeled
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Precision@k
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
/// Euclidean distance between `a` and `b`.
///
/// If the slices differ in length, only the common prefix is compared and
/// the surplus elements of the longer slice are ignored. Two empty slices
/// are at distance `0.0`.
#[inline]
fn l2(a: &[f32], b: &[f32]) -> f32 {
    // Explicit fold from +0.0 keeps the left-to-right summation order and
    // the initial value of the indexed loop this replaces.
    a.iter()
        .zip(b)
        .fold(0.0_f32, |acc, (x, y)| {
            let d = x - y;
            acc + d * d
        })
        .sqrt()
}
|
||||
|
||||
/// Labeled precision@k: for each labeled vector, brute-force find its
|
||||
/// top-k nearest neighbours in the labeled corpus (excluding itself),
|
||||
/// count how many share its label. Returns the mean across the corpus.
|
||||
fn precision_at_k(corpus: &[LabeledVec], k: usize) -> f32 {
|
||||
if corpus.len() < 2 || k == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
let mut total = 0.0_f32;
|
||||
for (qi, q) in corpus.iter().enumerate() {
|
||||
// Score every other vector; keep top-k smallest distances.
|
||||
let mut pairs: Vec<(f32, u8)> = Vec::with_capacity(corpus.len() - 1);
|
||||
for (ci, c) in corpus.iter().enumerate() {
|
||||
if ci == qi {
|
||||
continue;
|
||||
}
|
||||
pairs.push((l2(&q.vector, &c.vector), c.protocol_id));
|
||||
}
|
||||
pairs.sort_by(|a, b| a.0.partial_cmp(&b.0).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let take = k.min(pairs.len());
|
||||
let hits = pairs[..take]
|
||||
.iter()
|
||||
.filter(|(_, lbl)| *lbl == q.protocol_id)
|
||||
.count();
|
||||
total += hits as f32 / take as f32;
|
||||
}
|
||||
total / corpus.len() as f32
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
// Main A/B diagnostic
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
/// Publish-only A/B diagnostic: runs the 8-protocol corpus through the SDPA
/// and rate-histogram encoders and prints precision@5 for both.
///
/// Hard failures are limited to sanity conditions: an empty corpus, a
/// corpus-size mismatch between the encoders, fewer than two distinct
/// protocols, a non-deterministic rate path, or a rate vector of the wrong
/// dimension. The precision numbers are only printed, never gated.
#[test]
fn ac_2_encoder_comparison_sdpa_vs_rate_histogram() {
    // Same connectome for both encoders — isolates the encoder as the
    // only variable.
    let conn = Connectome::generate(&ConnectomeConfig::default());
    let protocols = eight_protocols();

    // Motif-window config matches the expanded-corpus AC-2 test
    // described in ADR-154 §17 item 10: 20 ms windows, 10 bins. The
    // index is large enough to hold every window from all 8 protocols
    // (≈ 8 × 20 = 160 at 200 ms, more at 300 ms).
    let cfg = AnalysisConfig {
        motif_window_ms: 20.0,
        motif_bins: 10,
        index_capacity: 1024,
        ..AnalysisConfig::default()
    };
    let an = Analysis::new(cfg.clone());

    // ---- SDPA path (shipped) ----
    let sdpa_corpus = collect_labeled_vectors(&conn, &protocols, |c, sp| {
        let (index, _hits) = an.retrieve_motifs(c, sp, 5);
        index
    });

    // ---- Rate-histogram path (this commit) ----
    let rate_corpus = collect_labeled_vectors(&conn, &protocols, |c, sp| {
        let (index, _hits) = an.retrieve_motifs_rate(c, sp, 5);
        index
    });

    // ---- Hard asserts: diagnostic sanity, NOT precision floor ----
    assert!(
        !sdpa_corpus.is_empty(),
        "SDPA corpus is empty — LIF engine or SDPA path failed"
    );
    assert!(
        !rate_corpus.is_empty(),
        "rate-histogram corpus is empty — LIF engine or rate path failed"
    );
    // Both encoders see the same windows — they must produce the same
    // count of labeled vectors. If this differs the A/B is invalid
    // (one path is dropping or inserting windows the other isn't).
    assert_eq!(
        sdpa_corpus.len(),
        rate_corpus.len(),
        "corpus size mismatch ({} SDPA vs {} rate) — one encoder is \
         filtering differently, invalidating the A/B",
        sdpa_corpus.len(),
        rate_corpus.len()
    );
    // At least two protocols must be represented, otherwise label
    // precision is meaningless. This gate does NOT require all 8: the
    // printed 1/8 random baseline is only the right floor when
    // `distinct_protocols` in the output reads 8/8.
    let mut counts = [0_u32; 8];
    for v in &sdpa_corpus {
        counts[v.protocol_id as usize] += 1;
    }
    let distinct = counts.iter().filter(|c| **c > 0).count();
    assert!(
        distinct >= 2,
        "corpus collapsed to {distinct} distinct protocols out of 8 \
         — random baseline not comparable"
    );

    // Determinism check: re-run the rate path and confirm bit-identical
    // vectors under identical labels. (SDPA relies on an external crate
    // whose internal ordering we don't gate here; the rate path has no RNG
    // so MUST be exact.)
    let rate_corpus_b = collect_labeled_vectors(&conn, &protocols, |c, sp| {
        let (index, _hits) = an.retrieve_motifs_rate(c, sp, 5);
        index
    });
    assert_eq!(rate_corpus.len(), rate_corpus_b.len());
    for (a, b) in rate_corpus.iter().zip(rate_corpus_b.iter()) {
        // Equal totals could still hide windows shifting between protocols.
        assert_eq!(a.protocol_id, b.protocol_id, "rate: label drift between runs");
        assert_eq!(a.vector.len(), b.vector.len(), "rate: vector length drift");
        for (x, y) in a.vector.iter().zip(b.vector.iter()) {
            assert_eq!(
                x.to_bits(),
                y.to_bits(),
                "rate encoder is non-deterministic"
            );
        }
    }

    // Malformed-vector guard: every rate vector should have length
    // 15 * motif_bins (15 classes in the connectome).
    let expected_dim = 15 * cfg.motif_bins;
    for v in &rate_corpus {
        assert_eq!(
            v.vector.len(),
            expected_dim,
            "rate vector has dim {} expected {expected_dim}",
            v.vector.len()
        );
    }

    // ---- Soft measurement: precision@5 ----
    let k = 5;
    let sdpa_p = precision_at_k(&sdpa_corpus, k);
    let rate_p = precision_at_k(&rate_corpus, k);
    let delta = rate_p - sdpa_p;
    let random_baseline = 1.0 / 8.0;

    // Verdict marker for the ADR §17 follow-up row.
    let marker = if delta > 0.05 {
        "PASS (rate > SDPA — SDPA is actively hurting)"
    } else if delta < -0.05 {
        "MISS (rate < SDPA — rate histogram is actively worse)"
    } else {
        "TIE (rate ≈ SDPA — encoder is NOT the bottleneck; try substrate or labels)"
    };

    eprintln!(
        "ac-2-encoder-comparison:\n\
         corpus_size = {} windows\n\
         distinct_protocols = {}/8\n\
         SDPA precision@{k} = {sdpa_p:.3}\n\
         rate precision@{k} = {rate_p:.3}\n\
         delta (rate - SDPA) = {delta:+.3}\n\
         random baseline (1/8) = {random_baseline:.3}\n\
         verdict = {marker}",
        sdpa_corpus.len(),
        distinct,
    );
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue