diff --git a/examples/connectome-fly/src/analysis/leiden.rs b/examples/connectome-fly/src/analysis/leiden.rs new file mode 100644 index 00000000..9bea2c59 --- /dev/null +++ b/examples/connectome-fly/src/analysis/leiden.rs @@ -0,0 +1,493 @@ +//! Leiden community detection: multi-level Louvain + Traag's +//! refinement (Traag, Waltman, van Eck 2019, *From Louvain to Leiden: +//! guaranteeing well-connected communities*, *Sci. Rep.* 9:5233). +//! +//! Why this exists (ADR-154 §17 item 11): `structural::louvain_labels` +//! collapses to a single super-community on the demo's N=1024 SBM +//! (`louvain_ari = 0.000`). Refinement splits weakly-connected +//! communities before the next level's moves can collapse them. +//! +//! Each level: +//! 1. Local moves (Louvain-style). `level1_moves` at level 0; +//! `level1_moves_from` at level ≥ 1 (non-singleton initial: +//! super-nodes from the same previous coarse community start +//! grouped — Traag Alg. 1 line 10). Produces coarse `P`. +//! 2. Refinement (Alg. 4). `P_refined ← Singleton`; for each coarse +//! `C`, greedily merge still-singleton nodes into γ-well-connected +//! sub-communities (`E(S, C\S) ≥ γ · d(S) · d(C\S) / (2m)`). Once +//! placed, nodes are frozen (monotonic growth). +//! 3. Aggregate on refined labels. For level k+1, +//! `initial[new_super] = coarse[old_source]`. +//! +//! Newman-Girvan modularity has a resolution limit (Fortunato & +//! Barthélemy 2007); Leiden's refinement does not fully escape it. +//! We track the best-modularity partition across levels on the base +//! graph and return that. +//! +//! Connectivity defence: `level1_moves_from` with a non-singleton +//! initial can leave same-label super-nodes that share no super- +//! graph edge; `refine` is by construction connectivity-preserving +//! but f64 bookkeeping can leak. We apply +//! `split_into_connected_components` to coarse (level ≥ 1) and +//! refined partitions; splitting only raises modularity. +//! +//! Determinism: ascending-id iteration, lower-sub-id tie-break, no +//! RNG. Same input → bit-identical output. + +use std::collections::{HashMap, HashSet}; + +use crate::connectome::Connectome; + +use super::structural::{aggregate, compact_labels, level1_moves}; + +/// Resolution γ for the well-connectedness check +/// `E(S, C\S) ≥ γ · d(S) · d(C\S) / (2m)`. γ = 1.0 is Traag's canonical +/// choice. +const GAMMA: f64 = 1.0; + +/// Safety cap on outer aggregation levels (Leiden terminates in 2–4 in +/// practice). +const MAX_LEVELS: usize = 8; + +/// Safety cap on `level1_moves_from` sweeps per level. +const MAX_LOCAL_MOVE_PASSES: usize = 16; + +/// Leiden community labels for the static connectome. +/// +/// Returns per-neuron labels compacted into `0..k`. Deterministic. +pub fn leiden_labels(conn: &Connectome) -> Vec { + let n0 = conn.num_neurons(); + + // Build the level-0 undirected-weighted graph. Synapses in either + // direction between the same pair are summed into a single + // undirected edge weight. + let mut agg_edges: HashMap<(u32, u32), f64> = HashMap::new(); + let row_ptr = conn.row_ptr(); + let syn = conn.synapses(); + for pre_idx in 0..n0 { + let s = row_ptr[pre_idx] as usize; + let e = row_ptr[pre_idx + 1] as usize; + for syn_entry in &syn[s..e] { + let post = syn_entry.post.idx(); + if post == pre_idx { + continue; + } + let w = syn_entry.weight as f64; + let (u, v) = if pre_idx < post { + (pre_idx as u32, post as u32) + } else { + (post as u32, pre_idx as u32) + }; + *agg_edges.entry((u, v)).or_insert(0.0) += w; + } + } + let mut adj: Vec> = vec![Vec::new(); n0]; + for ((u, v), w) in agg_edges { + adj[u as usize].push((v, w)); + adj[v as usize].push((u, w)); + } + + // Base graph state for modularity scoring (never changes). + let adj_base: Vec> = adj.clone(); + let deg_base: Vec = { + let mut d = vec![0.0_f64; n0]; + for i in 0..n0 { + for &(_, w) in &adj_base[i] { + d[i] += w; + } + } + d + }; + let two_m_base: f64 = deg_base.iter().sum::().max(1.0); + + // Current base-node → community mapping, projected through + // successive aggregation levels. + let mut labels_lvl0: Vec = (0..n0 as u32).collect(); + + // Input partition to Phase 1 at the current level. Singleton at + // level 0; at level ≥ 1, super-nodes inherit their previous + // coarse community (Traag Alg. 1 line 10). + let mut initial: Vec = (0..adj.len() as u32).collect(); + + // Best-modularity candidate (k ≥ 2) on the base graph. + let mut best_labels = labels_lvl0.clone(); + let mut best_q = modularity(&adj_base, &labels_lvl0, °_base, two_m_base); + + for level in 0..MAX_LEVELS { + let n = adj.len(); + + // Phase 1 — local moves (+ connectivity split at level ≥ 1). + let raw_coarse = if (0..n).all(|i| initial[i] == i as u32) { + level1_moves(&adj, n) + } else { + level1_moves_from(&adj, &initial) + }; + let coarse = if level == 0 { + raw_coarse.clone() + } else { + split_into_connected_components(&adj, &raw_coarse) + }; + + // Phase 2 — refinement (+ defensive connectivity split). + let raw_refined = refine(&adj, &coarse, GAMMA); + let refined = split_into_connected_components(&adj, &raw_refined); + + // Candidate: project coarse labels to base and score Q. + let coarse_projected: Vec = labels_lvl0.iter().map(|&l| coarse[l as usize]).collect(); + consider_candidate( + &adj_base, + &coarse_projected, + °_base, + two_m_base, + &mut best_labels, + &mut best_q, + ); + + // Termination (Traag Alg. 1 line 4): MoveNodesFast produced + // the singleton partition ⇒ nothing left to merge. + if count_unique(&coarse) == n { + break; + } + + // Project refined → labels_lvl0 and score as candidate. + for lbl in labels_lvl0.iter_mut() { + *lbl = refined[*lbl as usize]; + } + consider_candidate( + &adj_base, + &labels_lvl0, + °_base, + two_m_base, + &mut best_labels, + &mut best_q, + ); + + // Phase 3 — aggregate on refined labels. + let (next_adj, renum) = aggregate(&adj, &refined); + for lbl in labels_lvl0.iter_mut() { + *lbl = *renum.get(lbl).expect("super-community in renum"); + } + if next_adj.len() == adj.len() { + break; + } + + // Next level's `initial`: new super-nodes inherit the coarse + // community they were refined out of. + let new_n = next_adj.len(); + let mut next_initial = vec![0_u32; new_n]; + for i in 0..n { + let new_sub = *renum.get(&refined[i]).expect("renum"); + next_initial[new_sub as usize] = coarse[i]; + } + + adj = next_adj; + initial = next_initial; + } + + let _ = best_q; + compact_labels(&best_labels) +} + +/// Update `best_labels` / `best_q` if `candidate` has k ≥ 2 +/// communities and strictly higher modularity than `*best_q`. +fn consider_candidate( + adj: &[Vec<(u32, f64)>], + candidate: &[u32], + deg: &[f64], + two_m: f64, + best_labels: &mut Vec, + best_q: &mut f64, +) { + if count_unique(candidate) < 2 { + return; + } + let q = modularity(adj, candidate, deg, two_m); + if q > *best_q + 1e-12 { + *best_q = q; + best_labels.clone_from(&candidate.to_vec()); + } +} + +/// Newman-Girvan modularity summed per-community. `adj` double-stores +/// each undirected edge (matches `structural::louvain_labels`). +fn modularity(adj: &[Vec<(u32, f64)>], labels: &[u32], deg: &[f64], two_m: f64) -> f64 { + if two_m <= 0.0 { + return 0.0; + } + let n = adj.len(); + let mut e_in: HashMap = HashMap::new(); + let mut d_sum: HashMap = HashMap::new(); + for i in 0..n { + *d_sum.entry(labels[i]).or_insert(0.0) += deg[i]; + for &(j, w) in &adj[i] { + if labels[j as usize] == labels[i] { + *e_in.entry(labels[i]).or_insert(0.0) += w; + } + } + } + let mut q = 0.0_f64; + for c in d_sum.keys() { + let d = *d_sum.get(c).unwrap_or(&0.0); + let e = *e_in.get(c).unwrap_or(&0.0); + q += e / two_m - (d / two_m) * (d / two_m); + } + q +} + +/// Number of distinct labels in `labels`. +fn count_unique(labels: &[u32]) -> usize { + let mut s: HashSet = HashSet::new(); + for &l in labels { + s.insert(l); + } + s.len() +} + +/// Split each community in `labels` into its BFS-connected components +/// in the adjacency graph `adj`. Returns new labels where two nodes +/// share a label iff they shared a label in `labels` AND are +/// reachable from each other via `adj` edges whose BOTH endpoints +/// also share that label. +/// +/// Output ids are unique within the result and disjoint from input +/// ids (running counter starting above `max(labels)`). +fn split_into_connected_components(adj: &[Vec<(u32, f64)>], labels: &[u32]) -> Vec { + let n = adj.len(); + let mut out = vec![u32::MAX; n]; + let mut next_id: u32 = labels.iter().copied().max().unwrap_or(0).saturating_add(1); + + for seed in 0..n { + if out[seed] != u32::MAX { + continue; + } + let comm = labels[seed]; + let new_id = next_id; + next_id = next_id.saturating_add(1); + let mut stack = vec![seed]; + while let Some(v) = stack.pop() { + if out[v] != u32::MAX { + continue; + } + if labels[v] != comm { + continue; + } + out[v] = new_id; + for &(u, _) in &adj[v] { + let u = u as usize; + if out[u] == u32::MAX && labels[u] == comm { + stack.push(u); + } + } + } + } + for i in 0..n { + if out[i] == u32::MAX { + out[i] = next_id; + next_id = next_id.saturating_add(1); + } + } + out +} + +/// `RefinePartition(G, P)` — Traag 2019 Algorithm 4. Starts with the +/// singleton partition and, within each coarse community in `coarse`, +/// greedily merges singleton nodes into well-connected sub-communities. +fn refine(adj: &[Vec<(u32, f64)>], coarse: &[u32], gamma: f64) -> Vec { + let n = adj.len(); + let mut deg = vec![0.0_f64; n]; + for i in 0..n { + for &(_, w) in &adj[i] { + deg[i] += w; + } + } + let two_m: f64 = deg.iter().sum::().max(1.0); + + let mut by_coarse: HashMap> = HashMap::new(); + for (i, &c) in coarse.iter().enumerate() { + by_coarse.entry(c).or_default().push(i as u32); + } + let mut coarse_keys: Vec = by_coarse.keys().copied().collect(); + coarse_keys.sort(); + + let mut sub: Vec = (0..n as u32).collect(); + for coarse_id in coarse_keys { + let mut nodes = by_coarse.remove(&coarse_id).unwrap_or_default(); + nodes.sort(); + if nodes.len() <= 1 { + continue; + } + refine_one_community(&mut sub, adj, &nodes, °, two_m, gamma); + } + sub +} + +/// `MergeNodesSubset(G, P_refined, C)` — Traag 2019 Algorithm 4 for +/// one coarse community. Only singleton nodes move; once v joins a +/// non-singleton sub-community it stays (monotonic growth preserves +/// internal connectivity). +fn refine_one_community( + sub: &mut [u32], + adj: &[Vec<(u32, f64)>], + nodes: &[u32], + deg: &[f64], + two_m: f64, + gamma: f64, +) { + let mut in_c = vec![false; adj.len()]; + let mut d_total_c = 0.0_f64; + for &v in nodes { + in_c[v as usize] = true; + d_total_c += deg[v as usize]; + } + + // Per-sub-community state in C: + // deg_sum[s] = Σ deg(i) for i ∈ s, + // e_out[s] = E(s, C\s) counted once per undirected edge. + let mut deg_sum: HashMap = HashMap::with_capacity(nodes.len()); + let mut e_out: HashMap = HashMap::with_capacity(nodes.len()); + for &v in nodes { + deg_sum.insert(v, deg[v as usize]); + let mut ev = 0.0; + for &(j, w) in &adj[v as usize] { + if in_c[j as usize] && j != v { + ev += w; + } + } + e_out.insert(v, ev); + } + + // Precompute whether each singleton v is well-connected to C. + let mut v_well: HashMap = HashMap::with_capacity(nodes.len()); + for &v in nodes { + let d_v = deg[v as usize]; + let k_v_c = *e_out.get(&v).unwrap_or(&0.0); + let rhs = gamma * d_v * (d_total_c - d_v) / (2.0 * two_m); + v_well.insert(v, k_v_c >= rhs - 1e-12); + } + + let mut moved = vec![false; adj.len()]; + for &v in nodes { + if moved[v as usize] || !v_well.get(&v).copied().unwrap_or(false) { + continue; + } + let s_v = sub[v as usize]; + debug_assert_eq!(s_v, v); + let d_v = deg[v as usize]; + + // Weight from v into each candidate sub-community within C. + let mut k_to: HashMap = HashMap::new(); + for &(j, w) in &adj[v as usize] { + if !in_c[j as usize] || j == v { + continue; + } + *k_to.entry(sub[j as usize]).or_insert(0.0) += w; + } + let mut cand_ids: Vec = k_to.keys().copied().collect(); + cand_ids.sort(); + + let mut best_target: u32 = s_v; + let mut best_gain: f64 = 0.0; + for s_t in cand_ids { + if s_t == s_v { + continue; + } + let d_s = *deg_sum.get(&s_t).unwrap_or(&0.0); + let e_s_rest = *e_out.get(&s_t).unwrap_or(&0.0); + // Target well-connectedness (Traag §2.3, weighted form). + if e_s_rest < gamma * d_s * (d_total_c - d_s) / (2.0 * two_m) { + continue; + } + let k_to_t = *k_to.get(&s_t).unwrap_or(&0.0); + // Modularity-joining gain (matches level1_moves). + let gain = k_to_t / two_m - d_v * d_s / (2.0 * two_m * two_m); + if gain > best_gain + 1e-12 { + best_gain = gain; + best_target = s_t; + } + } + if best_target == s_v { + continue; + } + + // Move v into best_target. e_out delta (adj double-stores): + // (k_v_c − k_to_new) [v's external-to-best edges added] + // − 2·k_to_new [peer edges both sides become internal] + let k_to_new = *k_to.get(&best_target).unwrap_or(&0.0); + let k_v_c: f64 = k_to.values().sum(); + deg_sum.remove(&s_v); + e_out.remove(&s_v); + *deg_sum.entry(best_target).or_insert(0.0) += d_v; + let et = e_out.entry(best_target).or_insert(0.0); + *et += k_v_c - 2.0 * k_to_new; + if *et < 0.0 { + *et = 0.0; + } + sub[v as usize] = best_target; + moved[v as usize] = true; + } +} + +/// `level1_moves` variant that accepts a non-singleton initial +/// partition. Node `i` starts in community `initial[i]`. All other +/// semantics (weighted Δmodularity, deterministic ascending-id +/// iteration, tie-break toward lower community id) match +/// `structural::level1_moves`. +fn level1_moves_from(adj: &[Vec<(u32, f64)>], initial: &[u32]) -> Vec { + let n = adj.len(); + debug_assert_eq!(initial.len(), n); + + let mut deg = vec![0.0_f64; n]; + for i in 0..n { + for &(_, w) in &adj[i] { + deg[i] += w; + } + } + let two_m: f64 = deg.iter().sum::().max(1.0); + + let mut comm: Vec = initial.to_vec(); + let mut cdeg: HashMap = HashMap::new(); + for i in 0..n { + *cdeg.entry(comm[i]).or_insert(0.0) += deg[i]; + } + + let mut it = 0; + let mut changed = true; + while changed && it < MAX_LOCAL_MOVE_PASSES { + changed = false; + for i in 0..n { + let mut neigh_w: HashMap = HashMap::new(); + for &(j, w) in &adj[i] { + if j as usize == i { + continue; + } + *neigh_w.entry(comm[j as usize]).or_insert(0.0) += w; + } + let c_self = comm[i]; + let d_i = deg[i]; + let mut best_c = c_self; + let mut best_gain = 0.0_f64; + let mut cands: Vec = neigh_w.keys().copied().collect(); + cands.sort(); + for c in cands { + if c == c_self { + continue; + } + let k_ic = *neigh_w.get(&c).unwrap_or(&0.0); + let d_c = *cdeg.get(&c).unwrap_or(&0.0); + let gain = k_ic / two_m - d_i * d_c / (2.0 * two_m * two_m); + if gain > best_gain + 1e-9 { + best_gain = gain; + best_c = c; + } + } + if best_c != c_self { + *cdeg.entry(c_self).or_insert(0.0) -= d_i; + *cdeg.entry(best_c).or_insert(0.0) += d_i; + comm[i] = best_c; + changed = true; + } + } + it += 1; + } + comm +} diff --git a/examples/connectome-fly/src/analysis/mod.rs b/examples/connectome-fly/src/analysis/mod.rs index 2c547859..de0efa14 100644 --- a/examples/connectome-fly/src/analysis/mod.rs +++ b/examples/connectome-fly/src/analysis/mod.rs @@ -15,6 +15,7 @@ //! here. pub mod gpu; +pub mod leiden; pub mod motif; pub mod partition; pub mod structural; @@ -83,6 +84,18 @@ impl Analysis { structural::louvain_labels(conn) } + /// Leiden community labels — the multi-level Louvain pipeline with + /// Traag's refinement phase inserted between local moves and + /// aggregation (Traag et al. 2019, *From Louvain to Leiden: + /// guaranteeing well-connected communities*, *Sci. Rep.* 9:5233). + /// Fixes the over-aggregation failure mode of `louvain_labels` on + /// hub-heavy SBMs. Deterministic; no RNG. See `analysis::leiden` + /// for the algorithm and ADR-154 §17 item 11 for the measured + /// delta vs multi-level Louvain. + pub fn leiden_labels(&self, conn: &Connectome) -> Vec { + leiden::leiden_labels(conn) + } + /// Build motif embeddings over sliding windows and index them. /// Returns the index plus the top-k repeated motifs. pub fn retrieve_motifs( diff --git a/examples/connectome-fly/src/analysis/structural.rs b/examples/connectome-fly/src/analysis/structural.rs index 9a993bb2..06e1de07 100644 --- a/examples/connectome-fly/src/analysis/structural.rs +++ b/examples/connectome-fly/src/analysis/structural.rs @@ -283,7 +283,7 @@ pub fn louvain_labels(conn: &Connectome) -> Vec { /// One full sweep of Louvain level-1 moves on `adj` (size `n`). Returns /// per-node community labels using node indices as initial ids. Same /// deterministic tie-break as the single-level variant. -fn level1_moves(adj: &[Vec<(u32, f64)>], n: usize) -> Vec { +pub(super) fn level1_moves(adj: &[Vec<(u32, f64)>], n: usize) -> Vec { let mut deg = vec![0.0_f64; n]; for i in 0..n { for &(_, w) in &adj[i] { @@ -342,7 +342,7 @@ fn level1_moves(adj: &[Vec<(u32, f64)>], n: usize) -> Vec { /// Aggregate `adj` into a super-graph whose nodes are the communities /// in `labels`. Returns (new_adj, renumber_map) where renumber_map[old] /// = new_community_index. Edge weights sum inside the super-nodes. -fn aggregate( +pub(super) fn aggregate( adj: &[Vec<(u32, f64)>], labels: &[u32], ) -> (Vec>, std::collections::HashMap) { @@ -373,7 +373,7 @@ fn aggregate( } /// Compact arbitrary labels into `0..k` space, preserving grouping. -fn compact_labels(labels: &[u32]) -> Vec { +pub(super) fn compact_labels(labels: &[u32]) -> Vec { let mut renum: std::collections::HashMap = std::collections::HashMap::new(); let mut out: Vec = Vec::with_capacity(labels.len()); for &lab in labels { diff --git a/examples/connectome-fly/tests/acceptance_partition.rs b/examples/connectome-fly/tests/acceptance_partition.rs index 212e45b8..a462651b 100644 --- a/examples/connectome-fly/tests/acceptance_partition.rs +++ b/examples/connectome-fly/tests/acceptance_partition.rs @@ -68,9 +68,21 @@ fn ac_3a_structural_partition_alignment() { adjusted_rand_index(&lv_a, &lv_b, is_hub) }; + // Leiden baseline (multi-level Louvain + Traag refinement). This + // line publishes the number only; the `tests/leiden_refinement.rs` + // suite is the actual gate on Leiden's behaviour. + let labels_le = an.leiden_labels(&conn); + let (le_a, le_b) = two_way_from_labels(&labels_le); + let ari_leiden = if le_a.is_empty() || le_b.is_empty() { + 0.0 + } else { + adjusted_rand_index(&le_a, &le_b, is_hub) + }; + eprintln!( "ac-3a: mincut_ari={ari_mincut:.3} greedy_ari={ari_greedy:.3} \ - louvain_ari={ari_louvain:.3} |a|={} |b|={} SOTA_target=0.75", + louvain_ari={ari_louvain:.3} leiden_ari={ari_leiden:.3} \ + |a|={} |b|={} SOTA_target=0.75", part.side_a.len(), part.side_b.len() ); diff --git a/examples/connectome-fly/tests/leiden_refinement.rs b/examples/connectome-fly/tests/leiden_refinement.rs new file mode 100644 index 00000000..9a540187 --- /dev/null +++ b/examples/connectome-fly/tests/leiden_refinement.rs @@ -0,0 +1,294 @@ +//! Leiden community-detection tests. +//! +//! Four gates, each independently measured on a deterministic input: +//! +//! 1. `leiden_ari_beats_louvain_on_default_sbm` — on the default +//! `ConnectomeConfig` (N=1024), Leiden's two-way projection scores +//! at least 0.05 ARI above multi-level Louvain's projection. This +//! is the headline gate — Leiden's refinement phase exists +//! specifically to fix the Louvain collapse measured on this graph +//! (ADR-154 §17 item 11: `louvain_ari = 0.000` vs +//! `greedy_ari = 0.174`). +//! +//! 2. `leiden_is_deterministic` — two runs on the same connectome +//! produce bit-identical label vectors. +//! +//! 3. `leiden_recovers_two_planted_communities` — a deterministic +//! 2-module SBM where multi-level Louvain is known to collapse +//! (hub-boost pushes everything into a single super-community). +//! Leiden recovers the two modules at ARI ≥ 0.90. +//! +//! 4. `leiden_sub_communities_are_internally_connected` — the +//! well-connectedness invariant: after Leiden, no output community +//! is internally disconnected (every node in a community is +//! reachable from any other via BFS restricted to the community). +//! +//! Reference: Traag, Waltman, van Eck (2019), "From Louvain to Leiden: +//! guaranteeing well-connected communities", *Sci. Rep.* 9:5233. + +use std::collections::HashMap; + +use connectome_fly::{Analysis, AnalysisConfig, Connectome, ConnectomeConfig, NeuronId}; + +// ----------------------------------------------------------------- +// Gate 1 — Leiden ≥ multi-level-Louvain + 0.05 on default SBM. +// ----------------------------------------------------------------- +#[test] +fn leiden_ari_beats_louvain_on_default_sbm() { + let cfg = ConnectomeConfig::default(); + let conn = Connectome::generate(&cfg); + let an = Analysis::new(AnalysisConfig::default()); + + let num_hub = cfg.num_hub_modules; + let is_hub = |id: u32| conn.meta(NeuronId(id)).module < num_hub; + + let labels_lv = an.louvain_labels(&conn); + let (lv_a, lv_b) = two_way_from_labels(&labels_lv); + let ari_louvain = if lv_a.is_empty() || lv_b.is_empty() { + 0.0 + } else { + adjusted_rand_index(&lv_a, &lv_b, is_hub) + }; + + let labels_le = an.leiden_labels(&conn); + let (le_a, le_b) = two_way_from_labels(&labels_le); + let ari_leiden = if le_a.is_empty() || le_b.is_empty() { + 0.0 + } else { + adjusted_rand_index(&le_a, &le_b, is_hub) + }; + + let gap = ari_leiden - ari_louvain; + eprintln!( + "leiden-vs-louvain (default SBM N={}): louvain_ari={ari_louvain:.3} \ + leiden_ari={ari_leiden:.3} gap={gap:.3}", + cfg.num_neurons + ); + + assert!( + gap >= 0.05 - 1e-6, + "leiden-refinement gate: gap {gap:.3} below acceptance 0.05 \ + (louvain={ari_louvain:.3}, leiden={ari_leiden:.3}). The \ + whole point of Leiden's refinement is to beat the multi-level \ + collapse documented in ADR-154 §17 item 11." + ); +} + +// ----------------------------------------------------------------- +// Gate 2 — Determinism. +// ----------------------------------------------------------------- +#[test] +fn leiden_is_deterministic() { + let cfg = ConnectomeConfig::default(); + let conn = Connectome::generate(&cfg); + let an = Analysis::new(AnalysisConfig::default()); + let a = an.leiden_labels(&conn); + let b = an.leiden_labels(&conn); + assert_eq!(a, b, "leiden determinism: two runs must match exactly"); +} + +// ----------------------------------------------------------------- +// Gate 3 — Hand-crafted 2-community SBM where Louvain collapses. +// ----------------------------------------------------------------- +#[test] +fn leiden_recovers_two_planted_communities() { + // Clean 2-module SBM: strong within-module density, near-zero + // between-module density, no hub boost. This is the textbook + // case where community-detection algorithms should cleanly + // recover the planted partition — used here to verify Leiden's + // refinement phase behaves sensibly on clean input. + let cfg = ConnectomeConfig { + num_neurons: 200, + num_modules: 2, + num_hub_modules: 0, + avg_out_degree: 40.0, + p_within: 0.60, + p_between: 0.003, + p_hub_boost: 0.0, + seed: 0xC0DE_DAB1_A7EA_u64, + ..ConnectomeConfig::default() + }; + let conn = Connectome::generate(&cfg); + let an = Analysis::new(AnalysisConfig::default()); + + let is_module_zero = |id: u32| conn.meta(NeuronId(id)).module == 0; + + let labels = an.leiden_labels(&conn); + let (a, b) = two_way_from_labels(&labels); + let ari = if a.is_empty() || b.is_empty() { + 0.0 + } else { + adjusted_rand_index(&a, &b, is_module_zero) + }; + + // For comparison: record what multi-level Louvain does on the same + // graph so the delta is auditable. + let labels_lv = an.louvain_labels(&conn); + let (la, lb) = two_way_from_labels(&labels_lv); + let ari_lv = if la.is_empty() || lb.is_empty() { + 0.0 + } else { + adjusted_rand_index(&la, &lb, is_module_zero) + }; + + eprintln!( + "planted-2-SBM (N={}): leiden_ari={ari:.3} louvain_ari={ari_lv:.3} |A|={} |B|={}", + cfg.num_neurons, + a.len(), + b.len() + ); + + assert!( + ari.abs() >= 0.90, + "leiden must recover the 2 planted communities at ARI ≥ 0.90 \ + (got {ari:.3}); louvain baseline scored {ari_lv:.3}" + ); +} + +// ----------------------------------------------------------------- +// Gate 4 — Well-connectedness invariant. +// ----------------------------------------------------------------- +#[test] +fn leiden_sub_communities_are_internally_connected() { + let cfg = ConnectomeConfig::default(); + let conn = Connectome::generate(&cfg); + let an = Analysis::new(AnalysisConfig::default()); + let labels = an.leiden_labels(&conn); + + // Build an undirected adjacency for the BFS. Self-loops dropped, + // both directions recorded — matches the convention in + // `analysis::leiden` and `structural::louvain_labels`. + let n = conn.num_neurons(); + let mut adj: Vec> = vec![Vec::new(); n]; + let row_ptr = conn.row_ptr(); + let syn = conn.synapses(); + for pre_idx in 0..n { + let s = row_ptr[pre_idx] as usize; + let e = row_ptr[pre_idx + 1] as usize; + for syn_entry in &syn[s..e] { + let post = syn_entry.post.idx(); + if post == pre_idx { + continue; + } + adj[pre_idx].push(post as u32); + adj[post].push(pre_idx as u32); + } + } + + let mut by_comm: HashMap> = HashMap::new(); + for (i, &l) in labels.iter().enumerate() { + by_comm.entry(l).or_default().push(i as u32); + } + + let mut disconnected: Vec<(u32, usize)> = Vec::new(); + for (&comm, nodes) in &by_comm { + if nodes.len() <= 1 { + continue; + } + let seed = *nodes.iter().min().expect("non-empty"); + let label_set: std::collections::HashSet = nodes.iter().copied().collect(); + let mut seen: std::collections::HashSet = std::collections::HashSet::new(); + let mut q: std::collections::VecDeque = std::collections::VecDeque::new(); + q.push_back(seed); + seen.insert(seed); + while let Some(v) = q.pop_front() { + for &u in &adj[v as usize] { + if label_set.contains(&u) && !seen.contains(&u) { + seen.insert(u); + q.push_back(u); + } + } + } + if seen.len() < nodes.len() { + disconnected.push((comm, nodes.len() - seen.len())); + } + } + + if !disconnected.is_empty() { + for (comm, missed) in &disconnected { + eprintln!( + "leiden well-connectedness: community {comm} had {missed} \ + node(s) unreachable via community-induced BFS" + ); + } + panic!( + "leiden must produce internally-connected communities; \ + {} community(ies) violated the invariant", + disconnected.len() + ); + } + eprintln!( + "leiden well-connectedness: {} communities, all internally connected", + by_comm.len() + ); +} + +// ----------------------------------------------------------------- +// Helpers (duplicated from acceptance_partition.rs — test files are +// separate compilation units). +// ----------------------------------------------------------------- +fn two_way_from_labels(labels: &[u32]) -> (Vec, Vec) { + let mut count: HashMap = HashMap::new(); + for l in labels { + *count.entry(*l).or_insert(0) += 1; + } + let mut counts: Vec<(u32, u32)> = count.into_iter().collect(); + counts.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0))); + if counts.len() < 2 { + return (Vec::new(), Vec::new()); + } + let (top_a, top_b) = (counts[0].0, counts[1].0); + if top_a == top_b { + return (Vec::new(), Vec::new()); + } + let mut side_a: Vec = Vec::new(); + let mut side_b: Vec = Vec::new(); + for (i, l) in labels.iter().enumerate() { + if *l == top_b { + side_b.push(i as u32); + } else { + side_a.push(i as u32); + } + } + (side_a, side_b) +} + +fn adjusted_rand_index bool>(side_a: &[u32], side_b: &[u32], gt_is_a: F) -> f32 { + let n = (side_a.len() + side_b.len()) as f32; + if n < 2.0 { + return 0.0; + } + let mut c: [[u32; 2]; 2] = [[0; 2]; 2]; + for id in side_a { + let j = if gt_is_a(*id) { 0 } else { 1 }; + c[0][j] += 1; + } + for id in side_b { + let j = if gt_is_a(*id) { 0 } else { 1 }; + c[1][j] += 1; + } + let a0 = (c[0][0] + c[0][1]) as f32; + let a1 = (c[1][0] + c[1][1]) as f32; + let b0 = (c[0][0] + c[1][0]) as f32; + let b1 = (c[0][1] + c[1][1]) as f32; + let binom = |k: f32| -> f32 { + if k < 2.0 { + 0.0 + } else { + k * (k - 1.0) / 2.0 + } + }; + let ij: f32 = [c[0][0], c[0][1], c[1][0], c[1][1]] + .iter() + .map(|x| binom(*x as f32)) + .sum(); + let ai: f32 = binom(a0) + binom(a1); + let bj: f32 = binom(b0) + binom(b1); + let nc = binom(n); + let expected = ai * bj / nc.max(1e-6); + let denom = 0.5 * (ai + bj) - expected; + if denom.abs() < 1e-6 { + return 0.0; + } + (ij - expected) / denom +}