feat(connectome-fly): FlyWire v783 ingest module + fixture tests

Implements src/connectome/flywire/{mod,schema,loader,fixture}.rs and
tests/flywire_ingest.rs — the ingest path named as the first follow-up
in ADR-154 §13. Parses the published FlyWire v783 TSV format (neurons,
synapses, cell types) into our Connectome struct without touching any
existing analysis, LIF, or observer code.

Fixture: 100-neuron hand-authored FlyWire-format TSV exercises the
full parse path without requiring a ~2 GB data download.

NT → sign mapping: ACH/GLUT/GABA/SER/OCT/DOP/HIST follow the Lin et al.
2024 Nature supplementary table mapping; unknown NT produces a
named error variant rather than a silent default.

File sizes: max file = 437 lines (fixture.rs); src = 1048 lines,
tests = 359 lines, + ~93 edit lines on existing files (≤ 1500 LOC
budget).
Tests: 17 new flywire_ingest tests pass; 10 lib + 28 pre-existing
integration tests still green.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
ruvnet 2026-04-22 11:59:32 -04:00
parent bd26c4ee41
commit cf21327c96
11 changed files with 1505 additions and 7 deletions

2
Cargo.lock generated
View file

@ -1587,6 +1587,7 @@ dependencies = [
"bincode 1.3.3",
"bytemuck",
"criterion 0.5.1",
"csv",
"cudarc",
"rand 0.8.5",
"rand_distr 0.4.3",
@ -1597,6 +1598,7 @@ dependencies = [
"serde",
"serde_json",
"smallvec 1.15.1",
"tempfile",
"thiserror 1.0.69",
"wide",
]

View file

@ -46,12 +46,17 @@ bincode = "1.3"
bytemuck = { version = "1.16", features = ["derive"] }
thiserror = "1.0"
# FlyWire v783 TSV ingest (connectome::flywire). Column-named streaming
# parser; sibling ruvector-graph and ruvector-cli already pin 1.3.
csv = "1.3"
# Optional — gated by feature flags.
wide = { version = "0.7", optional = true }
cudarc = { version = "0.13", optional = true, default-features = false, features = ["cuda-12050", "driver", "std"] }
[dev-dependencies]
criterion = { version = "0.5", features = ["html_reports"] }
tempfile = "3"
[[bench]]
name = "lif_throughput"

View file

@ -0,0 +1,437 @@
//! Hand-authored 100-neuron fixture in FlyWire v783 TSV format.
//!
//! The fixture lives as three `&'static str` constants so the ingest
//! tests can materialize temp TSV files without any network download
//! or large on-disk asset. The composition targets:
//!
//! - **Cell-type coverage**: KC, MBON, PN, DN, Motor, PR, LN, optic
//! intrinsic — the classes the outer `NeuronClass` enum can map to.
//! - **NT coverage**: ACH, GLUT, GABA, HIST, SER, DOP, OCT — every
//! entry in the research-doc §4 NT table at least once.
//! - **Side / flow coverage**: left + right + center, afferent +
//! efferent + intrinsic.
//! - **Synapse shape**: 159 directed edges, file-declared ordering, no
//! dangling references and no authored self-loops.
//!
//! `EXPECTED_*` constants capture the counts so tests can assert
//! structural invariants without re-counting rows by hand.
/// Number of neuron data rows emitted by [`neurons_tsv`], excluding the
/// header line: five payload chunks (`NEURONS_A` … `NEURONS_E`) of 20
/// rows each.
pub const EXPECTED_NEURONS: usize = 100;
/// Number of connection data rows emitted by [`connections_tsv`],
/// excluding the header line: `CONNECTIONS_A` holds 31 rows and
/// `CONNECTIONS_B` … `CONNECTIONS_E` hold 32 rows each (31 + 4 × 32).
pub const EXPECTED_SYNAPSES: usize = 159;
/// Number of classification data rows emitted by
/// [`classification_tsv`], excluding the header line. A strict subset of
/// neurons — the loader must still function when a neuron has no
/// classification override.
pub const EXPECTED_CLASSIFICATIONS: usize = 40;
// ---------------------------------------------------------------------
// Fixture payloads.
//
// Split into const `&str` chunks that the `*_tsv()` builders below join
// at runtime, so each const stays under ~100 lines of source. Data is
// hand-authored; the 8-digit neuron ids are arbitrary but unique.
// ---------------------------------------------------------------------
const NEURONS_HEADER: &str =
"neuron_id\tsupervoxel_id\tcell_type\tnt_type\tside\tnerve\tflow\tsuper_class\n";
const NEURONS_A: &str = "\
10000001\t9000001\tPR_R1\tHIST\tleft\tOCN\tafferent\tsensory\n\
10000002\t9000002\tPR_R1\tHIST\tright\tOCN\tafferent\tsensory\n\
10000003\t9000003\tPR_R7\tHIST\tleft\tOCN\tafferent\tsensory\n\
10000004\t9000004\tPR_R8\tHIST\tright\tOCN\tafferent\tsensory\n\
10000005\t9000005\tPN_glom_DA1\tACH\tleft\tAN\tafferent\tsensory\n\
10000006\t9000006\tPN_glom_DL3\tACH\tright\tAN\tafferent\tsensory\n\
10000007\t9000007\tPN_glom_VM7\tACH\tleft\tAN\tafferent\tsensory\n\
10000008\t9000008\tORN_chm_A\tACH\tleft\tAN\tafferent\tsensory\n\
10000009\t9000009\tORN_chm_B\tACH\tright\tAN\tafferent\tsensory\n\
10000010\t9000010\tJO_mech_a\tACH\tleft\tJN\tafferent\tsensory\n\
10000011\t9000011\tJO_mech_b\tACH\tright\tJN\tafferent\tsensory\n\
10000012\t9000012\tML_mech_c\tACH\tleft\tLN\tafferent\tsensory\n\
10000013\t9000013\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\
10000014\t9000014\tKC_g\tACH\tright\t\tintrinsic\tcentral\n\
10000015\t9000015\tKC_ab\tACH\tleft\t\tintrinsic\tcentral\n\
10000016\t9000016\tKC_ab\tACH\tright\t\tintrinsic\tcentral\n\
10000017\t9000017\tKC_apbp\tACH\tleft\t\tintrinsic\tcentral\n\
10000018\t9000018\tKC_apbp\tACH\tright\t\tintrinsic\tcentral\n\
10000019\t9000019\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\
10000020\t9000020\tKC_ab\tACH\tright\t\tintrinsic\tcentral\n\
";
const NEURONS_B: &str = "\
10000021\t9000021\tKC_apbp\tACH\tleft\t\tintrinsic\tcentral\n\
10000022\t9000022\tKC_g\tACH\tright\t\tintrinsic\tcentral\n\
10000023\t9000023\tKC_ab\tACH\tleft\t\tintrinsic\tcentral\n\
10000024\t9000024\tKC_apbp\tACH\tright\t\tintrinsic\tcentral\n\
10000025\t9000025\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\
10000026\t9000026\tMBON01\tGLUT\tleft\t\tintrinsic\tcentral\n\
10000027\t9000027\tMBON02\tGLUT\tright\t\tintrinsic\tcentral\n\
10000028\t9000028\tMBON03\tGABA\tleft\t\tintrinsic\tcentral\n\
10000029\t9000029\tMBON04\tGABA\tright\t\tintrinsic\tcentral\n\
10000030\t9000030\tMBON05\tACH\tleft\t\tintrinsic\tcentral\n\
10000031\t9000031\tMBON06\tACH\tright\t\tintrinsic\tcentral\n\
10000032\t9000032\tDAN_PPL1\tDOP\tleft\t\tintrinsic\tcentral\n\
10000033\t9000033\tDAN_PPL1\tDOP\tright\t\tintrinsic\tcentral\n\
10000034\t9000034\tDAN_PAM\tDOP\tleft\t\tintrinsic\tcentral\n\
10000035\t9000035\tDAN_PAM\tDOP\tright\t\tintrinsic\tcentral\n\
10000036\t9000036\tOAN_VPM3\tOCT\tleft\t\tintrinsic\tcentral\n\
10000037\t9000037\tOAN_VPM3\tOCT\tright\t\tintrinsic\tcentral\n\
10000038\t9000038\tSER_DRN\tSER\tcenter\t\tintrinsic\tcentral\n\
10000039\t9000039\tSER_DRN\tSER\tcenter\t\tintrinsic\tcentral\n\
10000040\t9000040\tEPG_ring\tACH\tleft\t\tintrinsic\tcentral\n\
";
const NEURONS_C: &str = "\
10000041\t9000041\tEPG_ring\tACH\tright\t\tintrinsic\tcentral\n\
10000042\t9000042\tEPG_ring\tACH\tleft\t\tintrinsic\tcentral\n\
10000043\t9000043\tPEN_fan\tACH\tright\t\tintrinsic\tcentral\n\
10000044\t9000044\tPEN_fan\tACH\tleft\t\tintrinsic\tcentral\n\
10000045\t9000045\tFB_col\tACH\tright\t\tintrinsic\tcentral\n\
10000046\t9000046\tFB_col\tACH\tleft\t\tintrinsic\tcentral\n\
10000047\t9000047\tLAL_loc\tACH\tright\t\tintrinsic\tcentral\n\
10000048\t9000048\tLAL_loc\tGABA\tleft\t\tintrinsic\tcentral\n\
10000049\t9000049\tDNp01\tACH\tleft\tCN\tefferent\tdescending\n\
10000050\t9000050\tDNp02\tACH\tright\tCN\tefferent\tdescending\n\
10000051\t9000051\tDNp03\tACH\tleft\tCN\tefferent\tdescending\n\
10000052\t9000052\tDNg01\tACH\tright\tCN\tefferent\tdescending\n\
10000053\t9000053\tDNg02\tACH\tleft\tCN\tefferent\tdescending\n\
10000054\t9000054\tMotor_leg_1\tACH\tleft\tLN\tefferent\tmotor\n\
10000055\t9000055\tMotor_leg_2\tACH\tright\tLN\tefferent\tmotor\n\
10000056\t9000056\tMotor_leg_3\tACH\tleft\tLN\tefferent\tmotor\n\
10000057\t9000057\tMotor_wing_1\tACH\tright\tWN\tefferent\tmotor\n\
10000058\t9000058\tMotor_wing_2\tACH\tleft\tWN\tefferent\tmotor\n\
10000059\t9000059\tMotor_wing_3\tACH\tright\tWN\tefferent\tmotor\n\
10000060\t9000060\tMotor_hlt\tACH\tleft\tHN\tefferent\tmotor\n\
";
const NEURONS_D: &str = "\
10000061\t9000061\tLN_GABA_A\tGABA\tleft\t\tintrinsic\tcentral\n\
10000062\t9000062\tLN_GABA_B\tGABA\tright\t\tintrinsic\tcentral\n\
10000063\t9000063\tLN_GABA_C\tGABA\tleft\t\tintrinsic\tcentral\n\
10000064\t9000064\tLN_GABA_D\tGABA\tright\t\tintrinsic\tcentral\n\
10000065\t9000065\tLN_GABA_E\tGABA\tleft\t\tintrinsic\tcentral\n\
10000066\t9000066\tLN_GABA_F\tGABA\tright\t\tintrinsic\tcentral\n\
10000067\t9000067\tLN_mix_G\tGLUT\tleft\t\tintrinsic\tcentral\n\
10000068\t9000068\tLN_mix_H\tGLUT\tright\t\tintrinsic\tcentral\n\
10000069\t9000069\tLN_mix_I\tGLUT\tleft\t\tintrinsic\tcentral\n\
10000070\t9000070\tLN_mix_J\tGLUT\tright\t\tintrinsic\tcentral\n\
10000071\t9000071\tLoc_opt_A\tACH\tleft\t\tintrinsic\toptic\n\
10000072\t9000072\tLoc_opt_B\tACH\tright\t\tintrinsic\toptic\n\
10000073\t9000073\tLoc_opt_C\tACH\tleft\t\tintrinsic\toptic\n\
10000074\t9000074\tLoc_opt_D\tGABA\tright\t\tintrinsic\toptic\n\
10000075\t9000075\tLoc_opt_E\tGABA\tleft\t\tintrinsic\toptic\n\
10000076\t9000076\tLoc_opt_F\tACH\tright\t\tintrinsic\toptic\n\
10000077\t9000077\tLoc_opt_G\tGLUT\tleft\t\tintrinsic\toptic\n\
10000078\t9000078\tLoc_opt_H\tGLUT\tright\t\tintrinsic\toptic\n\
10000079\t9000079\tLoc_opt_I\tACH\tleft\t\tintrinsic\toptic\n\
10000080\t9000080\tLoc_opt_J\tGABA\tright\t\tintrinsic\toptic\n\
";
const NEURONS_E: &str = "\
10000081\t9000081\tPN_glom_DM1\tACH\tleft\tAN\tafferent\tsensory\n\
10000082\t9000082\tPN_glom_DM2\tACH\tright\tAN\tafferent\tsensory\n\
10000083\t9000083\tPN_glom_DM3\tACH\tleft\tAN\tafferent\tsensory\n\
10000084\t9000084\tAscending_A\tACH\tright\t\tintrinsic\tascending\n\
10000085\t9000085\tAscending_B\tACH\tleft\t\tintrinsic\tascending\n\
10000086\t9000086\tAscending_C\tACH\tright\t\tintrinsic\tascending\n\
10000087\t9000087\tAscending_D\tACH\tleft\t\tintrinsic\tascending\n\
10000088\t9000088\tProj_lcb_A\tACH\tleft\t\tintrinsic\tcentral\n\
10000089\t9000089\tProj_lcb_B\tACH\tright\t\tintrinsic\tcentral\n\
10000090\t9000090\tProj_lcb_C\tACH\tleft\t\tintrinsic\tcentral\n\
10000091\t9000091\tProj_lcb_D\tACH\tright\t\tintrinsic\tcentral\n\
10000092\t9000092\tProj_lcb_E\tACH\tleft\t\tintrinsic\tcentral\n\
10000093\t9000093\tMisc_X_A\tACH\tleft\t\tintrinsic\tother\n\
10000094\t9000094\tMisc_X_B\tACH\tright\t\tintrinsic\tother\n\
10000095\t9000095\tMisc_X_C\tACH\tleft\t\tintrinsic\tother\n\
10000096\t9000096\tMisc_X_D\tACH\tright\t\tintrinsic\tother\n\
10000097\t9000097\tMisc_X_E\tACH\tleft\t\tintrinsic\tother\n\
10000098\t9000098\tMisc_X_F\tACH\tright\t\tintrinsic\tother\n\
10000099\t9000099\tMisc_X_G\tACH\tleft\t\tintrinsic\tother\n\
10000100\t9000100\tMisc_X_H\tACH\tright\t\tintrinsic\tother\n\
";
/// Build the complete neurons TSV payload.
///
/// The result is the header line followed by the five data chunks
/// (`NEURONS_A` … `NEURONS_E`) in declaration order — 100 data rows in
/// total.
pub fn neurons_tsv() -> String {
    [
        NEURONS_HEADER,
        NEURONS_A,
        NEURONS_B,
        NEURONS_C,
        NEURONS_D,
        NEURONS_E,
    ]
    .concat()
}
const CONNECTIONS_HEADER: &str = "pre_id\tpost_id\tneuropil\tsyn_count\tsyn_weight\tnt_type\n";
const CONNECTIONS_A: &str = "\
10000001\t10000071\tME_L\t12\t12.0\tHIST\n\
10000001\t10000072\tME_L\t8\t8.0\tHIST\n\
10000002\t10000071\tME_R\t10\t10.0\tHIST\n\
10000002\t10000073\tME_R\t7\t7.0\tHIST\n\
10000003\t10000074\tME_L\t9\t9.0\tHIST\n\
10000003\t10000075\tME_L\t11\t11.0\tHIST\n\
10000004\t10000076\tME_R\t5\t5.0\tHIST\n\
10000004\t10000077\tME_R\t6\t6.0\tHIST\n\
10000005\t10000013\tMB_CA_L\t14\t14.0\tACH\n\
10000005\t10000015\tMB_CA_L\t9\t9.0\tACH\n\
10000005\t10000017\tMB_CA_L\t7\t7.0\tACH\n\
10000006\t10000014\tMB_CA_R\t13\t13.0\tACH\n\
10000006\t10000016\tMB_CA_R\t11\t11.0\tACH\n\
10000006\t10000018\tMB_CA_R\t8\t8.0\tACH\n\
10000007\t10000013\tMB_CA_L\t6\t6.0\tACH\n\
10000007\t10000019\tMB_CA_L\t5\t5.0\tACH\n\
10000008\t10000013\tMB_CA_L\t10\t10.0\tACH\n\
10000008\t10000020\tMB_CA_R\t4\t4.0\tACH\n\
10000009\t10000014\tMB_CA_R\t12\t12.0\tACH\n\
10000009\t10000021\tMB_CA_L\t3\t3.0\tACH\n\
10000010\t10000022\tMB_CA_R\t8\t8.0\tACH\n\
10000010\t10000025\tMB_CA_L\t4\t4.0\tACH\n\
10000011\t10000023\tMB_CA_L\t7\t7.0\tACH\n\
10000011\t10000024\tMB_CA_R\t6\t6.0\tACH\n\
10000012\t10000025\tMB_CA_L\t5\t5.0\tACH\n\
10000081\t10000013\tMB_CA_L\t9\t9.0\tACH\n\
10000081\t10000015\tMB_CA_L\t6\t6.0\tACH\n\
10000082\t10000014\tMB_CA_R\t11\t11.0\tACH\n\
10000082\t10000016\tMB_CA_R\t8\t8.0\tACH\n\
10000083\t10000017\tMB_CA_L\t5\t5.0\tACH\n\
10000083\t10000019\tMB_CA_L\t7\t7.0\tACH\n\
";
const CONNECTIONS_B: &str = "\
10000013\t10000026\tMB_LH_L\t4\t4.0\tACH\n\
10000013\t10000030\tMB_LH_L\t3\t3.0\tACH\n\
10000014\t10000027\tMB_LH_R\t5\t5.0\tACH\n\
10000014\t10000031\tMB_LH_R\t4\t4.0\tACH\n\
10000015\t10000026\tMB_LH_L\t6\t6.0\tACH\n\
10000015\t10000028\tMB_LH_L\t3\t3.0\tACH\n\
10000016\t10000027\tMB_LH_R\t5\t5.0\tACH\n\
10000016\t10000029\tMB_LH_R\t4\t4.0\tACH\n\
10000017\t10000030\tMB_LH_L\t3\t3.0\tACH\n\
10000018\t10000031\tMB_LH_R\t5\t5.0\tACH\n\
10000019\t10000028\tMB_LH_L\t6\t6.0\tACH\n\
10000020\t10000029\tMB_LH_R\t4\t4.0\tACH\n\
10000021\t10000030\tMB_LH_L\t5\t5.0\tACH\n\
10000022\t10000031\tMB_LH_R\t7\t7.0\tACH\n\
10000023\t10000026\tMB_LH_L\t3\t3.0\tACH\n\
10000024\t10000027\tMB_LH_R\t4\t4.0\tACH\n\
10000025\t10000030\tMB_LH_L\t6\t6.0\tACH\n\
10000032\t10000013\tMB_PPL1_L\t3\t3.0\tDOP\n\
10000033\t10000014\tMB_PPL1_R\t4\t4.0\tDOP\n\
10000034\t10000015\tMB_PAM_L\t3\t3.0\tDOP\n\
10000035\t10000016\tMB_PAM_R\t4\t4.0\tDOP\n\
10000036\t10000017\tMB_OA_L\t2\t2.0\tOCT\n\
10000037\t10000018\tMB_OA_R\t3\t3.0\tOCT\n\
10000038\t10000040\tEB_L\t2\t2.0\tSER\n\
10000039\t10000041\tEB_R\t2\t2.0\tSER\n\
10000040\t10000044\tEB_L\t5\t5.0\tACH\n\
10000041\t10000043\tEB_R\t4\t4.0\tACH\n\
10000042\t10000044\tEB_L\t6\t6.0\tACH\n\
10000043\t10000045\tFB_L\t4\t4.0\tACH\n\
10000044\t10000046\tFB_L\t5\t5.0\tACH\n\
10000045\t10000047\tLAL_L\t6\t6.0\tACH\n\
10000046\t10000048\tLAL_R\t4\t4.0\tACH\n\
";
const CONNECTIONS_C: &str = "\
10000047\t10000049\tLAL_L\t5\t5.0\tACH\n\
10000048\t10000050\tLAL_R\t4\t4.0\tGABA\n\
10000026\t10000049\tSMP_L\t6\t6.0\tGLUT\n\
10000027\t10000050\tSMP_R\t5\t5.0\tGLUT\n\
10000028\t10000049\tSMP_L\t3\t3.0\tGABA\n\
10000029\t10000050\tSMP_R\t4\t4.0\tGABA\n\
10000030\t10000051\tSMP_L\t5\t5.0\tACH\n\
10000031\t10000052\tSMP_R\t4\t4.0\tACH\n\
10000049\t10000054\tGNG_L\t8\t8.0\tACH\n\
10000049\t10000056\tGNG_L\t5\t5.0\tACH\n\
10000050\t10000055\tGNG_R\t7\t7.0\tACH\n\
10000050\t10000057\tGNG_R\t4\t4.0\tACH\n\
10000051\t10000058\tGNG_L\t5\t5.0\tACH\n\
10000052\t10000059\tGNG_R\t4\t4.0\tACH\n\
10000053\t10000060\tGNG_L\t6\t6.0\tACH\n\
10000051\t10000054\tGNG_L\t3\t3.0\tACH\n\
10000052\t10000055\tGNG_R\t3\t3.0\tACH\n\
10000053\t10000057\tGNG_R\t4\t4.0\tACH\n\
10000061\t10000013\tMB_CA_L\t2\t2.0\tGABA\n\
10000062\t10000014\tMB_CA_R\t3\t3.0\tGABA\n\
10000063\t10000015\tMB_CA_L\t2\t2.0\tGABA\n\
10000064\t10000016\tMB_CA_R\t3\t3.0\tGABA\n\
10000065\t10000017\tMB_CA_L\t2\t2.0\tGABA\n\
10000066\t10000018\tMB_CA_R\t3\t3.0\tGABA\n\
10000067\t10000019\tAL_L\t4\t4.0\tGLUT\n\
10000068\t10000020\tAL_R\t5\t5.0\tGLUT\n\
10000069\t10000021\tAL_L\t3\t3.0\tGLUT\n\
10000070\t10000022\tAL_R\t4\t4.0\tGLUT\n\
10000005\t10000061\tAL_L\t3\t3.0\tACH\n\
10000006\t10000062\tAL_R\t3\t3.0\tACH\n\
10000007\t10000063\tAL_L\t2\t2.0\tACH\n\
10000008\t10000064\tAL_R\t2\t2.0\tACH\n\
";
const CONNECTIONS_D: &str = "\
10000009\t10000065\tAL_L\t3\t3.0\tACH\n\
10000010\t10000066\tAL_R\t3\t3.0\tACH\n\
10000081\t10000067\tAL_L\t2\t2.0\tACH\n\
10000082\t10000068\tAL_R\t2\t2.0\tACH\n\
10000083\t10000069\tAL_L\t3\t3.0\tACH\n\
10000071\t10000013\tLO_L\t4\t4.0\tACH\n\
10000072\t10000014\tLO_R\t4\t4.0\tACH\n\
10000073\t10000015\tLO_L\t3\t3.0\tACH\n\
10000074\t10000016\tLO_R\t3\t3.0\tGABA\n\
10000075\t10000017\tLO_L\t2\t2.0\tGABA\n\
10000076\t10000018\tLO_R\t3\t3.0\tACH\n\
10000077\t10000019\tLO_L\t2\t2.0\tGLUT\n\
10000078\t10000020\tLO_R\t2\t2.0\tGLUT\n\
10000079\t10000040\tLO_L\t3\t3.0\tACH\n\
10000080\t10000041\tLO_R\t3\t3.0\tGABA\n\
10000054\t10000084\tVNC_L\t6\t6.0\tACH\n\
10000055\t10000085\tVNC_R\t5\t5.0\tACH\n\
10000056\t10000086\tVNC_L\t4\t4.0\tACH\n\
10000057\t10000087\tVNC_R\t5\t5.0\tACH\n\
10000084\t10000049\tSMP_L\t3\t3.0\tACH\n\
10000085\t10000050\tSMP_R\t3\t3.0\tACH\n\
10000086\t10000051\tSMP_L\t2\t2.0\tACH\n\
10000087\t10000052\tSMP_R\t2\t2.0\tACH\n\
10000088\t10000026\tSMP_L\t4\t4.0\tACH\n\
10000088\t10000049\tSMP_L\t3\t3.0\tACH\n\
10000089\t10000027\tSMP_R\t4\t4.0\tACH\n\
10000089\t10000050\tSMP_R\t3\t3.0\tACH\n\
10000090\t10000028\tSMP_L\t3\t3.0\tACH\n\
10000090\t10000040\tSMP_L\t2\t2.0\tACH\n\
10000091\t10000029\tSMP_R\t3\t3.0\tACH\n\
10000091\t10000041\tSMP_R\t2\t2.0\tACH\n\
10000092\t10000030\tSMP_L\t3\t3.0\tACH\n\
";
const CONNECTIONS_E: &str = "\
10000092\t10000043\tSMP_L\t2\t2.0\tACH\n\
10000093\t10000013\tGNG_L\t1\t1.0\tACH\n\
10000094\t10000014\tGNG_R\t1\t1.0\tACH\n\
10000095\t10000015\tGNG_L\t1\t1.0\tACH\n\
10000096\t10000016\tGNG_R\t1\t1.0\tACH\n\
10000097\t10000017\tGNG_L\t1\t1.0\tACH\n\
10000098\t10000018\tGNG_R\t1\t1.0\tACH\n\
10000099\t10000019\tGNG_L\t1\t1.0\tACH\n\
10000100\t10000020\tGNG_R\t1\t1.0\tACH\n\
10000032\t10000026\tMB_MBON_L\t2\t2.0\tDOP\n\
10000033\t10000027\tMB_MBON_R\t2\t2.0\tDOP\n\
10000034\t10000028\tMB_MBON_L\t2\t2.0\tDOP\n\
10000035\t10000029\tMB_MBON_R\t2\t2.0\tDOP\n\
10000036\t10000030\tMB_MBON_L\t1\t1.0\tOCT\n\
10000037\t10000031\tMB_MBON_R\t1\t1.0\tOCT\n\
10000058\t10000084\tVNC_L\t3\t3.0\tACH\n\
10000059\t10000085\tVNC_R\t3\t3.0\tACH\n\
10000060\t10000086\tVNC_L\t2\t2.0\tACH\n\
10000026\t10000040\tSMP_L\t3\t3.0\tGLUT\n\
10000027\t10000041\tSMP_R\t3\t3.0\tGLUT\n\
10000028\t10000040\tSMP_L\t2\t2.0\tGABA\n\
10000029\t10000041\tSMP_R\t2\t2.0\tGABA\n\
10000030\t10000042\tSMP_L\t3\t3.0\tACH\n\
10000031\t10000043\tSMP_R\t3\t3.0\tACH\n\
10000067\t10000026\tAL_L\t2\t2.0\tGLUT\n\
10000068\t10000027\tAL_R\t2\t2.0\tGLUT\n\
10000069\t10000028\tAL_L\t2\t2.0\tGLUT\n\
10000070\t10000029\tAL_R\t2\t2.0\tGLUT\n\
10000071\t10000026\tLO_L\t2\t2.0\tACH\n\
10000072\t10000027\tLO_R\t2\t2.0\tACH\n\
10000073\t10000028\tLO_L\t2\t2.0\tACH\n\
10000074\t10000029\tLO_R\t2\t2.0\tGABA\n\
";
/// Build the complete FlyWire-format connections TSV payload.
///
/// The result is the header line followed by the five data chunks
/// (`CONNECTIONS_A` … `CONNECTIONS_E`) in declaration order — 159 data
/// rows in total (31 + 4 × 32), the same figure recorded in
/// `EXPECTED_SYNAPSES`.
pub fn connections_tsv() -> String {
    // `concat` sizes the output exactly, so no capacity guess is needed.
    [
        CONNECTIONS_HEADER,
        CONNECTIONS_A,
        CONNECTIONS_B,
        CONNECTIONS_C,
        CONNECTIONS_D,
        CONNECTIONS_E,
    ]
    .concat()
}
const CLASSIFICATION_HEADER: &str = "neuron_id\tcell_type\tsuper_class\n";
/// FlyWire-format classification TSV (40 authoritative overrides).
const CLASSIFICATION_BODY: &str = "\
10000013\tKC_g\tcentral\n\
10000014\tKC_g\tcentral\n\
10000015\tKC_ab\tcentral\n\
10000016\tKC_ab\tcentral\n\
10000017\tKC_apbp\tcentral\n\
10000018\tKC_apbp\tcentral\n\
10000019\tKC_g\tcentral\n\
10000020\tKC_ab\tcentral\n\
10000021\tKC_apbp\tcentral\n\
10000022\tKC_g\tcentral\n\
10000026\tMBON01\tcentral\n\
10000027\tMBON02\tcentral\n\
10000028\tMBON03\tcentral\n\
10000029\tMBON04\tcentral\n\
10000030\tMBON05\tcentral\n\
10000031\tMBON06\tcentral\n\
10000049\tDNp01\tdescending\n\
10000050\tDNp02\tdescending\n\
10000051\tDNp03\tdescending\n\
10000052\tDNg01\tdescending\n\
10000053\tDNg02\tdescending\n\
10000054\tMotor_leg_1\tmotor\n\
10000055\tMotor_leg_2\tmotor\n\
10000056\tMotor_leg_3\tmotor\n\
10000057\tMotor_wing_1\tmotor\n\
10000058\tMotor_wing_2\tmotor\n\
10000059\tMotor_wing_3\tmotor\n\
10000060\tMotor_hlt\tmotor\n\
10000001\tPR_R1\tsensory\n\
10000002\tPR_R1\tsensory\n\
10000003\tPR_R7\tsensory\n\
10000004\tPR_R8\tsensory\n\
10000032\tDAN_PPL1\tcentral\n\
10000033\tDAN_PPL1\tcentral\n\
10000034\tDAN_PAM\tcentral\n\
10000035\tDAN_PAM\tcentral\n\
10000036\tOAN_VPM3\tcentral\n\
10000037\tOAN_VPM3\tcentral\n\
10000038\tSER_DRN\tcentral\n\
10000039\tSER_DRN\tcentral\n\
";
/// Build the complete FlyWire-format classification TSV payload: the
/// header line followed by the 40 override rows of
/// `CLASSIFICATION_BODY`.
pub fn classification_tsv() -> String {
    [CLASSIFICATION_HEADER, CLASSIFICATION_BODY].concat()
}
/// Materialize the fixture as three TSV files inside `dir`.
///
/// The files are named `neurons.tsv`, `connections.tsv` and
/// `classification.tsv` — the same names used on the FlyWire release —
/// and are written in that order. The returned [`FixturePaths`] holds
/// the full path of each file.
///
/// # Errors
///
/// Returns the first [`std::io::Error`] raised by [`std::fs::write`];
/// files written before the failure are left in place.
pub fn write_fixture(dir: &std::path::Path) -> std::io::Result<FixturePaths> {
    let paths = FixturePaths {
        neurons: dir.join("neurons.tsv"),
        connections: dir.join("connections.tsv"),
        classification: dir.join("classification.tsv"),
    };
    std::fs::write(&paths.neurons, neurons_tsv())?;
    std::fs::write(&paths.connections, connections_tsv())?;
    std::fs::write(&paths.classification, classification_tsv())?;
    Ok(paths)
}
/// Locations of the three fixture files on disk, as produced by
/// [`write_fixture`].
#[derive(Debug, Clone)]
pub struct FixturePaths {
    /// Full path of the written `neurons.tsv`.
    pub neurons: std::path::PathBuf,
    /// Full path of the written `connections.tsv`.
    pub connections: std::path::PathBuf,
    /// Full path of the written `classification.tsv`.
    pub classification: std::path::PathBuf,
}

View file

@ -0,0 +1,369 @@
//! FlyWire v783 TSV → `Connectome` loader.
//!
//! Streaming parse: one pass over `neurons.tsv`, one pass over
//! `classification.tsv` (optional override), one pass over
//! `connections.tsv`. Dense `NeuronId`s are assigned in the order neurons
//! are first seen in the neuron file; parallel arrays of `FlyWireNeuronId`
//! and `NeuronMeta` are preserved alongside the CSR.
//!
//! The loader is deterministic: given a byte-identical TSV input, the
//! output `Connectome` (synapses, row_ptr, meta, flywire_ids) is
//! bit-identical. Synapses within a neuron are stored in the order they
//! appear in `connections.tsv`.
//!
//! Errors are surfaced through the crate-level [`FlywireError`] so
//! callers can distinguish "bad CSV syntax" from "unknown cell type"
//! from "dangling synapse reference".
use std::collections::HashMap;
use std::path::Path;
use super::schema::{CellTypeRecord, NeuroTransmitter, NeuronRecord, SynapseRecord};
use super::FlywireError;
use crate::connectome::generator::Connectome;
use crate::connectome::schema::{
ConnectomeSerCfg, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
};
/// Load a FlyWire v783 release from `dir`.
///
/// Expects three TSV files under `dir`: `neurons.tsv`,
/// `connections.tsv`, `classification.tsv`. The classification file is
/// optional; if absent, the cell-type column on `neurons.tsv` is used
/// directly.
///
/// See [`FlywireError`] for the failure modes.
pub fn load_flywire(dir: &Path) -> Result<Connectome, FlywireError> {
let neurons_path = dir.join("neurons.tsv");
let connections_path = dir.join("connections.tsv");
let classification_path = dir.join("classification.tsv");
let neurons = read_neurons(&neurons_path)?;
let class_overrides = if classification_path.exists() {
read_classifications(&classification_path)?
} else {
HashMap::new()
};
let synapses = read_synapses(&connections_path)?;
assemble_connectome(neurons, class_overrides, synapses)
}
/// Parse `neurons.tsv` into a vector of [`NeuronRecord`]s, preserving
/// file order.
///
/// # Errors
///
/// - Whatever [`open_tsv`] returns if the file cannot be opened.
/// - [`FlywireError::MalformedRow`] for the first row that fails to
///   deserialize. The reported line is the 0-based record index plus 2
///   (one header line, 1-based numbering).
/// - [`FlywireError::DuplicateNeuron`] as soon as a `neuron_id` is seen
///   a second time.
pub fn read_neurons(path: &Path) -> Result<Vec<NeuronRecord>, FlywireError> {
    let mut rdr = open_tsv(path)?;
    let mut out: Vec<NeuronRecord> = Vec::new();
    // Only membership is needed for duplicate detection, so a set is
    // used rather than a map whose values would never be read.
    let mut seen: std::collections::HashSet<u64> = std::collections::HashSet::new();
    for (i, result) in rdr.deserialize::<NeuronRecord>().enumerate() {
        let rec: NeuronRecord = result.map_err(|e| FlywireError::MalformedRow {
            file: label_of(path),
            line: (i + 2) as u64, // +1 for header, +1 for 1-based
            detail: e.to_string(),
        })?;
        // `insert` returns `false` when the id was already present.
        if !seen.insert(rec.neuron_id) {
            return Err(FlywireError::DuplicateNeuron(rec.neuron_id));
        }
        out.push(rec);
    }
    Ok(out)
}
/// Read `classification.tsv` and index its rows by `neuron_id`.
///
/// When the same `neuron_id` appears on several rows the last row wins:
/// later entries replace earlier ones in the returned map.
///
/// # Errors
///
/// - Whatever [`open_tsv`] returns if the file cannot be opened.
/// - [`FlywireError::MalformedRow`] for the first row that fails to
///   deserialize. The reported line is the 0-based record index plus 2
///   (one header line, 1-based numbering).
pub fn read_classifications(path: &Path) -> Result<HashMap<u64, CellTypeRecord>, FlywireError> {
    let mut reader = open_tsv(path)?;
    reader
        .deserialize::<CellTypeRecord>()
        .enumerate()
        .map(|(row, parsed)| match parsed {
            Ok(record) => Ok((record.neuron_id, record)),
            Err(e) => Err(FlywireError::MalformedRow {
                file: label_of(path),
                line: (row + 2) as u64,
                detail: e.to_string(),
            }),
        })
        .collect()
}
/// Read `connections.tsv` into a vector of [`SynapseRecord`]s.
///
/// Rows are returned in file order; the loader relies on that order to
/// make the resulting CSR layout deterministic.
///
/// # Errors
///
/// - Whatever [`open_tsv`] returns if the file cannot be opened.
/// - [`FlywireError::MalformedRow`] for the first row that fails to
///   deserialize. The reported line is the 0-based record index plus 2
///   (one header line, 1-based numbering).
pub fn read_synapses(path: &Path) -> Result<Vec<SynapseRecord>, FlywireError> {
    let mut reader = open_tsv(path)?;
    reader
        .deserialize::<SynapseRecord>()
        .enumerate()
        .map(|(row, parsed)| {
            parsed.map_err(|e| FlywireError::MalformedRow {
                file: label_of(path),
                line: (row + 2) as u64,
                detail: e.to_string(),
            })
        })
        .collect()
}
/// Open `path` as a tab-separated file with a header row.
///
/// The reader is configured as non-flexible, so every record must have
/// the same number of fields as the header. Any error from opening the
/// file is reported as [`FlywireError::Io`], labelled with the file
/// name.
fn open_tsv(path: &Path) -> Result<csv::Reader<std::fs::File>, FlywireError> {
    let mut builder = csv::ReaderBuilder::new();
    builder
        .delimiter(b'\t')
        .has_headers(true)
        .flexible(false);
    builder.from_path(path).map_err(|open_err| FlywireError::Io {
        file: label_of(path),
        detail: open_err.to_string(),
    })
}
/// Short label for `path` used in error messages: the final path
/// component when there is one, otherwise the whole path as displayed.
/// Non-UTF-8 file names are converted lossily.
fn label_of(path: &Path) -> String {
    match path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => path.display().to_string(),
    }
}
/// Build a [`Connectome`] from already-parsed neuron, classification
/// and synapse records.
///
/// Works in three phases:
///
/// 1. Assign each neuron a dense [`NeuronId`] equal to its position in
///    `neurons`, and derive its class, transmitter and bias.
/// 2. Bucket every synapse under its pre-synaptic neuron, keeping the
///    order in which rows appear in `synapses`.
/// 3. Flatten the buckets into CSR form (`row_ptr` + flat synapse list).
///
/// # Errors
///
/// Propagates any error from `classify_cell_type` or `parse_nt`, and
/// returns [`FlywireError::UnknownPreNeuron`] /
/// [`FlywireError::UnknownPostNeuron`] when a synapse references an id
/// that is not in `neurons`. The pre id is checked before the post id.
fn assemble_connectome(
neurons: Vec<NeuronRecord>,
class_overrides: HashMap<u64, CellTypeRecord>,
synapses: Vec<SynapseRecord>,
) -> Result<Connectome, FlywireError> {
// Dense id assignment in TSV declaration order.
// `id_of` maps the FlyWire id to the dense id; `flywire_ids`, `meta`
// and `nt_per_neuron` are parallel arrays indexed by the dense id.
let mut id_of: HashMap<u64, NeuronId> = HashMap::with_capacity(neurons.len());
let mut flywire_ids: Vec<FlyWireNeuronId> = Vec::with_capacity(neurons.len());
let mut meta: Vec<NeuronMeta> = Vec::with_capacity(neurons.len());
let mut nt_per_neuron: Vec<NeuroTransmitter> = Vec::with_capacity(neurons.len());
for (idx, n) in neurons.iter().enumerate() {
// NOTE(review): `idx as u32` is an unchecked narrowing cast; it would
// wrap silently for more than `u32::MAX` neurons.
id_of.insert(n.neuron_id, NeuronId(idx as u32));
flywire_ids.push(FlyWireNeuronId(n.neuron_id));
// The override's cell type (if any) is handed to the record, which
// decides the effective cell type — presumably preferring the
// override; confirm against `NeuronRecord::effective_cell_type`.
let class_override = class_overrides.get(&n.neuron_id);
let effective_cell_type =
n.effective_cell_type(class_override.map(|c| c.cell_type.as_str()));
let class = classify_cell_type(effective_cell_type.as_deref(), n.flow.as_deref())?;
// The neuron-level transmitter is mandatory: an unrecognised label
// aborts the load here.
let nt = parse_nt(&n.nt_type, n.neuron_id)?;
nt_per_neuron.push(nt);
meta.push(NeuronMeta {
class,
module: 0,
bias_pa: default_bias_for(class),
});
}
// Partition synapses by pre-id in file-declared order.
let n = neurons.len();
let mut per_pre: Vec<Vec<Synapse>> = vec![Vec::new(); n];
for syn in &synapses {
// Both endpoints are resolved before the self-loop check, so a
// dangling reference is an error even on a row that would otherwise
// be dropped.
let pre = *id_of
.get(&syn.pre_id)
.ok_or(FlywireError::UnknownPreNeuron(syn.pre_id))?;
let post = *id_of
.get(&syn.post_id)
.ok_or(FlywireError::UnknownPostNeuron(syn.post_id))?;
if pre == post {
continue; // drop self-loops; matches SBM generator
}
// A per-row transmitter takes precedence; otherwise fall back to the
// pre-synaptic neuron's transmitter. Parse errors are reported
// against the pre-synaptic FlyWire id.
let nt = if let Some(s) = &syn.nt_type {
parse_nt(s, syn.pre_id)?
} else {
nt_per_neuron[pre.idx()]
};
let sign = nt_to_sign(nt);
// A declared count of 0 is treated as 1 so the fallback weight is
// never zero.
let count = syn.syn_count.max(1);
let weight = derive_weight(syn, count);
per_pre[pre.idx()].push(Synapse {
post,
weight,
delay_ms: default_delay_ms(),
sign,
});
}
// CSR flatten (row_ptr + synapses), preserving per-pre order.
// After the loop, the synapses of dense neuron `i` occupy
// `flat[row_ptr[i]..row_ptr[i + 1]]`.
let mut row_ptr: Vec<u32> = Vec::with_capacity(n + 1);
let total: usize = per_pre.iter().map(|v| v.len()).sum();
let mut flat: Vec<Synapse> = Vec::with_capacity(total);
row_ptr.push(0);
for bucket in per_pre {
flat.extend(bucket);
// NOTE(review): unchecked narrowing cast; wraps silently past
// `u32::MAX` stored synapses.
row_ptr.push(flat.len() as u32);
}
// Module/hub/seed fields are fixed placeholders here: a single
// module, no hub modules, seed 0.
let cfg = ConnectomeSerCfg {
num_neurons: n as u32,
num_modules: 1,
num_hub_modules: 0,
seed: 0,
};
Ok(Connectome::from_parts(
cfg,
meta,
flat,
row_ptr,
Some(flywire_ids),
))
}
/// Convert a raw neurotransmitter label into a [`NeuroTransmitter`].
///
/// Surrounding whitespace is ignored and matching is ASCII
/// case-insensitive. Besides the seven short labels (`ACH`, `GLUT`,
/// `GABA`, `HIST`, `SER`, `DOP`, `OCT`) a few spelled-out and
/// abbreviated aliases are accepted (for example `SEROTONIN`, `5-HT`,
/// `DA`, `OA`).
///
/// # Errors
///
/// Any other label — including the empty string — yields
/// [`FlywireError::UnknownNtType`] carrying the untrimmed `raw` text and
/// `context_id`; there is no silent default.
pub fn parse_nt(raw: &str, context_id: u64) -> Result<NeuroTransmitter, FlywireError> {
    const ALIASES: &[(&[&str], NeuroTransmitter)] = &[
        (&["ACH", "ACETYLCHOLINE"], NeuroTransmitter::Acetylcholine),
        (&["GLUT", "GLUTAMATE"], NeuroTransmitter::Glutamate),
        (&["GABA"], NeuroTransmitter::Gaba),
        (&["HIST", "HISTAMINE"], NeuroTransmitter::Histamine),
        (&["SER", "SEROTONIN", "5-HT", "5HT"], NeuroTransmitter::Serotonin),
        (&["DOP", "DOPAMINE", "DA"], NeuroTransmitter::Dopamine),
        (&["OCT", "OCTOPAMINE", "OA"], NeuroTransmitter::Octopamine),
    ];

    let label = raw.trim();
    for (names, nt) in ALIASES {
        if names.iter().any(|name| label.eq_ignore_ascii_case(name)) {
            return Ok(*nt);
        }
    }
    Err(FlywireError::UnknownNtType {
        raw: raw.to_owned(),
        neuron_id: context_id,
    })
}
/// Fast-path sign for a neurotransmitter (research doc §4 table).
///
/// - GABA and HIST are inhibitory (−1).
/// - ACH and GLUT are excitatory (+1).
/// - The modulatory transmitters SER, DOP and OCT are also reported as
///   excitatory in the fast path; analyses that need to exclude slow
///   edges must consult the NT side-channel.
pub fn nt_to_sign(nt: NeuroTransmitter) -> Sign {
    // Kept exhaustive on purpose: adding a transmitter variant must
    // force a decision here rather than inherit a default sign.
    match nt {
        NeuroTransmitter::Gaba | NeuroTransmitter::Histamine => Sign::Inhibitory,
        NeuroTransmitter::Acetylcholine
        | NeuroTransmitter::Glutamate
        | NeuroTransmitter::Serotonin
        | NeuroTransmitter::Dopamine
        | NeuroTransmitter::Octopamine => Sign::Excitatory,
    }
}
/// Map a FlyWire cell-type string to our coarse [`NeuronClass`].
///
/// Unknown cell types fall into `NeuronClass::Other` — this is
/// intentional: the FlyWire release documents ~8,000 cell types, and
/// the coarse bucket is the correct v1 behavior per the research doc.
/// Empty cell-type with a non-empty `flow` column still resolves via
/// the flow hint. If *both* are missing the entry is `Other`, not an
/// error (matches the release's "unresolved" neurons).
pub fn classify_cell_type(
cell_type: Option<&str>,
flow: Option<&str>,
) -> Result<NeuronClass, FlywireError> {
if let Some(ct) = cell_type {
if let Some(class) = classify_by_prefix(ct) {
return Ok(class);
}
}
if let Some(f) = flow {
return Ok(classify_by_flow(f));
}
Ok(NeuronClass::Other)
}
/// Strict variant of [`classify_cell_type`]. Unmapped cell types yield
/// [`FlywireError::UnknownCellType`] instead of folding to
/// [`NeuronClass::Other`]. Intended for callers that want to audit
/// prefix-table coverage on a specific release.
pub fn classify_cell_type_strict(
cell_type: Option<&str>,
flow: Option<&str>,
neuron_id: u64,
) -> Result<NeuronClass, FlywireError> {
if let Some(ct) = cell_type {
if let Some(class) = classify_by_prefix(ct) {
return Ok(class);
}
return Err(FlywireError::UnknownCellType {
raw: ct.to_owned(),
neuron_id,
});
}
if let Some(f) = flow {
return Ok(classify_by_flow(f));
}
Ok(NeuronClass::Other)
}
/// Map a cell-type label to a coarse [`NeuronClass`] by its prefix.
///
/// Surrounding whitespace is ignored; matching is case-sensitive.
/// Returns `None` when the label starts with none of the known prefixes
/// (including the empty label).
fn classify_by_prefix(ct: &str) -> Option<NeuronClass> {
    // No prefix in this table is itself a prefix of another entry, so a
    // label can match at most one row and row order does not influence
    // the result. `PN_glom…` and `Projection…` labels need no rows of
    // their own: they are already covered by `PN_` and `Proj`.
    const PREFIX_CLASSES: &[(&str, NeuronClass)] = &[
        ("PR_", NeuronClass::PhotoReceptor),
        ("R1", NeuronClass::PhotoReceptor),
        ("R7", NeuronClass::PhotoReceptor),
        ("R8", NeuronClass::PhotoReceptor),
        ("ORN", NeuronClass::Chemosensory),
        ("PN_", NeuronClass::Chemosensory),
        ("JO", NeuronClass::Mechanosensory),
        ("ML_mech", NeuronClass::Mechanosensory),
        ("KC", NeuronClass::KenyonCell),
        ("MBON", NeuronClass::MbOutput),
        ("EPG", NeuronClass::CentralComplex),
        ("PEN", NeuronClass::CentralComplex),
        ("FB_", NeuronClass::CentralComplex),
        ("PB_", NeuronClass::CentralComplex),
        ("LAL", NeuronClass::LateralAccessory),
        ("DNp", NeuronClass::Descending),
        ("DNg", NeuronClass::Descending),
        ("DN_", NeuronClass::Descending),
        ("Ascending", NeuronClass::Ascending),
        ("AN_", NeuronClass::Ascending),
        ("Motor", NeuronClass::Motor),
        ("LN_", NeuronClass::LocalInter),
        ("LocalInter", NeuronClass::LocalInter),
        ("Proj", NeuronClass::Projection),
        ("DAN", NeuronClass::Modulatory),
        ("SER_", NeuronClass::Modulatory),
        ("OAN", NeuronClass::Modulatory),
        ("Loc_opt", NeuronClass::OpticLocal),
        ("LoOpt", NeuronClass::OpticLocal),
        ("Lo_", NeuronClass::OpticLocal),
    ];

    let label = ct.trim();
    PREFIX_CLASSES
        .iter()
        .find(|(prefix, _)| label.starts_with(*prefix))
        .map(|&(_, class)| class)
}
/// Coarse class hint derived from the `flow` column.
///
/// Surrounding whitespace is ignored and matching is ASCII
/// case-insensitive. `efferent`, `ascending` and `descending` map to
/// `Motor`, `Ascending` and `Descending` respectively; every other
/// value — including `afferent` and `intrinsic` — is
/// [`NeuronClass::Other`].
fn classify_by_flow(flow: &str) -> NeuronClass {
    let hint = flow.trim();
    if hint.eq_ignore_ascii_case("efferent") {
        NeuronClass::Motor
    } else if hint.eq_ignore_ascii_case("ascending") {
        NeuronClass::Ascending
    } else if hint.eq_ignore_ascii_case("descending") {
        NeuronClass::Descending
    } else {
        // `afferent`, `intrinsic`, and anything unrecognised.
        NeuronClass::Other
    }
}
/// Default bias for a neuron of the given class: `-0.5` for classes
/// reporting `is_sensory()`, `0.5` for classes reporting `is_motor()`,
/// and `0.0` for everything else. The sensory check takes precedence.
fn default_bias_for(class: NeuronClass) -> f32 {
    if class.is_sensory() {
        return -0.5;
    }
    if class.is_motor() {
        return 0.5;
    }
    0.0
}
/// Pick the weight for one edge: the record's `syn_weight` when it is
/// strictly positive, otherwise `count` converted to `f32`. A zero,
/// negative, or NaN `syn_weight` therefore falls back to `count`.
fn derive_weight(syn: &SynapseRecord, count: u32) -> f32 {
    let reported = syn.syn_weight;
    Some(reported)
        .filter(|weight| *weight > 0.0)
        .unwrap_or_else(|| count as f32)
}
/// Constant synaptic delay, in milliseconds, assigned by the ingest
/// loader.
fn default_delay_ms() -> f32 {
    // Research doc §3.2: FlyWire publishes no conduction delays, so the
    // loader falls back to a fixed value. A distance-scaled estimate
    // would need soma coordinates, which are optional in the release
    // and absent from the fixture.
    const FALLBACK_DELAY_MS: f32 = 2.0;
    FALLBACK_DELAY_MS
}

View file

@ -0,0 +1,101 @@
//! FlyWire v783 ingest: TSV release → `Connectome`.
//!
//! This module is the first follow-up named in ADR-154 §13. It moves
//! the connectome-fly demonstrator from its synthetic stochastic-block
//! model onto the real FlyWire v783 wiring, one file at a time, without
//! touching any analysis, LIF, or observer code.
//!
//! ## Public API
//!
//! - [`load_flywire`] — parse `neurons.tsv`, `classification.tsv`, and
//! `connections.tsv` from a directory; return a fully-populated
//! [`crate::Connectome`] with parallel `FlyWireNeuronId`s.
//! - [`FlywireError`] — structured error type with one variant per
//! named failure mode (malformed row, dangling reference, unknown
//! NT, unknown cell type, IO failure, …).
//! - [`schema`] — serde record structs matching the release TSV
//! columns.
//! - [`fixture`] — hand-authored 100-neuron fixture used by tests.
//!
//! ## Hard constraints
//!
//! - No `unsafe`. No Python, shell, or JS/TS.
//! - Deterministic: byte-identical TSV input produces bit-identical
//! `Connectome` output across runs.
//! - No download path; `load_flywire` reads whatever TSVs are under
//! the path the caller hands it.
pub mod fixture;
pub mod loader;
pub mod schema;
pub use loader::{
classify_cell_type, classify_cell_type_strict, load_flywire, nt_to_sign, parse_nt,
};
pub use schema::{CellTypeRecord, NeuroTransmitter, NeuronRecord, SynapseRecord};
use thiserror::Error;
/// Errors produced by the FlyWire ingest path.
///
/// Most variants are exercised by a dedicated case in
/// `tests/flywire_ingest.rs` (malformed row, IO failure, dangling
/// `post_id`, duplicate neuron, unknown NT, unknown cell type).
/// NOTE(review): no test for [`FlywireError::UnknownPreNeuron`] is
/// visible alongside this change — confirm coverage.
#[derive(Debug, Error)]
pub enum FlywireError {
/// A row failed to deserialize against the [`NeuronRecord`],
/// [`SynapseRecord`], or [`CellTypeRecord`] schema.
#[error("malformed row in {file} at line {line}: {detail}")]
MalformedRow {
/// File name (not full path), e.g. `"neurons.tsv"`.
file: String,
/// 1-based line number; the header is line 1, so the first data
/// row is line 2.
line: u64,
/// Underlying parser message.
detail: String,
},
/// IO or CSV-framing failure before per-row dispatch (for example a
/// missing `neurons.tsv`).
#[error("io error on {file}: {detail}")]
Io {
/// File name (not full path).
file: String,
/// Underlying error, rendered as text.
detail: String,
},
/// A synapse referenced a `pre_id` that is not present in
/// `neurons.tsv`. The payload is the offending id.
#[error("synapse pre_id {0} not in neurons.tsv")]
UnknownPreNeuron(u64),
/// A synapse referenced a `post_id` that is not present in
/// `neurons.tsv`. The payload is the offending id.
#[error("synapse post_id {0} not in neurons.tsv")]
UnknownPostNeuron(u64),
/// A neuron id appeared twice in `neurons.tsv`. The payload is the
/// repeated id.
#[error("duplicate neuron_id {0} in neurons.tsv")]
DuplicateNeuron(u64),
/// An NT-type string did not match the seven release-documented
/// labels (ACH / GLUT / GABA / HIST / SER / DOP / OCT).
#[error("unknown nt_type {raw:?} on neuron_id {neuron_id}")]
UnknownNtType {
/// Raw column value, as read from the file.
raw: String,
/// Context id (neuron or pre-neuron of the offending synapse).
neuron_id: u64,
},
/// A cell-type string did not match any known prefix. Only
/// surfaced from the strict classification path
/// ([`loader::classify_cell_type_strict`]); the default
/// [`loader::classify_cell_type`] folds unknown cell types into
/// [`crate::NeuronClass::Other`] because FlyWire v783 documents
/// ~8 000 cell types and the ingest loader is coarse by design.
#[error("unknown cell_type {raw:?} on neuron_id {neuron_id}")]
UnknownCellType {
/// Raw column value, as read from the file.
raw: String,
/// Context neuron id.
neuron_id: u64,
},
}

View file

@ -0,0 +1,141 @@
//! FlyWire v783 on-disk record schema.
//!
//! Three serde structs, one per published TSV file in the release:
//!
//! - [`NeuronRecord`] — one row per neuron; union of fields across
//! `neurons.tsv` plus the parts of `classification.tsv` / NT tables
//! that the loader consumes in a single pass.
//! - [`SynapseRecord`] — one row per directed pre→post edge in
//! `connections.tsv`.
//! - [`CellTypeRecord`] — one row per neuron in
//! `classification.tsv`; used as an override table when the primary
//! `neurons.tsv` lacks a cell-type assignment.
//!
//! The column names match the published v783 schema (see
//! `docs/research/connectome-ruvector/02-connectome-layer.md` §2).
//! Unknown columns are ignored by the CSV reader so adding downstream
//! fields (e.g. `hemilineage`) does not require a schema version bump.
use serde::{Deserialize, Serialize};
/// One row of the neurons TSV.
///
/// Columns mirror the FlyWire v783 release. `neuron_id` is the stable
/// 64-bit root id; `supervoxel_id` is the coarse segmentation handle
/// (kept for provenance, not used by the loader in v1); `cell_type`,
/// `nt_type`, `side`, `nerve`, and `flow` are all string-enum encoded.
///
/// Only `neuron_id` and `nt_type` are mandatory; every other field
/// carries `#[serde(default)]`.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct NeuronRecord {
/// Stable FlyWire root id.
pub neuron_id: u64,
/// Supervoxel id (provenance only).
#[serde(default)]
pub supervoxel_id: u64,
/// Cell type, e.g. "KC_g", "MBON01", "DNp01". Empty string
/// (deserialized to `None`) is allowed when the classification is
/// unresolved.
#[serde(default)]
pub cell_type: Option<String>,
/// Dominant predicted neurotransmitter: "ACH", "GLUT", "GABA",
/// "SER", "OCT", "DOP", "HIST". Kept as the raw string here; parsing
/// and validation happen in the loader.
pub nt_type: String,
/// Anatomical side: "left", "right", "center".
#[serde(default)]
pub side: Option<String>,
/// Peripheral nerve label, if afferent / efferent.
#[serde(default)]
pub nerve: Option<String>,
/// Flow class: "afferent", "efferent", "intrinsic". The loader's
/// flow fallback additionally recognizes "ascending" and
/// "descending".
#[serde(default)]
pub flow: Option<String>,
/// Optional super-class label (e.g. "optic", "central", "motor").
#[serde(default)]
pub super_class: Option<String>,
}
/// One row of the connections TSV.
///
/// `pre_id` and `post_id` are stable FlyWire root ids; both must resolve
/// to a row in the neurons TSV or the loader errors.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct SynapseRecord {
/// Pre-synaptic neuron id.
pub pre_id: u64,
/// Post-synaptic neuron id.
pub post_id: u64,
/// Neuropil region label (e.g. "MB_CA_L").
#[serde(default)]
pub neuropil: Option<String>,
/// Aggregated synapse count for this directed pair.
pub syn_count: u32,
/// Effective weight reported by the release. The loader only uses
/// it when it is strictly positive; an absent, zero, or negative
/// value falls back to a count-derived weight (presumably
/// `syn_count` — confirm against the loader's call site).
#[serde(default)]
pub syn_weight: f32,
/// Per-edge NT prediction (optional; falls back to the pre
/// neuron's dominant NT when unset).
#[serde(default)]
pub nt_type: Option<String>,
}
/// One row of the classification TSV.
///
/// Provides authoritative cell-type / super-class labels that can
/// override or fill in the fields on [`NeuronRecord`].
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
pub struct CellTypeRecord {
/// Stable FlyWire root id; the key used to join against
/// [`NeuronRecord::neuron_id`].
pub neuron_id: u64,
/// Primary cell-type label. Unlike [`NeuronRecord::cell_type`] this
/// is a plain `String`, not an `Option`.
pub cell_type: String,
/// Optional coarse super-class.
#[serde(default)]
pub super_class: Option<String>,
}
impl NeuronRecord {
    /// Effective cell-type string after folding in the classification
    /// override.
    ///
    /// `class_override` wins over `self.cell_type` when it carries a
    /// real label. An override that is empty or whitespace-only is
    /// treated as absent: the classification table stores its label as
    /// a plain `String`, so an unresolved row arrives as `""` and must
    /// not erase a valid label from the neurons table.
    ///
    /// Returns `None` only when neither source supplies a label. The
    /// returned string is the chosen label verbatim (not trimmed).
    pub fn effective_cell_type(&self, class_override: Option<&str>) -> Option<String> {
        class_override
            .filter(|label| !label.trim().is_empty())
            .map(str::to_owned)
            .or_else(|| self.cell_type.clone())
    }
}
/// Normalized neurotransmitter identity parsed from an NT label.
///
/// Kept separate from the `Sign` enum of the outer schema: dopamine,
/// serotonin and octopamine (DA / 5-HT / OA) are neuromodulators with no
/// fast-path sign of their own. Per the research doc §4 table the
/// loader renders them as excitatory in the fast path and keeps the NT
/// identity available on the side.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum NeuroTransmitter {
    /// Acetylcholine: fast excitation.
    Acetylcholine,
    /// Glutamate: excitation in central circuits (v1 default).
    Glutamate,
    /// GABA: fast inhibition.
    Gaba,
    /// Histamine: photoreceptor output, inhibitory.
    Histamine,
    /// Serotonin: neuromodulator, excitatory in the fast path.
    Serotonin,
    /// Dopamine: neuromodulator, excitatory in the fast path.
    Dopamine,
    /// Octopamine: neuromodulator, excitatory in the fast path.
    Octopamine,
}

impl NeuroTransmitter {
    /// Returns `true` for the NTs the research schema routes through
    /// the slow neuromodulatory pool (serotonin, dopamine, octopamine).
    ///
    /// The fast path still assigns these a sign so the LIF engine has
    /// something to integrate; this flag exposes the category so
    /// analysis code can exclude slow edges.
    pub fn is_modulatory(self) -> bool {
        // Exhaustive on purpose: adding a variant forces a decision here.
        match self {
            Self::Serotonin | Self::Dopamine | Self::Octopamine => true,
            Self::Acetylcholine | Self::Glutamate | Self::Gaba | Self::Histamine => false,
        }
    }
}

View file

@ -13,7 +13,8 @@ use smallvec::SmallVec;
use super::persist::ConnectomeError;
use super::schema::{
ConnectomeConfig, ConnectomeSerCfg, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
ConnectomeConfig, ConnectomeSerCfg, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign,
Synapse,
};
/// A synthetic fly-like connectome. Stores neuron metadata and a
@ -35,6 +36,12 @@ pub struct Connectome {
pub(super) motor: Vec<NeuronId>,
/// Pre-computed index grouped by class.
pub(super) by_class: Vec<Vec<NeuronId>>,
/// Stable FlyWire root ids, parallel to `meta` / dense ids.
/// `None` for SBM-generated connectomes; `Some` when loaded via the
/// `flywire` module. Serialized at the tail of the bincode blob so
/// existing synthetic blobs remain round-trippable.
#[serde(default)]
pub(super) flywire_ids: Option<Vec<FlyWireNeuronId>>,
}
impl Connectome {
@ -134,9 +141,64 @@ impl Connectome {
sensory,
motor,
by_class,
flywire_ids: None,
}
}
/// Assemble a `Connectome` from parts that were built elsewhere.
///
/// The `flywire` loader uses this to install parsed FlyWire v783
/// records without running the synthetic SBM generator. The caller
/// must hand in a CSR-consistent `(row_ptr, synapses)` pair:
/// `row_ptr.len() == meta.len() + 1` and
/// `row_ptr[i] <= row_ptr[i+1] <= synapses.len()`. The length
/// invariants are only checked by `debug_assert`s.
///
/// The sensory, motor and per-class indices are rebuilt from `meta`.
/// `flywire_ids`, when supplied, must be parallel to `meta`.
pub(super) fn from_parts(
    cfg: ConnectomeSerCfg,
    meta: Vec<NeuronMeta>,
    synapses: Vec<Synapse>,
    row_ptr: Vec<u32>,
    flywire_ids: Option<Vec<FlyWireNeuronId>>,
) -> Self {
    debug_assert_eq!(row_ptr.len(), meta.len() + 1);
    debug_assert_eq!(*row_ptr.last().unwrap_or(&0) as usize, synapses.len());
    if let Some(ids) = &flywire_ids {
        debug_assert_eq!(ids.len(), meta.len());
    }

    // One bucket per class discriminant; 15 buckets are allocated.
    let mut by_class: Vec<Vec<NeuronId>> = vec![Vec::new(); 15];
    let mut sensory: Vec<NeuronId> = Vec::new();
    let mut motor: Vec<NeuronId> = Vec::new();

    for (index, neuron) in meta.iter().enumerate() {
        let dense = index as u32;
        let class = neuron.class;

        by_class[class as usize].push(NeuronId(dense));
        if class.is_sensory() {
            sensory.push(NeuronId(dense));
        }
        if class.is_motor() {
            motor.push(NeuronId(dense));
        }
    }

    Self {
        cfg,
        meta,
        synapses,
        row_ptr,
        sensory,
        motor,
        by_class,
        flywire_ids,
    }
}
/// Stable FlyWire root ids, one per neuron in dense-id order, when
/// this connectome was loaded from a FlyWire v783 release. Returns
/// `None` for SBM-generated connectomes.
#[inline]
pub fn flywire_ids(&self) -> Option<&[FlyWireNeuronId]> {
    self.flywire_ids.as_ref().map(Vec::as_slice)
}
/// Total number of neurons.
#[inline]
pub fn num_neurons(&self) -> usize {

View file

@ -1,22 +1,28 @@
//! Connectome schema, stochastic-block-model generator, and compact
//! binary serialization. Split across three submodules:
//! binary serialization. Split across four submodules:
//!
//! - `schema` — public types (`NeuronId`, `Sign`, `NeuronClass`,
//! `Synapse`, `NeuronMeta`, `ConnectomeConfig`).
//! - `schema` — public types (`NeuronId`, `FlyWireNeuronId`, `Sign`,
//! `NeuronClass`, `Synapse`, `NeuronMeta`,
//! `ConnectomeConfig`).
//! - `generator` — deterministic SBM generator + helpers.
//! - `persist` — bincode-backed binary round-trip.
//! - `flywire` — FlyWire v783 TSV ingest (real wiring path).
//!
//! See `docs/research/connectome-ruvector/02-connectome-layer.md` for
//! the schema design and the log-normal / hub-module statistics this
//! generator targets.
//! generator targets, and ADR-154 §13 for the FlyWire ingest hand-off.
pub mod flywire;
pub mod generator;
pub mod persist;
pub mod schema;
pub use flywire::{load_flywire, FlywireError};
pub use generator::Connectome;
pub use persist::ConnectomeError;
pub use schema::{ConnectomeConfig, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse};
pub use schema::{
ConnectomeConfig, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
};
#[cfg(test)]
mod tests {

View file

@ -20,6 +20,21 @@ impl NeuronId {
}
}
/// Stable FlyWire v783 root id (64-bit). Carried alongside the dense
/// `NeuronId` when a `Connectome` is loaded from FlyWire so analyses can
/// round-trip back to the published identifier space. Opaque newtype
/// over `u64`; ordering and hashing are those of the wrapped integer.
/// See `docs/research/connectome-ruvector/02-connectome-layer.md` §3.1.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub struct FlyWireNeuronId(pub u64);
impl FlyWireNeuronId {
/// Raw id.
#[inline]
pub const fn raw(self) -> u64 {
self.0
}
}
/// Synapse sign. `+1` excitatory, `-1` inhibitory. Neuromodulatory
/// edges are *not* represented in the fast path
/// (`docs/research/connectome-ruvector/03-neural-dynamics.md` §2.2).

View file

@ -77,7 +77,8 @@ pub use analysis::{
Analysis, AnalysisConfig, FunctionalPartition, MotifHit, MotifIndex, MotifSignature,
};
pub use connectome::{
Connectome, ConnectomeConfig, ConnectomeError, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
load_flywire, Connectome, ConnectomeConfig, ConnectomeError, FlyWireNeuronId, FlywireError,
NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
};
pub use lif::{Engine, EngineConfig, LifError, NeuronParams, Spike, SpikeEvent};
pub use observer::{CoherenceEvent, Observer, Report};

View file

@ -0,0 +1,359 @@
//! FlyWire v783 ingest — acceptance tests.
//!
//! These tests exercise every named failure mode of the loader plus a
//! round-trip on the 100-neuron fixture. The fixture lives as Rust
//! string constants (see `src/connectome/flywire/fixture.rs`) so CI
//! does not need the ~2 GB FlyWire release on disk.
use std::fs;
use std::path::PathBuf;
use connectome_fly::connectome::flywire::{
classify_cell_type, classify_cell_type_strict, fixture, load_flywire, nt_to_sign, parse_nt,
};
use connectome_fly::{FlyWireNeuronId, FlywireError, NeuronClass, Sign};
use tempfile::TempDir;
/// Write the built-in fixture into a fresh temporary directory.
///
/// The `TempDir` guard is returned alongside the paths so the caller
/// keeps the directory alive for the duration of the test.
fn setup_fixture() -> (TempDir, fixture::FixturePaths) {
    let root = TempDir::new().expect("temp dir");
    let written = fixture::write_fixture(root.path()).expect("write fixture");
    (root, written)
}
#[test]
fn schema_round_trip_neuron_and_synapse_counts_match_fixture() {
    let (dir, _paths) = setup_fixture();
    let connectome = load_flywire(dir.path()).expect("load fixture");
    let neuron_total = connectome.num_neurons();
    let synapse_total = connectome.num_synapses();

    assert_eq!(
        neuron_total,
        fixture::EXPECTED_NEURONS,
        "neuron count mismatch vs fixture declaration",
    );
    // The fixture authors 159 directed edges. Self-loops or NT
    // filtering could drop edges in general, but the fixture contains
    // no self-loops, so the loaded count must equal the declared one.
    assert_eq!(
        synapse_total,
        fixture::EXPECTED_SYNAPSES,
        "synapse count mismatch vs fixture declaration",
    );
}
#[test]
fn flywire_ids_are_parallel_to_dense_ids() {
    let (dir, _paths) = setup_fixture();
    let connectome = load_flywire(dir.path()).expect("load fixture");
    let ids = connectome
        .flywire_ids()
        .expect("flywire_ids set after load");

    assert_eq!(ids.len(), connectome.num_neurons());
    assert_eq!(ids[0], FlyWireNeuronId(10_000_001));
    assert_eq!(ids[99], FlyWireNeuronId(10_000_100));

    // The fixture is authored sequentially, so root ids must be
    // strictly increasing in dense-id order.
    ids.windows(2)
        .for_each(|win| assert!(win[0].raw() < win[1].raw()));
}
#[test]
fn determinism_two_loads_bit_identical_bincode() {
    let (dir, _paths) = setup_fixture();
    let first = load_flywire(dir.path()).expect("load 1");
    let second = load_flywire(dir.path()).expect("load 2");

    // Cheap structural checks before comparing the serialized form.
    assert_eq!(first.num_neurons(), second.num_neurons());
    assert_eq!(first.num_synapses(), second.num_synapses());

    let first_bytes = first.to_bytes().expect("ser a");
    let second_bytes = second.to_bytes().expect("ser b");
    assert_eq!(
        first_bytes, second_bytes,
        "FlyWire ingest is not deterministic"
    );
}
#[test]
fn nt_to_sign_covers_release_documented_labels() {
    /// Every label must parse and map to `expected`.
    fn assert_sign(labels: &[&str], expected: Sign) {
        for &raw in labels {
            let nt = parse_nt(raw, 0).expect(raw);
            assert_eq!(nt_to_sign(nt), expected);
        }
    }

    // Excitatory.
    assert_sign(
        &["ACH", "GLUT", "ACETYLCHOLINE", "Glutamate"],
        Sign::Excitatory,
    );
    // Inhibitory.
    assert_sign(&["GABA", "HIST", "histamine"], Sign::Inhibitory);
    // Neuromodulatory: rendered excitatory in the fast path per
    // research doc §4 (the slow pool lives outside the fast path).
    assert_sign(
        &["DOP", "SER", "OCT", "5-HT", "DA", "OA"],
        Sign::Excitatory,
    );
}
#[test]
fn unknown_nt_type_is_a_named_error_not_silent_default() {
    // An unrecognized label must surface as `UnknownNtType`, carrying
    // both the raw text and the context id.
    match parse_nt("PANIC", 42).expect_err("must reject unknown NT") {
        FlywireError::UnknownNtType { raw, neuron_id } => {
            assert_eq!(raw, "PANIC");
            assert_eq!(neuron_id, 42);
        }
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn cell_type_coverage_hits_key_classes() {
    let (dir, _paths) = setup_fixture();
    let c = load_flywire(dir.path()).expect("load fixture");

    // The fixture is authored to cover each of these coarse classes,
    // so every one of them must end up with at least one neuron.
    let required = [
        NeuronClass::PhotoReceptor,
        NeuronClass::Chemosensory,
        NeuronClass::Mechanosensory,
        NeuronClass::OpticLocal,
        NeuronClass::KenyonCell,
        NeuronClass::MbOutput,
        NeuronClass::CentralComplex,
        NeuronClass::LateralAccessory,
        NeuronClass::Descending,
        NeuronClass::Ascending,
        NeuronClass::Motor,
        NeuronClass::LocalInter,
        NeuronClass::Projection,
        NeuronClass::Modulatory,
    ];
    for cls in required {
        assert!(
            !c.by_class()[cls as usize].is_empty(),
            "class {cls:?} unexpectedly empty after fixture load",
        );
    }

    // Stimulus and readout (ADR §3.4 AC) need the sensory and motor
    // indices populated as well.
    assert!(!c.sensory_neurons().is_empty());
    assert!(!c.motor_neurons().is_empty());
}
#[test]
fn classify_cell_type_known_prefixes() {
    // Cell-type label -> expected coarse class, with no flow hint.
    let by_label = [
        ("KC_g", NeuronClass::KenyonCell),
        ("MBON05", NeuronClass::MbOutput),
        ("DNp01", NeuronClass::Descending),
        ("Motor_leg_1", NeuronClass::Motor),
        ("LN_GABA_A", NeuronClass::LocalInter),
    ];
    for (label, expected) in by_label {
        assert_eq!(classify_cell_type(Some(label), None).unwrap(), expected);
    }

    // With no cell type, the flow label decides.
    assert_eq!(
        classify_cell_type(None, Some("efferent")).unwrap(),
        NeuronClass::Motor,
    );
    // With neither, the result is the catch-all class.
    assert_eq!(classify_cell_type(None, None).unwrap(), NeuronClass::Other);
}
#[test]
fn malformed_tsv_surfaces_row_level_error() {
    let dir = TempDir::new().expect("temp");
    let root = dir.path();

    // Neurons and classification are the valid fixture files.
    fs::write(root.join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
    fs::write(
        root.join("classification.tsv"),
        fixture::classification_tsv(),
    )
    .unwrap();

    // Connections: valid header and first data row, then a second data
    // row whose pre_id is not an integer.
    let broken = "pre_id\tpost_id\tneuropil\tsyn_count\tsyn_weight\tnt_type\n\
                  10000005\t10000013\tMB_CA_L\t12\t12.0\tACH\n\
                  BROKEN\t10000013\tMB_CA_L\t12\t12.0\tACH\n";
    fs::write(root.join("connections.tsv"), broken).unwrap();

    match load_flywire(root).expect_err("must fail on BROKEN row") {
        FlywireError::MalformedRow { file, line, .. } => {
            assert_eq!(file, "connections.tsv");
            assert_eq!(line, 3, "expected line 3 (header=1, first data=2)");
        }
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn unknown_cell_type_folds_to_other_in_default_mode() {
    // The default classifier maps any unmapped label to `Other`:
    // FlyWire has ~8k cell types and the coarse bucket is the v1
    // contract.
    assert_eq!(
        classify_cell_type(Some("ZZZ_novel_type"), None).unwrap(),
        NeuronClass::Other
    );
}
#[test]
fn unknown_cell_type_is_a_named_error_in_strict_mode() {
    // Known labels are unaffected by strict mode.
    assert_eq!(
        classify_cell_type_strict(Some("KC_g"), None, 1).unwrap(),
        NeuronClass::KenyonCell,
    );

    // Unknown labels surface `FlywireError::UnknownCellType`, so
    // callers auditing prefix coverage can opt in to the strict path.
    let rejected = classify_cell_type_strict(Some("ZZZ_novel_type"), None, 99)
        .expect_err("strict must reject unknown cell type");
    match rejected {
        FlywireError::UnknownCellType { raw, neuron_id } => {
            assert_eq!(raw, "ZZZ_novel_type");
            assert_eq!(neuron_id, 99);
        }
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn unknown_nt_type_in_neurons_file_fails_load() {
    let dir = TempDir::new().expect("temp");
    let root = dir.path();

    // Corrupt the NT label on the first neuron row only.
    let original_row = "10000001\t9000001\tPR_R1\tHIST\t";
    let corrupted_row = "10000001\t9000001\tPR_R1\tBOGUS\t";
    let bad_neurons = fixture::neurons_tsv().replacen(original_row, corrupted_row, 1);

    fs::write(root.join("neurons.tsv"), bad_neurons).unwrap();
    fs::write(
        root.join("classification.tsv"),
        fixture::classification_tsv(),
    )
    .unwrap();
    fs::write(root.join("connections.tsv"), fixture::connections_tsv()).unwrap();

    match load_flywire(root).expect_err("must fail on BOGUS nt_type") {
        FlywireError::UnknownNtType { raw, neuron_id } => {
            assert_eq!(raw, "BOGUS");
            assert_eq!(neuron_id, 10_000_001);
        }
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn dangling_synapse_reference_is_a_named_error() {
    let dir = TempDir::new().expect("temp");
    let root = dir.path();

    fs::write(root.join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
    fs::write(
        root.join("classification.tsv"),
        fixture::classification_tsv(),
    )
    .unwrap();

    // Fixture connections plus one extra edge whose post_id does not
    // exist in neurons.tsv.
    let mut with_dangling = fixture::connections_tsv();
    with_dangling.push_str("10000005\t99999999\tSMP_L\t3\t3.0\tACH\n");
    fs::write(root.join("connections.tsv"), with_dangling).unwrap();

    match load_flywire(root).expect_err("must fail on dangling post_id") {
        FlywireError::UnknownPostNeuron(id) => assert_eq!(id, 99_999_999),
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn duplicate_neuron_id_is_a_named_error() {
    let dir = TempDir::new().expect("temp");
    let root = dir.path();

    // Fixture neurons plus a repeat of the first neuron row at the end.
    let mut with_duplicate = fixture::neurons_tsv();
    with_duplicate.push_str("10000001\t9000001\tPR_R1\tHIST\tleft\tOCN\tafferent\tsensory\n");

    fs::write(root.join("neurons.tsv"), with_duplicate).unwrap();
    fs::write(
        root.join("classification.tsv"),
        fixture::classification_tsv(),
    )
    .unwrap();
    fs::write(root.join("connections.tsv"), fixture::connections_tsv()).unwrap();

    match load_flywire(root).expect_err("must fail on duplicate neuron_id") {
        FlywireError::DuplicateNeuron(id) => assert_eq!(id, 10_000_001),
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn classification_file_is_optional() {
    // Only neurons.tsv and connections.tsv are written; with
    // classification.tsv deliberately absent the loader must still
    // succeed, taking cell types from neurons.tsv directly.
    let dir = TempDir::new().expect("temp");
    let root = dir.path();
    fs::write(root.join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
    fs::write(root.join("connections.tsv"), fixture::connections_tsv()).unwrap();

    let connectome = load_flywire(root).expect("load without classification");
    assert_eq!(connectome.num_neurons(), fixture::EXPECTED_NEURONS);
}
#[test]
fn missing_neurons_file_surfaces_io_error() {
    // The directory exists but is empty, so neurons.tsv is missing.
    let dir = TempDir::new().expect("temp");

    match load_flywire(dir.path()).expect_err("must fail without neurons.tsv") {
        FlywireError::Io { file, .. } => {
            assert_eq!(file, "neurons.tsv");
        }
        other => panic!("wrong variant: {other:?}"),
    }
}
#[test]
fn synapse_signs_follow_nt_mapping_in_fixture() {
    let (dir, _paths) = setup_fixture();
    let c = load_flywire(dir.path()).expect("load fixture");

    // The fixture includes several GABA and HIST edges, so inhibitory
    // synapses must be a non-zero fraction, but strictly below half:
    // excitatory ACH / GLUT edges dominate.
    let mut inh = 0_usize;
    let mut exc = 0_usize;
    for s in c.synapses() {
        match s.sign {
            Sign::Inhibitory => inh += 1,
            Sign::Excitatory => exc += 1,
        }
    }
    assert!(inh > 0, "fixture has no inhibitory edges: unexpected");
    assert!(exc > 0, "fixture has no excitatory edges: unexpected");

    let frac = inh as f32 / c.num_synapses() as f32;
    // The band is half-open; the failure message now says so (it
    // previously advertised a closed upper bound the check never
    // accepted).
    assert!(
        (0.05..0.5).contains(&frac),
        "inhibitory fraction {frac:.3} out of expected band [0.05, 0.5)",
    );
}
#[test]
fn dir_label_on_io_error_uses_filename_only() {
    // Defensive check: the Io variant carries the short file name, not
    // the full path, so the error text does not depend on where the
    // directory lives.
    let missing_dir = PathBuf::from("/nonexistent/__connectome_fly_test__");

    match load_flywire(&missing_dir).expect_err("must fail on missing dir") {
        FlywireError::Io { file, .. } => assert_eq!(file, "neurons.tsv"),
        other => panic!("wrong variant: {other:?}"),
    }
}