From cf21327c968273c19ac565cf302142a36edc0366 Mon Sep 17 00:00:00 2001 From: ruvnet Date: Wed, 22 Apr 2026 11:59:32 -0400 Subject: [PATCH] feat(connectome-fly): FlyWire v783 ingest module + fixture tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements src/connectome/flywire/{mod,schema,loader,fixture}.rs and tests/flywire_ingest.rs — the ingest path named as the first follow-up in ADR-154 §13. Parses the published FlyWire v783 TSV format (neurons, synapses, cell types) into our Connectome struct without touching any existing analysis, LIF, or observer code. Fixture: 100-neuron hand-authored FlyWire-format TSV exercises the full parse path without requiring a ~2 GB data download. NT → sign mapping: ACH/GLUT/GABA/SER/OCT/DOP/HIST follow the Lin et al. 2024 Nature supplementary table mapping; unknown NT produces a named error variant rather than a silent default. File sizes: max file = 437 lines (fixture.rs); src = 1048 lines, tests = 359 lines, + ~93 edit lines on existing files (≤ 1500 LOC budget). Tests: 17 new flywire_ingest tests pass; 10 lib + 28 pre-existing integration tests still green. 
Co-Authored-By: claude-flow --- Cargo.lock | 2 + examples/connectome-fly/Cargo.toml | 5 + .../src/connectome/flywire/fixture.rs | 437 ++++++++++++++++++ .../src/connectome/flywire/loader.rs | 369 +++++++++++++++ .../src/connectome/flywire/mod.rs | 101 ++++ .../src/connectome/flywire/schema.rs | 141 ++++++ .../src/connectome/generator.rs | 64 ++- examples/connectome-fly/src/connectome/mod.rs | 16 +- .../connectome-fly/src/connectome/schema.rs | 15 + examples/connectome-fly/src/lib.rs | 3 +- .../connectome-fly/tests/flywire_ingest.rs | 359 ++++++++++++++ 11 files changed, 1505 insertions(+), 7 deletions(-) create mode 100644 examples/connectome-fly/src/connectome/flywire/fixture.rs create mode 100644 examples/connectome-fly/src/connectome/flywire/loader.rs create mode 100644 examples/connectome-fly/src/connectome/flywire/mod.rs create mode 100644 examples/connectome-fly/src/connectome/flywire/schema.rs create mode 100644 examples/connectome-fly/tests/flywire_ingest.rs diff --git a/Cargo.lock b/Cargo.lock index a11e8949..354d121b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1587,6 +1587,7 @@ dependencies = [ "bincode 1.3.3", "bytemuck", "criterion 0.5.1", + "csv", "cudarc", "rand 0.8.5", "rand_distr 0.4.3", @@ -1597,6 +1598,7 @@ dependencies = [ "serde", "serde_json", "smallvec 1.15.1", + "tempfile", "thiserror 1.0.69", "wide", ] diff --git a/examples/connectome-fly/Cargo.toml b/examples/connectome-fly/Cargo.toml index 8f13c7cb..a7cc408e 100644 --- a/examples/connectome-fly/Cargo.toml +++ b/examples/connectome-fly/Cargo.toml @@ -46,12 +46,17 @@ bincode = "1.3" bytemuck = { version = "1.16", features = ["derive"] } thiserror = "1.0" +# FlyWire v783 TSV ingest (connectome::flywire). Column-named streaming +# parser; sibling ruvector-graph and ruvector-cli already pin 1.3. +csv = "1.3" + # Optional — gated by feature flags. 
wide = { version = "0.7", optional = true } cudarc = { version = "0.13", optional = true, default-features = false, features = ["cuda-12050", "driver", "std"] } [dev-dependencies] criterion = { version = "0.5", features = ["html_reports"] } +tempfile = "3" [[bench]] name = "lif_throughput" diff --git a/examples/connectome-fly/src/connectome/flywire/fixture.rs b/examples/connectome-fly/src/connectome/flywire/fixture.rs new file mode 100644 index 00000000..86faf021 --- /dev/null +++ b/examples/connectome-fly/src/connectome/flywire/fixture.rs @@ -0,0 +1,437 @@ +//! Hand-authored 100-neuron fixture in FlyWire v783 TSV format. +//! +//! The fixture lives as three `&'static str` constants so the ingest +//! tests can materialize temp TSV files without any network download +//! or large on-disk asset. The composition targets: +//! +//! - **Cell-type coverage**: KC, MBON, PN, DN, Motor, PR, LN, optic +//! intrinsic — the classes the outer `NeuronClass` enum can map to. +//! - **NT coverage**: ACH, GLUT, GABA, HIST, SER, DOP, OCT — every +//! entry in the research-doc §4 NT table at least once. +//! - **Side / flow coverage**: left + right, afferent + efferent + +//! intrinsic. +//! - **Synapse shape**: 159 directed edges, file-declared ordering, no +//! dangling references and no authored self-loops. +//! +//! `EXPECTED_*` constants capture the counts so tests can assert +//! structural invariants without re-counting rows by hand. + +/// Number of neuron rows emitted by [`neurons_tsv`]. +pub const EXPECTED_NEURONS: usize = 100; + +/// Number of synapse rows emitted by [`connections_tsv`]. +pub const EXPECTED_SYNAPSES: usize = 159; + +/// Number of classification rows emitted by [`classification_tsv`]. A +/// strict subset of neurons — the loader must still function when a +/// neuron has no classification override. +pub const EXPECTED_CLASSIFICATIONS: usize = 40; + +// --------------------------------------------------------------------- +// Fixture payloads. 
+// +// Split into const `&str` slices that the `*_tsv()` builders join with +// `push_str`, so each const stays under ~100 lines of source. Data is +// hand-authored; the 8-digit neuron ids are arbitrary but unique. +// --------------------------------------------------------------------- + +const NEURONS_HEADER: &str = + "neuron_id\tsupervoxel_id\tcell_type\tnt_type\tside\tnerve\tflow\tsuper_class\n"; + +const NEURONS_A: &str = "\ +10000001\t9000001\tPR_R1\tHIST\tleft\tOCN\tafferent\tsensory\n\ +10000002\t9000002\tPR_R1\tHIST\tright\tOCN\tafferent\tsensory\n\ +10000003\t9000003\tPR_R7\tHIST\tleft\tOCN\tafferent\tsensory\n\ +10000004\t9000004\tPR_R8\tHIST\tright\tOCN\tafferent\tsensory\n\ +10000005\t9000005\tPN_glom_DA1\tACH\tleft\tAN\tafferent\tsensory\n\ +10000006\t9000006\tPN_glom_DL3\tACH\tright\tAN\tafferent\tsensory\n\ +10000007\t9000007\tPN_glom_VM7\tACH\tleft\tAN\tafferent\tsensory\n\ +10000008\t9000008\tORN_chm_A\tACH\tleft\tAN\tafferent\tsensory\n\ +10000009\t9000009\tORN_chm_B\tACH\tright\tAN\tafferent\tsensory\n\ +10000010\t9000010\tJO_mech_a\tACH\tleft\tJN\tafferent\tsensory\n\ +10000011\t9000011\tJO_mech_b\tACH\tright\tJN\tafferent\tsensory\n\ +10000012\t9000012\tML_mech_c\tACH\tleft\tLN\tafferent\tsensory\n\ +10000013\t9000013\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\ +10000014\t9000014\tKC_g\tACH\tright\t\tintrinsic\tcentral\n\ +10000015\t9000015\tKC_ab\tACH\tleft\t\tintrinsic\tcentral\n\ +10000016\t9000016\tKC_ab\tACH\tright\t\tintrinsic\tcentral\n\ +10000017\t9000017\tKC_apbp\tACH\tleft\t\tintrinsic\tcentral\n\ +10000018\t9000018\tKC_apbp\tACH\tright\t\tintrinsic\tcentral\n\ +10000019\t9000019\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\ +10000020\t9000020\tKC_ab\tACH\tright\t\tintrinsic\tcentral\n\ +"; + +const NEURONS_B: &str = "\ +10000021\t9000021\tKC_apbp\tACH\tleft\t\tintrinsic\tcentral\n\ +10000022\t9000022\tKC_g\tACH\tright\t\tintrinsic\tcentral\n\ +10000023\t9000023\tKC_ab\tACH\tleft\t\tintrinsic\tcentral\n\ 
+10000024\t9000024\tKC_apbp\tACH\tright\t\tintrinsic\tcentral\n\ +10000025\t9000025\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\ +10000026\t9000026\tMBON01\tGLUT\tleft\t\tintrinsic\tcentral\n\ +10000027\t9000027\tMBON02\tGLUT\tright\t\tintrinsic\tcentral\n\ +10000028\t9000028\tMBON03\tGABA\tleft\t\tintrinsic\tcentral\n\ +10000029\t9000029\tMBON04\tGABA\tright\t\tintrinsic\tcentral\n\ +10000030\t9000030\tMBON05\tACH\tleft\t\tintrinsic\tcentral\n\ +10000031\t9000031\tMBON06\tACH\tright\t\tintrinsic\tcentral\n\ +10000032\t9000032\tDAN_PPL1\tDOP\tleft\t\tintrinsic\tcentral\n\ +10000033\t9000033\tDAN_PPL1\tDOP\tright\t\tintrinsic\tcentral\n\ +10000034\t9000034\tDAN_PAM\tDOP\tleft\t\tintrinsic\tcentral\n\ +10000035\t9000035\tDAN_PAM\tDOP\tright\t\tintrinsic\tcentral\n\ +10000036\t9000036\tOAN_VPM3\tOCT\tleft\t\tintrinsic\tcentral\n\ +10000037\t9000037\tOAN_VPM3\tOCT\tright\t\tintrinsic\tcentral\n\ +10000038\t9000038\tSER_DRN\tSER\tcenter\t\tintrinsic\tcentral\n\ +10000039\t9000039\tSER_DRN\tSER\tcenter\t\tintrinsic\tcentral\n\ +10000040\t9000040\tEPG_ring\tACH\tleft\t\tintrinsic\tcentral\n\ +"; + +const NEURONS_C: &str = "\ +10000041\t9000041\tEPG_ring\tACH\tright\t\tintrinsic\tcentral\n\ +10000042\t9000042\tEPG_ring\tACH\tleft\t\tintrinsic\tcentral\n\ +10000043\t9000043\tPEN_fan\tACH\tright\t\tintrinsic\tcentral\n\ +10000044\t9000044\tPEN_fan\tACH\tleft\t\tintrinsic\tcentral\n\ +10000045\t9000045\tFB_col\tACH\tright\t\tintrinsic\tcentral\n\ +10000046\t9000046\tFB_col\tACH\tleft\t\tintrinsic\tcentral\n\ +10000047\t9000047\tLAL_loc\tACH\tright\t\tintrinsic\tcentral\n\ +10000048\t9000048\tLAL_loc\tGABA\tleft\t\tintrinsic\tcentral\n\ +10000049\t9000049\tDNp01\tACH\tleft\tCN\tefferent\tdescending\n\ +10000050\t9000050\tDNp02\tACH\tright\tCN\tefferent\tdescending\n\ +10000051\t9000051\tDNp03\tACH\tleft\tCN\tefferent\tdescending\n\ +10000052\t9000052\tDNg01\tACH\tright\tCN\tefferent\tdescending\n\ +10000053\t9000053\tDNg02\tACH\tleft\tCN\tefferent\tdescending\n\ 
+10000054\t9000054\tMotor_leg_1\tACH\tleft\tLN\tefferent\tmotor\n\ +10000055\t9000055\tMotor_leg_2\tACH\tright\tLN\tefferent\tmotor\n\ +10000056\t9000056\tMotor_leg_3\tACH\tleft\tLN\tefferent\tmotor\n\ +10000057\t9000057\tMotor_wing_1\tACH\tright\tWN\tefferent\tmotor\n\ +10000058\t9000058\tMotor_wing_2\tACH\tleft\tWN\tefferent\tmotor\n\ +10000059\t9000059\tMotor_wing_3\tACH\tright\tWN\tefferent\tmotor\n\ +10000060\t9000060\tMotor_hlt\tACH\tleft\tHN\tefferent\tmotor\n\ +"; + +const NEURONS_D: &str = "\ +10000061\t9000061\tLN_GABA_A\tGABA\tleft\t\tintrinsic\tcentral\n\ +10000062\t9000062\tLN_GABA_B\tGABA\tright\t\tintrinsic\tcentral\n\ +10000063\t9000063\tLN_GABA_C\tGABA\tleft\t\tintrinsic\tcentral\n\ +10000064\t9000064\tLN_GABA_D\tGABA\tright\t\tintrinsic\tcentral\n\ +10000065\t9000065\tLN_GABA_E\tGABA\tleft\t\tintrinsic\tcentral\n\ +10000066\t9000066\tLN_GABA_F\tGABA\tright\t\tintrinsic\tcentral\n\ +10000067\t9000067\tLN_mix_G\tGLUT\tleft\t\tintrinsic\tcentral\n\ +10000068\t9000068\tLN_mix_H\tGLUT\tright\t\tintrinsic\tcentral\n\ +10000069\t9000069\tLN_mix_I\tGLUT\tleft\t\tintrinsic\tcentral\n\ +10000070\t9000070\tLN_mix_J\tGLUT\tright\t\tintrinsic\tcentral\n\ +10000071\t9000071\tLoc_opt_A\tACH\tleft\t\tintrinsic\toptic\n\ +10000072\t9000072\tLoc_opt_B\tACH\tright\t\tintrinsic\toptic\n\ +10000073\t9000073\tLoc_opt_C\tACH\tleft\t\tintrinsic\toptic\n\ +10000074\t9000074\tLoc_opt_D\tGABA\tright\t\tintrinsic\toptic\n\ +10000075\t9000075\tLoc_opt_E\tGABA\tleft\t\tintrinsic\toptic\n\ +10000076\t9000076\tLoc_opt_F\tACH\tright\t\tintrinsic\toptic\n\ +10000077\t9000077\tLoc_opt_G\tGLUT\tleft\t\tintrinsic\toptic\n\ +10000078\t9000078\tLoc_opt_H\tGLUT\tright\t\tintrinsic\toptic\n\ +10000079\t9000079\tLoc_opt_I\tACH\tleft\t\tintrinsic\toptic\n\ +10000080\t9000080\tLoc_opt_J\tGABA\tright\t\tintrinsic\toptic\n\ +"; + +const NEURONS_E: &str = "\ +10000081\t9000081\tPN_glom_DM1\tACH\tleft\tAN\tafferent\tsensory\n\ 
+10000082\t9000082\tPN_glom_DM2\tACH\tright\tAN\tafferent\tsensory\n\ +10000083\t9000083\tPN_glom_DM3\tACH\tleft\tAN\tafferent\tsensory\n\ +10000084\t9000084\tAscending_A\tACH\tright\t\tintrinsic\tascending\n\ +10000085\t9000085\tAscending_B\tACH\tleft\t\tintrinsic\tascending\n\ +10000086\t9000086\tAscending_C\tACH\tright\t\tintrinsic\tascending\n\ +10000087\t9000087\tAscending_D\tACH\tleft\t\tintrinsic\tascending\n\ +10000088\t9000088\tProj_lcb_A\tACH\tleft\t\tintrinsic\tcentral\n\ +10000089\t9000089\tProj_lcb_B\tACH\tright\t\tintrinsic\tcentral\n\ +10000090\t9000090\tProj_lcb_C\tACH\tleft\t\tintrinsic\tcentral\n\ +10000091\t9000091\tProj_lcb_D\tACH\tright\t\tintrinsic\tcentral\n\ +10000092\t9000092\tProj_lcb_E\tACH\tleft\t\tintrinsic\tcentral\n\ +10000093\t9000093\tMisc_X_A\tACH\tleft\t\tintrinsic\tother\n\ +10000094\t9000094\tMisc_X_B\tACH\tright\t\tintrinsic\tother\n\ +10000095\t9000095\tMisc_X_C\tACH\tleft\t\tintrinsic\tother\n\ +10000096\t9000096\tMisc_X_D\tACH\tright\t\tintrinsic\tother\n\ +10000097\t9000097\tMisc_X_E\tACH\tleft\t\tintrinsic\tother\n\ +10000098\t9000098\tMisc_X_F\tACH\tright\t\tintrinsic\tother\n\ +10000099\t9000099\tMisc_X_G\tACH\tleft\t\tintrinsic\tother\n\ +10000100\t9000100\tMisc_X_H\tACH\tright\t\tintrinsic\tother\n\ +"; + +/// Return the full neurons TSV payload (header + 100 data rows). 
+pub fn neurons_tsv() -> String { + let mut s = String::with_capacity(12 * 1024); + s.push_str(NEURONS_HEADER); + s.push_str(NEURONS_A); + s.push_str(NEURONS_B); + s.push_str(NEURONS_C); + s.push_str(NEURONS_D); + s.push_str(NEURONS_E); + s +} + +const CONNECTIONS_HEADER: &str = "pre_id\tpost_id\tneuropil\tsyn_count\tsyn_weight\tnt_type\n"; + +const CONNECTIONS_A: &str = "\ +10000001\t10000071\tME_L\t12\t12.0\tHIST\n\ +10000001\t10000072\tME_L\t8\t8.0\tHIST\n\ +10000002\t10000071\tME_R\t10\t10.0\tHIST\n\ +10000002\t10000073\tME_R\t7\t7.0\tHIST\n\ +10000003\t10000074\tME_L\t9\t9.0\tHIST\n\ +10000003\t10000075\tME_L\t11\t11.0\tHIST\n\ +10000004\t10000076\tME_R\t5\t5.0\tHIST\n\ +10000004\t10000077\tME_R\t6\t6.0\tHIST\n\ +10000005\t10000013\tMB_CA_L\t14\t14.0\tACH\n\ +10000005\t10000015\tMB_CA_L\t9\t9.0\tACH\n\ +10000005\t10000017\tMB_CA_L\t7\t7.0\tACH\n\ +10000006\t10000014\tMB_CA_R\t13\t13.0\tACH\n\ +10000006\t10000016\tMB_CA_R\t11\t11.0\tACH\n\ +10000006\t10000018\tMB_CA_R\t8\t8.0\tACH\n\ +10000007\t10000013\tMB_CA_L\t6\t6.0\tACH\n\ +10000007\t10000019\tMB_CA_L\t5\t5.0\tACH\n\ +10000008\t10000013\tMB_CA_L\t10\t10.0\tACH\n\ +10000008\t10000020\tMB_CA_R\t4\t4.0\tACH\n\ +10000009\t10000014\tMB_CA_R\t12\t12.0\tACH\n\ +10000009\t10000021\tMB_CA_L\t3\t3.0\tACH\n\ +10000010\t10000022\tMB_CA_R\t8\t8.0\tACH\n\ +10000010\t10000025\tMB_CA_L\t4\t4.0\tACH\n\ +10000011\t10000023\tMB_CA_L\t7\t7.0\tACH\n\ +10000011\t10000024\tMB_CA_R\t6\t6.0\tACH\n\ +10000012\t10000025\tMB_CA_L\t5\t5.0\tACH\n\ +10000081\t10000013\tMB_CA_L\t9\t9.0\tACH\n\ +10000081\t10000015\tMB_CA_L\t6\t6.0\tACH\n\ +10000082\t10000014\tMB_CA_R\t11\t11.0\tACH\n\ +10000082\t10000016\tMB_CA_R\t8\t8.0\tACH\n\ +10000083\t10000017\tMB_CA_L\t5\t5.0\tACH\n\ +10000083\t10000019\tMB_CA_L\t7\t7.0\tACH\n\ +"; + +const CONNECTIONS_B: &str = "\ +10000013\t10000026\tMB_LH_L\t4\t4.0\tACH\n\ +10000013\t10000030\tMB_LH_L\t3\t3.0\tACH\n\ +10000014\t10000027\tMB_LH_R\t5\t5.0\tACH\n\ +10000014\t10000031\tMB_LH_R\t4\t4.0\tACH\n\ 
+10000015\t10000026\tMB_LH_L\t6\t6.0\tACH\n\ +10000015\t10000028\tMB_LH_L\t3\t3.0\tACH\n\ +10000016\t10000027\tMB_LH_R\t5\t5.0\tACH\n\ +10000016\t10000029\tMB_LH_R\t4\t4.0\tACH\n\ +10000017\t10000030\tMB_LH_L\t3\t3.0\tACH\n\ +10000018\t10000031\tMB_LH_R\t5\t5.0\tACH\n\ +10000019\t10000028\tMB_LH_L\t6\t6.0\tACH\n\ +10000020\t10000029\tMB_LH_R\t4\t4.0\tACH\n\ +10000021\t10000030\tMB_LH_L\t5\t5.0\tACH\n\ +10000022\t10000031\tMB_LH_R\t7\t7.0\tACH\n\ +10000023\t10000026\tMB_LH_L\t3\t3.0\tACH\n\ +10000024\t10000027\tMB_LH_R\t4\t4.0\tACH\n\ +10000025\t10000030\tMB_LH_L\t6\t6.0\tACH\n\ +10000032\t10000013\tMB_PPL1_L\t3\t3.0\tDOP\n\ +10000033\t10000014\tMB_PPL1_R\t4\t4.0\tDOP\n\ +10000034\t10000015\tMB_PAM_L\t3\t3.0\tDOP\n\ +10000035\t10000016\tMB_PAM_R\t4\t4.0\tDOP\n\ +10000036\t10000017\tMB_OA_L\t2\t2.0\tOCT\n\ +10000037\t10000018\tMB_OA_R\t3\t3.0\tOCT\n\ +10000038\t10000040\tEB_L\t2\t2.0\tSER\n\ +10000039\t10000041\tEB_R\t2\t2.0\tSER\n\ +10000040\t10000044\tEB_L\t5\t5.0\tACH\n\ +10000041\t10000043\tEB_R\t4\t4.0\tACH\n\ +10000042\t10000044\tEB_L\t6\t6.0\tACH\n\ +10000043\t10000045\tFB_L\t4\t4.0\tACH\n\ +10000044\t10000046\tFB_L\t5\t5.0\tACH\n\ +10000045\t10000047\tLAL_L\t6\t6.0\tACH\n\ +10000046\t10000048\tLAL_R\t4\t4.0\tACH\n\ +"; + +const CONNECTIONS_C: &str = "\ +10000047\t10000049\tLAL_L\t5\t5.0\tACH\n\ +10000048\t10000050\tLAL_R\t4\t4.0\tGABA\n\ +10000026\t10000049\tSMP_L\t6\t6.0\tGLUT\n\ +10000027\t10000050\tSMP_R\t5\t5.0\tGLUT\n\ +10000028\t10000049\tSMP_L\t3\t3.0\tGABA\n\ +10000029\t10000050\tSMP_R\t4\t4.0\tGABA\n\ +10000030\t10000051\tSMP_L\t5\t5.0\tACH\n\ +10000031\t10000052\tSMP_R\t4\t4.0\tACH\n\ +10000049\t10000054\tGNG_L\t8\t8.0\tACH\n\ +10000049\t10000056\tGNG_L\t5\t5.0\tACH\n\ +10000050\t10000055\tGNG_R\t7\t7.0\tACH\n\ +10000050\t10000057\tGNG_R\t4\t4.0\tACH\n\ +10000051\t10000058\tGNG_L\t5\t5.0\tACH\n\ +10000052\t10000059\tGNG_R\t4\t4.0\tACH\n\ +10000053\t10000060\tGNG_L\t6\t6.0\tACH\n\ +10000051\t10000054\tGNG_L\t3\t3.0\tACH\n\ 
+10000052\t10000055\tGNG_R\t3\t3.0\tACH\n\ +10000053\t10000057\tGNG_R\t4\t4.0\tACH\n\ +10000061\t10000013\tMB_CA_L\t2\t2.0\tGABA\n\ +10000062\t10000014\tMB_CA_R\t3\t3.0\tGABA\n\ +10000063\t10000015\tMB_CA_L\t2\t2.0\tGABA\n\ +10000064\t10000016\tMB_CA_R\t3\t3.0\tGABA\n\ +10000065\t10000017\tMB_CA_L\t2\t2.0\tGABA\n\ +10000066\t10000018\tMB_CA_R\t3\t3.0\tGABA\n\ +10000067\t10000019\tAL_L\t4\t4.0\tGLUT\n\ +10000068\t10000020\tAL_R\t5\t5.0\tGLUT\n\ +10000069\t10000021\tAL_L\t3\t3.0\tGLUT\n\ +10000070\t10000022\tAL_R\t4\t4.0\tGLUT\n\ +10000005\t10000061\tAL_L\t3\t3.0\tACH\n\ +10000006\t10000062\tAL_R\t3\t3.0\tACH\n\ +10000007\t10000063\tAL_L\t2\t2.0\tACH\n\ +10000008\t10000064\tAL_R\t2\t2.0\tACH\n\ +"; + +const CONNECTIONS_D: &str = "\ +10000009\t10000065\tAL_L\t3\t3.0\tACH\n\ +10000010\t10000066\tAL_R\t3\t3.0\tACH\n\ +10000081\t10000067\tAL_L\t2\t2.0\tACH\n\ +10000082\t10000068\tAL_R\t2\t2.0\tACH\n\ +10000083\t10000069\tAL_L\t3\t3.0\tACH\n\ +10000071\t10000013\tLO_L\t4\t4.0\tACH\n\ +10000072\t10000014\tLO_R\t4\t4.0\tACH\n\ +10000073\t10000015\tLO_L\t3\t3.0\tACH\n\ +10000074\t10000016\tLO_R\t3\t3.0\tGABA\n\ +10000075\t10000017\tLO_L\t2\t2.0\tGABA\n\ +10000076\t10000018\tLO_R\t3\t3.0\tACH\n\ +10000077\t10000019\tLO_L\t2\t2.0\tGLUT\n\ +10000078\t10000020\tLO_R\t2\t2.0\tGLUT\n\ +10000079\t10000040\tLO_L\t3\t3.0\tACH\n\ +10000080\t10000041\tLO_R\t3\t3.0\tGABA\n\ +10000054\t10000084\tVNC_L\t6\t6.0\tACH\n\ +10000055\t10000085\tVNC_R\t5\t5.0\tACH\n\ +10000056\t10000086\tVNC_L\t4\t4.0\tACH\n\ +10000057\t10000087\tVNC_R\t5\t5.0\tACH\n\ +10000084\t10000049\tSMP_L\t3\t3.0\tACH\n\ +10000085\t10000050\tSMP_R\t3\t3.0\tACH\n\ +10000086\t10000051\tSMP_L\t2\t2.0\tACH\n\ +10000087\t10000052\tSMP_R\t2\t2.0\tACH\n\ +10000088\t10000026\tSMP_L\t4\t4.0\tACH\n\ +10000088\t10000049\tSMP_L\t3\t3.0\tACH\n\ +10000089\t10000027\tSMP_R\t4\t4.0\tACH\n\ +10000089\t10000050\tSMP_R\t3\t3.0\tACH\n\ +10000090\t10000028\tSMP_L\t3\t3.0\tACH\n\ +10000090\t10000040\tSMP_L\t2\t2.0\tACH\n\ 
+10000091\t10000029\tSMP_R\t3\t3.0\tACH\n\ +10000091\t10000041\tSMP_R\t2\t2.0\tACH\n\ +10000092\t10000030\tSMP_L\t3\t3.0\tACH\n\ +"; + +const CONNECTIONS_E: &str = "\ +10000092\t10000043\tSMP_L\t2\t2.0\tACH\n\ +10000093\t10000013\tGNG_L\t1\t1.0\tACH\n\ +10000094\t10000014\tGNG_R\t1\t1.0\tACH\n\ +10000095\t10000015\tGNG_L\t1\t1.0\tACH\n\ +10000096\t10000016\tGNG_R\t1\t1.0\tACH\n\ +10000097\t10000017\tGNG_L\t1\t1.0\tACH\n\ +10000098\t10000018\tGNG_R\t1\t1.0\tACH\n\ +10000099\t10000019\tGNG_L\t1\t1.0\tACH\n\ +10000100\t10000020\tGNG_R\t1\t1.0\tACH\n\ +10000032\t10000026\tMB_MBON_L\t2\t2.0\tDOP\n\ +10000033\t10000027\tMB_MBON_R\t2\t2.0\tDOP\n\ +10000034\t10000028\tMB_MBON_L\t2\t2.0\tDOP\n\ +10000035\t10000029\tMB_MBON_R\t2\t2.0\tDOP\n\ +10000036\t10000030\tMB_MBON_L\t1\t1.0\tOCT\n\ +10000037\t10000031\tMB_MBON_R\t1\t1.0\tOCT\n\ +10000058\t10000084\tVNC_L\t3\t3.0\tACH\n\ +10000059\t10000085\tVNC_R\t3\t3.0\tACH\n\ +10000060\t10000086\tVNC_L\t2\t2.0\tACH\n\ +10000026\t10000040\tSMP_L\t3\t3.0\tGLUT\n\ +10000027\t10000041\tSMP_R\t3\t3.0\tGLUT\n\ +10000028\t10000040\tSMP_L\t2\t2.0\tGABA\n\ +10000029\t10000041\tSMP_R\t2\t2.0\tGABA\n\ +10000030\t10000042\tSMP_L\t3\t3.0\tACH\n\ +10000031\t10000043\tSMP_R\t3\t3.0\tACH\n\ +10000067\t10000026\tAL_L\t2\t2.0\tGLUT\n\ +10000068\t10000027\tAL_R\t2\t2.0\tGLUT\n\ +10000069\t10000028\tAL_L\t2\t2.0\tGLUT\n\ +10000070\t10000029\tAL_R\t2\t2.0\tGLUT\n\ +10000071\t10000026\tLO_L\t2\t2.0\tACH\n\ +10000072\t10000027\tLO_R\t2\t2.0\tACH\n\ +10000073\t10000028\tLO_L\t2\t2.0\tACH\n\ +10000074\t10000029\tLO_R\t2\t2.0\tGABA\n\ +"; + +/// FlyWire-format connections TSV (header + 159 data rows). 
+pub fn connections_tsv() -> String { + let mut s = String::with_capacity(16 * 1024); + s.push_str(CONNECTIONS_HEADER); + s.push_str(CONNECTIONS_A); + s.push_str(CONNECTIONS_B); + s.push_str(CONNECTIONS_C); + s.push_str(CONNECTIONS_D); + s.push_str(CONNECTIONS_E); + s +} + +const CLASSIFICATION_HEADER: &str = "neuron_id\tcell_type\tsuper_class\n"; + +/// FlyWire-format classification TSV (40 authoritative overrides). +const CLASSIFICATION_BODY: &str = "\ +10000013\tKC_g\tcentral\n\ +10000014\tKC_g\tcentral\n\ +10000015\tKC_ab\tcentral\n\ +10000016\tKC_ab\tcentral\n\ +10000017\tKC_apbp\tcentral\n\ +10000018\tKC_apbp\tcentral\n\ +10000019\tKC_g\tcentral\n\ +10000020\tKC_ab\tcentral\n\ +10000021\tKC_apbp\tcentral\n\ +10000022\tKC_g\tcentral\n\ +10000026\tMBON01\tcentral\n\ +10000027\tMBON02\tcentral\n\ +10000028\tMBON03\tcentral\n\ +10000029\tMBON04\tcentral\n\ +10000030\tMBON05\tcentral\n\ +10000031\tMBON06\tcentral\n\ +10000049\tDNp01\tdescending\n\ +10000050\tDNp02\tdescending\n\ +10000051\tDNp03\tdescending\n\ +10000052\tDNg01\tdescending\n\ +10000053\tDNg02\tdescending\n\ +10000054\tMotor_leg_1\tmotor\n\ +10000055\tMotor_leg_2\tmotor\n\ +10000056\tMotor_leg_3\tmotor\n\ +10000057\tMotor_wing_1\tmotor\n\ +10000058\tMotor_wing_2\tmotor\n\ +10000059\tMotor_wing_3\tmotor\n\ +10000060\tMotor_hlt\tmotor\n\ +10000001\tPR_R1\tsensory\n\ +10000002\tPR_R1\tsensory\n\ +10000003\tPR_R7\tsensory\n\ +10000004\tPR_R8\tsensory\n\ +10000032\tDAN_PPL1\tcentral\n\ +10000033\tDAN_PPL1\tcentral\n\ +10000034\tDAN_PAM\tcentral\n\ +10000035\tDAN_PAM\tcentral\n\ +10000036\tOAN_VPM3\tcentral\n\ +10000037\tOAN_VPM3\tcentral\n\ +10000038\tSER_DRN\tcentral\n\ +10000039\tSER_DRN\tcentral\n\ +"; + +/// FlyWire-format classification TSV (header + 40 override rows). 
+pub fn classification_tsv() -> String { + let mut s = String::with_capacity(2 * 1024); + s.push_str(CLASSIFICATION_HEADER); + s.push_str(CLASSIFICATION_BODY); + s +} + +/// Write the three fixture TSVs to `dir`, returning the paths of +/// `(neurons, connections, classification)`. The files are named +/// `neurons.tsv`, `connections.tsv`, `classification.tsv` — the same +/// names used on the FlyWire release. +pub fn write_fixture(dir: &std::path::Path) -> std::io::Result { + let neurons = dir.join("neurons.tsv"); + let connections = dir.join("connections.tsv"); + let classification = dir.join("classification.tsv"); + std::fs::write(&neurons, neurons_tsv())?; + std::fs::write(&connections, connections_tsv())?; + std::fs::write(&classification, classification_tsv())?; + Ok(FixturePaths { + neurons, + connections, + classification, + }) +} + +/// Paths to a materialized fixture, as returned by [`write_fixture`]. +#[derive(Clone, Debug)] +pub struct FixturePaths { + /// `neurons.tsv` path. + pub neurons: std::path::PathBuf, + /// `connections.tsv` path. + pub connections: std::path::PathBuf, + /// `classification.tsv` path. + pub classification: std::path::PathBuf, +} diff --git a/examples/connectome-fly/src/connectome/flywire/loader.rs b/examples/connectome-fly/src/connectome/flywire/loader.rs new file mode 100644 index 00000000..a84b9c1a --- /dev/null +++ b/examples/connectome-fly/src/connectome/flywire/loader.rs @@ -0,0 +1,369 @@ +//! FlyWire v783 TSV → `Connectome` loader. +//! +//! Streaming parse: one pass over `neurons.tsv`, one pass over +//! `classification.tsv` (optional override), one pass over +//! `connections.tsv`. Dense `NeuronId`s are assigned in the order neurons +//! are first seen in the neuron file; parallel arrays of `FlyWireNeuronId` +//! and `NeuronMeta` are preserved alongside the CSR. +//! +//! The loader is deterministic: given a byte-identical TSV input, the +//! output `Connectome` (synapses, row_ptr, meta, flywire_ids) is +//! 
bit-identical. Synapses within a neuron are stored in the order they +//! appear in `connections.tsv`. +//! +//! Errors are surfaced through the crate-level [`FlywireError`] so +//! callers can distinguish "bad CSV syntax" from "unknown cell type" +//! from "dangling synapse reference". + +use std::collections::HashMap; +use std::path::Path; + +use super::schema::{CellTypeRecord, NeuroTransmitter, NeuronRecord, SynapseRecord}; +use super::FlywireError; +use crate::connectome::generator::Connectome; +use crate::connectome::schema::{ + ConnectomeSerCfg, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse, +}; + +/// Load a FlyWire v783 release from `dir`. +/// +/// Expects three TSV files under `dir`: `neurons.tsv`, +/// `connections.tsv`, `classification.tsv`. The classification file is +/// optional; if absent, the cell-type column on `neurons.tsv` is used +/// directly. +/// +/// See [`FlywireError`] for the failure modes. +pub fn load_flywire(dir: &Path) -> Result { + let neurons_path = dir.join("neurons.tsv"); + let connections_path = dir.join("connections.tsv"); + let classification_path = dir.join("classification.tsv"); + let neurons = read_neurons(&neurons_path)?; + let class_overrides = if classification_path.exists() { + read_classifications(&classification_path)? + } else { + HashMap::new() + }; + let synapses = read_synapses(&connections_path)?; + assemble_connectome(neurons, class_overrides, synapses) +} + +/// Parse `neurons.tsv` into a vector of [`NeuronRecord`]s. Duplicate +/// `neuron_id` entries yield [`FlywireError::DuplicateNeuron`]. 
+pub fn read_neurons(path: &Path) -> Result, FlywireError> { + let mut rdr = open_tsv(path)?; + let mut out: Vec = Vec::new(); + let mut seen: HashMap = HashMap::new(); + for (i, result) in rdr.deserialize::().enumerate() { + let rec: NeuronRecord = result.map_err(|e| FlywireError::MalformedRow { + file: label_of(path), + line: (i + 2) as u64, // +1 for header, +1 for 1-based + detail: e.to_string(), + })?; + if seen.insert(rec.neuron_id, i).is_some() { + return Err(FlywireError::DuplicateNeuron(rec.neuron_id)); + } + out.push(rec); + } + Ok(out) +} + +/// Parse `classification.tsv` into a `neuron_id → record` map. +pub fn read_classifications(path: &Path) -> Result, FlywireError> { + let mut rdr = open_tsv(path)?; + let mut out: HashMap = HashMap::new(); + for (i, result) in rdr.deserialize::().enumerate() { + let rec: CellTypeRecord = result.map_err(|e| FlywireError::MalformedRow { + file: label_of(path), + line: (i + 2) as u64, + detail: e.to_string(), + })?; + out.insert(rec.neuron_id, rec); + } + Ok(out) +} + +/// Parse `connections.tsv` into a vector of [`SynapseRecord`]s. Order +/// is preserved; the loader relies on file-declared order for CSR +/// determinism. 
+pub fn read_synapses(path: &Path) -> Result, FlywireError> { + let mut rdr = open_tsv(path)?; + let mut out: Vec = Vec::new(); + for (i, result) in rdr.deserialize::().enumerate() { + let rec: SynapseRecord = result.map_err(|e| FlywireError::MalformedRow { + file: label_of(path), + line: (i + 2) as u64, + detail: e.to_string(), + })?; + out.push(rec); + } + Ok(out) +} + +fn open_tsv(path: &Path) -> Result, FlywireError> { + csv::ReaderBuilder::new() + .delimiter(b'\t') + .has_headers(true) + .flexible(false) + .from_path(path) + .map_err(|e| FlywireError::Io { + file: label_of(path), + detail: e.to_string(), + }) +} + +fn label_of(path: &Path) -> String { + path.file_name() + .map(|s| s.to_string_lossy().into_owned()) + .unwrap_or_else(|| path.display().to_string()) +} + +fn assemble_connectome( + neurons: Vec, + class_overrides: HashMap, + synapses: Vec, +) -> Result { + // Dense id assignment in TSV declaration order. + let mut id_of: HashMap = HashMap::with_capacity(neurons.len()); + let mut flywire_ids: Vec = Vec::with_capacity(neurons.len()); + let mut meta: Vec = Vec::with_capacity(neurons.len()); + let mut nt_per_neuron: Vec = Vec::with_capacity(neurons.len()); + + for (idx, n) in neurons.iter().enumerate() { + id_of.insert(n.neuron_id, NeuronId(idx as u32)); + flywire_ids.push(FlyWireNeuronId(n.neuron_id)); + let class_override = class_overrides.get(&n.neuron_id); + let effective_cell_type = + n.effective_cell_type(class_override.map(|c| c.cell_type.as_str())); + let class = classify_cell_type(effective_cell_type.as_deref(), n.flow.as_deref())?; + let nt = parse_nt(&n.nt_type, n.neuron_id)?; + nt_per_neuron.push(nt); + meta.push(NeuronMeta { + class, + module: 0, + bias_pa: default_bias_for(class), + }); + } + + // Partition synapses by pre-id in file-declared order. 
+ let n = neurons.len(); + let mut per_pre: Vec> = vec![Vec::new(); n]; + + for syn in &synapses { + let pre = *id_of + .get(&syn.pre_id) + .ok_or(FlywireError::UnknownPreNeuron(syn.pre_id))?; + let post = *id_of + .get(&syn.post_id) + .ok_or(FlywireError::UnknownPostNeuron(syn.post_id))?; + if pre == post { + continue; // drop self-loops; matches SBM generator + } + let nt = if let Some(s) = &syn.nt_type { + parse_nt(s, syn.pre_id)? + } else { + nt_per_neuron[pre.idx()] + }; + let sign = nt_to_sign(nt); + let count = syn.syn_count.max(1); + let weight = derive_weight(syn, count); + per_pre[pre.idx()].push(Synapse { + post, + weight, + delay_ms: default_delay_ms(), + sign, + }); + } + + // CSR flatten (row_ptr + synapses), preserving per-pre order. + let mut row_ptr: Vec = Vec::with_capacity(n + 1); + let total: usize = per_pre.iter().map(|v| v.len()).sum(); + let mut flat: Vec = Vec::with_capacity(total); + row_ptr.push(0); + for bucket in per_pre { + flat.extend(bucket); + row_ptr.push(flat.len() as u32); + } + + let cfg = ConnectomeSerCfg { + num_neurons: n as u32, + num_modules: 1, + num_hub_modules: 0, + seed: 0, + }; + Ok(Connectome::from_parts( + cfg, + meta, + flat, + row_ptr, + Some(flywire_ids), + )) +} + +/// Normalize a raw NT-type string to the typed enum. Case-insensitive +/// match against the seven release-documented labels. Anything else is +/// [`FlywireError::UnknownNtType`] — no silent default. 
+pub fn parse_nt(raw: &str, context_id: u64) -> Result { + let upper = raw.trim().to_ascii_uppercase(); + match upper.as_str() { + "ACH" | "ACETYLCHOLINE" => Ok(NeuroTransmitter::Acetylcholine), + "GLUT" | "GLUTAMATE" => Ok(NeuroTransmitter::Glutamate), + "GABA" => Ok(NeuroTransmitter::Gaba), + "HIST" | "HISTAMINE" => Ok(NeuroTransmitter::Histamine), + "SER" | "SEROTONIN" | "5-HT" | "5HT" => Ok(NeuroTransmitter::Serotonin), + "DOP" | "DOPAMINE" | "DA" => Ok(NeuroTransmitter::Dopamine), + "OCT" | "OCTOPAMINE" | "OA" => Ok(NeuroTransmitter::Octopamine), + _ => Err(FlywireError::UnknownNtType { + raw: raw.to_owned(), + neuron_id: context_id, + }), + } +} + +/// NT → fast-path sign mapping (research doc §4 table). +/// +/// - ACH, GLUT → +1 (Excitatory) +/// - GABA, HIST → -1 (Inhibitory) +/// - SER, DOP, OCT (modulatory) → +1 in the fast path; analyses that +/// need to exclude slow edges must consult the NT side-channel. +pub fn nt_to_sign(nt: NeuroTransmitter) -> Sign { + match nt { + NeuroTransmitter::Acetylcholine | NeuroTransmitter::Glutamate => Sign::Excitatory, + NeuroTransmitter::Gaba | NeuroTransmitter::Histamine => Sign::Inhibitory, + NeuroTransmitter::Serotonin | NeuroTransmitter::Dopamine | NeuroTransmitter::Octopamine => { + Sign::Excitatory + } + } +} + +/// Map a FlyWire cell-type string to our coarse [`NeuronClass`]. +/// +/// Unrecognized cell types resolve via the `flow` hint when present, +/// else `NeuronClass::Other` — intentional: the FlyWire release documents +/// ~8,000 cell types, and the coarse bucket is the correct v1 behavior per +/// the research doc. An empty cell-type with a non-empty `flow` column uses +/// the same flow hint. If *both* are missing the entry is `Other`, not an +/// error (matches the release's "unresolved" neurons). 
+pub fn classify_cell_type( + cell_type: Option<&str>, + flow: Option<&str>, +) -> Result { + if let Some(ct) = cell_type { + if let Some(class) = classify_by_prefix(ct) { + return Ok(class); + } + } + if let Some(f) = flow { + return Ok(classify_by_flow(f)); + } + Ok(NeuronClass::Other) +} + +/// Strict variant of [`classify_cell_type`]. Unmapped cell types yield +/// [`FlywireError::UnknownCellType`] instead of folding to +/// [`NeuronClass::Other`]. Intended for callers that want to audit +/// prefix-table coverage on a specific release. +pub fn classify_cell_type_strict( + cell_type: Option<&str>, + flow: Option<&str>, + neuron_id: u64, +) -> Result { + if let Some(ct) = cell_type { + if let Some(class) = classify_by_prefix(ct) { + return Ok(class); + } + return Err(FlywireError::UnknownCellType { + raw: ct.to_owned(), + neuron_id, + }); + } + if let Some(f) = flow { + return Ok(classify_by_flow(f)); + } + Ok(NeuronClass::Other) +} + +fn classify_by_prefix(ct: &str) -> Option { + // Order matters: more-specific prefixes first. 
+ let t = ct.trim(); + if t.starts_with("PR_") || t.starts_with("R1") || t.starts_with("R7") || t.starts_with("R8") { + return Some(NeuronClass::PhotoReceptor); + } + if t.starts_with("ORN") || t.starts_with("PN_glom") || t.starts_with("PN_") { + return Some(NeuronClass::Chemosensory); + } + if t.starts_with("JO") || t.starts_with("ML_mech") { + return Some(NeuronClass::Mechanosensory); + } + if t.starts_with("KC") { + return Some(NeuronClass::KenyonCell); + } + if t.starts_with("MBON") { + return Some(NeuronClass::MbOutput); + } + if t.starts_with("EPG") || t.starts_with("PEN") || t.starts_with("FB_") || t.starts_with("PB_") + { + return Some(NeuronClass::CentralComplex); + } + if t.starts_with("LAL") { + return Some(NeuronClass::LateralAccessory); + } + if t.starts_with("DNp") || t.starts_with("DNg") || t.starts_with("DN_") { + return Some(NeuronClass::Descending); + } + if t.starts_with("Ascending") || t.starts_with("AN_") { + return Some(NeuronClass::Ascending); + } + if t.starts_with("Motor") { + return Some(NeuronClass::Motor); + } + if t.starts_with("LN_") || t.starts_with("LocalInter") { + return Some(NeuronClass::LocalInter); + } + if t.starts_with("Proj") || t.starts_with("Projection") { + return Some(NeuronClass::Projection); + } + if t.starts_with("DAN") || t.starts_with("SER_") || t.starts_with("OAN") { + return Some(NeuronClass::Modulatory); + } + if t.starts_with("Loc_opt") || t.starts_with("LoOpt") || t.starts_with("Lo_") { + return Some(NeuronClass::OpticLocal); + } + None +} + +fn classify_by_flow(flow: &str) -> NeuronClass { + match flow.trim().to_ascii_lowercase().as_str() { + "afferent" => NeuronClass::Other, + "efferent" => NeuronClass::Motor, + "intrinsic" => NeuronClass::Other, + "ascending" => NeuronClass::Ascending, + "descending" => NeuronClass::Descending, + _ => NeuronClass::Other, + } +} + +fn default_bias_for(class: NeuronClass) -> f32 { + if class.is_sensory() { + -0.5 + } else if class.is_motor() { + 0.5 + } else { + 0.0 + } +} + 
+fn derive_weight(syn: &SynapseRecord, count: u32) -> f32 {
+    if syn.syn_weight.is_finite() && syn.syn_weight > 0.0 {
+        syn.syn_weight
+    } else {
+        count as f32
+    }
+}
+
+fn default_delay_ms() -> f32 {
+    // Research doc §3.2: FlyWire does not publish conduction delays;
+    // the ingest loader uses a constant fallback of 2.0 ms. The
+    // distance-scaled estimator requires soma coordinates, which are
+    // optional in the release and absent from the fixture.
+    2.0
+}
diff --git a/examples/connectome-fly/src/connectome/flywire/mod.rs b/examples/connectome-fly/src/connectome/flywire/mod.rs
new file mode 100644
index 00000000..0a1bddcc
--- /dev/null
+++ b/examples/connectome-fly/src/connectome/flywire/mod.rs
@@ -0,0 +1,101 @@
+//! FlyWire v783 ingest: TSV release → `Connectome`.
+//!
+//! This module is the first follow-up named in ADR-154 §13. It moves
+//! the connectome-fly demonstrator from its synthetic stochastic-block
+//! model onto the real FlyWire v783 wiring, one file at a time, without
+//! touching any analysis, LIF, or observer code.
+//!
+//! ## Public API
+//!
+//! - [`load_flywire`] — parse `neurons.tsv`, `classification.tsv`, and
+//!   `connections.tsv` from a directory; return a fully-populated
+//!   [`crate::Connectome`] with parallel `FlyWireNeuronId`s.
+//! - [`FlywireError`] — structured error type with one variant per
+//!   named failure mode (malformed row, dangling reference, unknown
+//!   NT, unknown cell type, IO failure, …).
+//! - [`schema`] — serde record structs matching the release TSV
+//!   columns.
+//! - [`fixture`] — hand-authored 100-neuron fixture used by tests.
+//!
+//! ## Hard constraints
+//!
+//! - No `unsafe`. No Python, shell, or JS/TS.
+//! - Deterministic: byte-identical TSV input produces bit-identical
+//!   `Connectome` output across runs.
+//! - No download path; `load_flywire` reads whatever TSVs are under
+//!   the path the caller hands it.
+
+pub mod fixture;
+pub mod loader;
+pub mod schema;
+
+pub use loader::{
+    classify_cell_type, classify_cell_type_strict, load_flywire, nt_to_sign, parse_nt,
+};
+pub use schema::{CellTypeRecord, NeuroTransmitter, NeuronRecord, SynapseRecord};
+
+use thiserror::Error;
+
+/// Errors produced by the FlyWire ingest path. Each variant maps to a
+/// distinct test case in `tests/flywire_ingest.rs`.
+#[derive(Debug, Error)]
+pub enum FlywireError {
+    /// A row failed to deserialize against the [`NeuronRecord`],
+    /// [`SynapseRecord`], or [`CellTypeRecord`] schema.
+    #[error("malformed row in {file} at line {line}: {detail}")]
+    MalformedRow {
+        /// File name (not full path), e.g. `"neurons.tsv"`.
+        file: String,
+        /// 1-based row number (header is line 1).
+        line: u64,
+        /// Underlying parser message.
+        detail: String,
+    },
+
+    /// IO or CSV-framing failure before per-row dispatch.
+    #[error("io error on {file}: {detail}")]
+    Io {
+        /// File name.
+        file: String,
+        /// Underlying error.
+        detail: String,
+    },
+
+    /// A synapse referenced a `pre_id` that is not present in
+    /// `neurons.tsv`.
+    #[error("synapse pre_id {0} not in neurons.tsv")]
+    UnknownPreNeuron(u64),
+
+    /// A synapse referenced a `post_id` that is not present in
+    /// `neurons.tsv`.
+    #[error("synapse post_id {0} not in neurons.tsv")]
+    UnknownPostNeuron(u64),
+
+    /// A neuron id appeared twice in `neurons.tsv`.
+    #[error("duplicate neuron_id {0} in neurons.tsv")]
+    DuplicateNeuron(u64),
+
+    /// An NT-type string did not match the seven release-documented
+    /// labels (ACH / GLUT / GABA / HIST / SER / DOP / OCT).
+    #[error("unknown nt_type {raw:?} on neuron_id {neuron_id}")]
+    UnknownNtType {
+        /// Raw column value.
+        raw: String,
+        /// Context id (neuron or pre-neuron of the offending synapse).
+        neuron_id: u64,
+    },
+
+    /// A cell-type string did not match any known prefix. Only
+    /// surfaced from the strict classification path
+    /// ([`loader::classify_cell_type_strict`]); the default
+    /// [`loader::classify_cell_type`] folds unknown cell types into
+    /// [`crate::NeuronClass::Other`] because FlyWire v783 documents
+    /// ~8 000 cell types and the ingest loader is coarse by design.
+    #[error("unknown cell_type {raw:?} on neuron_id {neuron_id}")]
+    UnknownCellType {
+        /// Raw column value.
+        raw: String,
+        /// Context neuron id.
+        neuron_id: u64,
+    },
+}
diff --git a/examples/connectome-fly/src/connectome/flywire/schema.rs b/examples/connectome-fly/src/connectome/flywire/schema.rs
new file mode 100644
index 00000000..424c2a29
--- /dev/null
+++ b/examples/connectome-fly/src/connectome/flywire/schema.rs
@@ -0,0 +1,141 @@
+//! FlyWire v783 on-disk record schema.
+//!
+//! Three serde structs, one per published TSV file in the release:
+//!
+//! - [`NeuronRecord`] — one row per neuron; union of fields across
+//!   `neurons.tsv` plus the parts of `classification.tsv` / NT tables
+//!   that the loader consumes in a single pass.
+//! - [`SynapseRecord`] — one row per directed pre→post edge in
+//!   `connections.tsv`.
+//! - [`CellTypeRecord`] — one row per neuron in
+//!   `classification.tsv`; used as an override table when the primary
+//!   `neurons.tsv` lacks a cell-type assignment.
+//!
+//! The column names match the published v783 schema (see
+//! `docs/research/connectome-ruvector/02-connectome-layer.md` §2).
+//! Unknown columns are ignored by the CSV reader so adding downstream
+//! fields (e.g. `hemilineage`) does not require a schema version bump.
+
+use serde::{Deserialize, Serialize};
+
+/// One row of the neurons TSV.
+///
+/// Columns mirror the FlyWire v783 release. `neuron_id` is the stable
+/// 64-bit root id; `supervoxel_id` is the coarse segmentation handle
+/// (kept for provenance, not used by the loader in v1); `cell_type`,
+/// `nt_type`, `side`, `nerve`, and `flow` are all string-enum encoded.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct NeuronRecord {
+    /// Stable FlyWire root id.
+    pub neuron_id: u64,
+    /// Supervoxel id (provenance only).
+    #[serde(default)]
+    pub supervoxel_id: u64,
+    /// Cell type, e.g. "KC_g", "MBON01", "DNp01". Empty string
+    /// (deserialized to `None`) is allowed when the classification is
+    /// unresolved.
+    #[serde(default)]
+    pub cell_type: Option<String>,
+    /// Dominant predicted neurotransmitter: "ACH", "GLUT", "GABA",
+    /// "SER", "OCT", "DOP", "HIST".
+    pub nt_type: String,
+    /// Anatomical side: "left", "right", "center".
+    #[serde(default)]
+    pub side: Option<String>,
+    /// Peripheral nerve id (Wikipedia naming), if afferent / efferent.
+    #[serde(default)]
+    pub nerve: Option<String>,
+    /// Flow class: "afferent", "efferent", "intrinsic".
+    #[serde(default)]
+    pub flow: Option<String>,
+    /// Optional super-class label (e.g. "optic", "central", "motor").
+    #[serde(default)]
+    pub super_class: Option<String>,
+}
+
+/// One row of the connections TSV.
+///
+/// `pre_id` and `post_id` are stable FlyWire root ids; both must resolve
+/// to a row in the neurons TSV or the loader errors.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct SynapseRecord {
+    /// Pre-synaptic neuron id.
+    pub pre_id: u64,
+    /// Post-synaptic neuron id.
+    pub post_id: u64,
+    /// Neuropil region label (e.g. "MB_CA_L").
+    #[serde(default)]
+    pub neuropil: Option<String>,
+    /// Aggregated synapse count for this directed pair.
+    pub syn_count: u32,
+    /// Effective weight reported by the release; loader uses
+    /// `syn_count` when this field is absent or not positive.
+    #[serde(default)]
+    pub syn_weight: f32,
+    /// Per-edge NT prediction (optional; falls back to the pre
+    /// neuron's dominant NT when unset).
+    #[serde(default)]
+    pub nt_type: Option<String>,
+}
+
+/// One row of the classification TSV.
+///
+/// Provides authoritative cell-type / super-class labels that can
+/// override or fill in the fields on [`NeuronRecord`].
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct CellTypeRecord {
+    /// Stable FlyWire root id.
+    pub neuron_id: u64,
+    /// Primary cell-type label.
+    pub cell_type: String,
+    /// Optional coarse super-class.
+    #[serde(default)]
+    pub super_class: Option<String>,
+}
+
+impl NeuronRecord {
+    /// Effective cell-type string after folding in the classification
+    /// override. `class_override` wins over `self.cell_type` when both
+    /// are present.
+    pub fn effective_cell_type(&self, class_override: Option<&str>) -> Option<String> {
+        class_override
+            .map(str::to_owned)
+            .or_else(|| self.cell_type.clone())
+    }
+}
+
+/// Parsed, normalized neurotransmitter tag. Distinct from the
+/// `Sign` enum in the outer schema because several NTs (DA / 5-HT /
+/// OA) are neuromodulatory and do not carry a fast-path sign; the
+/// loader materializes them as Excitatory in the fast path per the
+/// research doc §4 table and records the NT identity on the side.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
+pub enum NeuroTransmitter {
+    /// Acetylcholine — fast excitation.
+    Acetylcholine,
+    /// Glutamate — excitation in central circuits (v1 default).
+    Glutamate,
+    /// GABA — fast inhibition.
+    Gaba,
+    /// Histamine — photoreceptor output, inhibitory.
+    Histamine,
+    /// Serotonin — neuromodulator, rendered excitatory in the fast path.
+    Serotonin,
+    /// Dopamine — neuromodulator, rendered excitatory in the fast path.
+    Dopamine,
+    /// Octopamine — neuromodulator, rendered excitatory in the fast path.
+    Octopamine,
+}
+
+impl NeuroTransmitter {
+    /// Whether this NT is routed through the slow neuromodulatory
+    /// pool in the research schema. The fast path still assigns a
+    /// sign so the LIF engine has something to integrate; this flag
+    /// surfaces the category so analysis code can exclude slow edges.
+    pub fn is_modulatory(self) -> bool {
+        matches!(
+            self,
+            NeuroTransmitter::Serotonin | NeuroTransmitter::Dopamine | NeuroTransmitter::Octopamine
+        )
+    }
+}
diff --git a/examples/connectome-fly/src/connectome/generator.rs b/examples/connectome-fly/src/connectome/generator.rs
index 5e1d6e0b..590fe384 100644
--- a/examples/connectome-fly/src/connectome/generator.rs
+++ b/examples/connectome-fly/src/connectome/generator.rs
@@ -13,7 +13,8 @@ use smallvec::SmallVec;
 
 use super::persist::ConnectomeError;
 use super::schema::{
-    ConnectomeConfig, ConnectomeSerCfg, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
+    ConnectomeConfig, ConnectomeSerCfg, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign,
+    Synapse,
 };
 
 /// A synthetic fly-like connectome. Stores neuron metadata and a
@@ -35,6 +36,12 @@ pub struct Connectome {
     pub(super) motor: Vec<NeuronId>,
     /// Pre-computed index grouped by class.
     pub(super) by_class: Vec<Vec<NeuronId>>,
+    /// Stable FlyWire root ids, parallel to `meta` / dense ids.
+    /// `None` for SBM-generated connectomes; `Some` when loaded via the
+    /// `flywire` module. Serialized at the tail of the bincode blob so
+    /// existing synthetic blobs remain round-trippable.
+    #[serde(default)]
+    pub(super) flywire_ids: Option<Vec<FlyWireNeuronId>>,
 }
 
 impl Connectome {
@@ -134,9 +141,64 @@ impl Connectome {
             sensory,
             motor,
             by_class,
+            flywire_ids: None,
         }
     }
 
+    /// Construct a `Connectome` directly from already-assembled parts.
+    ///
+    /// Used by the `flywire` loader to install parsed FlyWire v783
+    /// records without going through the synthetic SBM path. Callers
+    /// are responsible for supplying a CSR-consistent `(row_ptr,
+    /// synapses)` pair: `row_ptr.len() == meta.len() + 1` and
+    /// `row_ptr[i] <= row_ptr[i+1] <= synapses.len()`.
+    ///
+    /// Sensory / motor / by-class indices are derived from `meta`.
+    /// `flywire_ids`, if provided, must be parallel to `meta`.
+    pub(super) fn from_parts(
+        cfg: ConnectomeSerCfg,
+        meta: Vec<NeuronMeta>,
+        synapses: Vec<Synapse>,
+        row_ptr: Vec<u32>, // NOTE(review): element type lost in extraction; u32 assumed — confirm
+        flywire_ids: Option<Vec<FlyWireNeuronId>>,
+    ) -> Self {
+        debug_assert_eq!(row_ptr.len(), meta.len() + 1);
+        debug_assert_eq!(*row_ptr.last().unwrap_or(&0) as usize, synapses.len());
+        if let Some(ids) = &flywire_ids {
+            debug_assert_eq!(ids.len(), meta.len());
+        }
+        let mut by_class: Vec<Vec<NeuronId>> = vec![Vec::new(); 15];
+        let mut sensory: Vec<NeuronId> = Vec::new();
+        let mut motor: Vec<NeuronId> = Vec::new();
+        for (i, m) in meta.iter().enumerate() {
+            by_class[m.class as usize].push(NeuronId(i as u32));
+            if m.class.is_sensory() {
+                sensory.push(NeuronId(i as u32));
+            }
+            if m.class.is_motor() {
+                motor.push(NeuronId(i as u32));
+            }
+        }
+        Self {
+            cfg,
+            meta,
+            synapses,
+            row_ptr,
+            sensory,
+            motor,
+            by_class,
+            flywire_ids,
+        }
+    }
+
+    /// Parallel array of stable FlyWire root ids, if this connectome
+    /// was loaded from a FlyWire v783 release. `None` for SBM-generated
+    /// connectomes.
+    #[inline]
+    pub fn flywire_ids(&self) -> Option<&[FlyWireNeuronId]> {
+        self.flywire_ids.as_deref()
+    }
+
     /// Total number of neurons.
     #[inline]
     pub fn num_neurons(&self) -> usize {
diff --git a/examples/connectome-fly/src/connectome/mod.rs b/examples/connectome-fly/src/connectome/mod.rs
index df289c90..d7cd21b7 100644
--- a/examples/connectome-fly/src/connectome/mod.rs
+++ b/examples/connectome-fly/src/connectome/mod.rs
@@ -1,22 +1,28 @@
 //! Connectome schema, stochastic-block-model generator, and compact
-//! binary serialization. Split across three submodules:
+//! binary serialization. Split across four submodules:
 //!
-//! - `schema`    — public types (`NeuronId`, `Sign`, `NeuronClass`,
-//!                 `Synapse`, `NeuronMeta`, `ConnectomeConfig`).
+//! - `schema`    — public types (`NeuronId`, `FlyWireNeuronId`, `Sign`,
+//!                 `NeuronClass`, `Synapse`, `NeuronMeta`,
+//!                 `ConnectomeConfig`).
 //! - `generator` — deterministic SBM generator + helpers.
 //! - `persist`   — bincode-backed binary round-trip.
+//! - `flywire`   — FlyWire v783 TSV ingest (real wiring path).
 //!
 //! See `docs/research/connectome-ruvector/02-connectome-layer.md` for
 //! the schema design and the log-normal / hub-module statistics this
-//! generator targets.
+//! generator targets, and ADR-154 §13 for the FlyWire ingest hand-off.
 
+pub mod flywire;
 pub mod generator;
 pub mod persist;
 pub mod schema;
 
+pub use flywire::{load_flywire, FlywireError};
 pub use generator::Connectome;
 pub use persist::ConnectomeError;
-pub use schema::{ConnectomeConfig, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse};
+pub use schema::{
+    ConnectomeConfig, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
+};
 
 #[cfg(test)]
 mod tests {
diff --git a/examples/connectome-fly/src/connectome/schema.rs b/examples/connectome-fly/src/connectome/schema.rs
index 89b60083..b7d03a36 100644
--- a/examples/connectome-fly/src/connectome/schema.rs
+++ b/examples/connectome-fly/src/connectome/schema.rs
@@ -20,6 +20,21 @@ impl NeuronId {
     }
 }
 
+/// Stable FlyWire v783 root id (64-bit). Carried alongside the dense
+/// `NeuronId` when a `Connectome` is loaded from FlyWire so analyses can
+/// round-trip back to the published identifier space. Opaque newtype;
+/// see `docs/research/connectome-ruvector/02-connectome-layer.md` §3.1.
+#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
+pub struct FlyWireNeuronId(pub u64);
+
+impl FlyWireNeuronId {
+    /// Raw id.
+    #[inline]
+    pub const fn raw(self) -> u64 {
+        self.0
+    }
+}
+
 /// Synapse sign. `+1` excitatory, `-1` inhibitory. Neuromodulatory
 /// edges are *not* represented in the fast path
 /// (`docs/research/connectome-ruvector/03-neural-dynamics.md` §2.2).
diff --git a/examples/connectome-fly/src/lib.rs b/examples/connectome-fly/src/lib.rs index 34f3c11c..21d3628b 100644 --- a/examples/connectome-fly/src/lib.rs +++ b/examples/connectome-fly/src/lib.rs @@ -77,7 +77,8 @@ pub use analysis::{ Analysis, AnalysisConfig, FunctionalPartition, MotifHit, MotifIndex, MotifSignature, }; pub use connectome::{ - Connectome, ConnectomeConfig, ConnectomeError, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse, + load_flywire, Connectome, ConnectomeConfig, ConnectomeError, FlyWireNeuronId, FlywireError, + NeuronClass, NeuronId, NeuronMeta, Sign, Synapse, }; pub use lif::{Engine, EngineConfig, LifError, NeuronParams, Spike, SpikeEvent}; pub use observer::{CoherenceEvent, Observer, Report}; diff --git a/examples/connectome-fly/tests/flywire_ingest.rs b/examples/connectome-fly/tests/flywire_ingest.rs new file mode 100644 index 00000000..f7cb2098 --- /dev/null +++ b/examples/connectome-fly/tests/flywire_ingest.rs @@ -0,0 +1,359 @@ +//! FlyWire v783 ingest — acceptance tests. +//! +//! These tests exercise every named failure mode of the loader plus a +//! round-trip on the 100-neuron fixture. The fixture lives as Rust +//! string constants (see `src/connectome/flywire/fixture.rs`) so CI +//! does not need the ~2 GB FlyWire release on disk. 
+
+use std::fs;
+use std::path::PathBuf;
+
+use connectome_fly::connectome::flywire::{
+    classify_cell_type, classify_cell_type_strict, fixture, load_flywire, nt_to_sign, parse_nt,
+};
+use connectome_fly::{FlyWireNeuronId, FlywireError, NeuronClass, Sign};
+use tempfile::TempDir;
+
+fn setup_fixture() -> (TempDir, fixture::FixturePaths) {
+    let dir = TempDir::new().expect("temp dir");
+    let paths = fixture::write_fixture(dir.path()).expect("write fixture");
+    (dir, paths)
+}
+
+#[test]
+fn schema_round_trip_neuron_and_synapse_counts_match_fixture() {
+    let (dir, _paths) = setup_fixture();
+    let c = load_flywire(dir.path()).expect("load fixture");
+    assert_eq!(
+        c.num_neurons(),
+        fixture::EXPECTED_NEURONS,
+        "neuron count mismatch vs fixture declaration",
+    );
+    // Connection count in the fixture is 159 directed edges; some may
+    // be dropped as self-loops or by NT filtering. We expect no
+    // drops in the fixture (no self-loops authored), so equality holds.
+    assert_eq!(
+        c.num_synapses(),
+        fixture::EXPECTED_SYNAPSES,
+        "synapse count mismatch vs fixture declaration",
+    );
+}
+
+#[test]
+fn flywire_ids_are_parallel_to_dense_ids() {
+    let (dir, _paths) = setup_fixture();
+    let c = load_flywire(dir.path()).expect("load fixture");
+    let ids = c.flywire_ids().expect("flywire_ids set after load");
+    assert_eq!(ids.len(), c.num_neurons());
+    assert_eq!(ids[0], FlyWireNeuronId(10_000_001));
+    assert_eq!(ids[99], FlyWireNeuronId(10_000_100));
+    // Monotonic in the fixture (authored sequentially).
+    for win in ids.windows(2) {
+        assert!(win[0].raw() < win[1].raw());
+    }
+}
+
+#[test]
+fn determinism_two_loads_bit_identical_bincode() {
+    let (dir, _paths) = setup_fixture();
+    let a = load_flywire(dir.path()).expect("load 1");
+    let b = load_flywire(dir.path()).expect("load 2");
+    assert_eq!(a.num_neurons(), b.num_neurons());
+    assert_eq!(a.num_synapses(), b.num_synapses());
+    let ab = a.to_bytes().expect("ser a");
+    let bb = b.to_bytes().expect("ser b");
+    assert_eq!(ab, bb, "FlyWire ingest is not deterministic");
+}
+
+#[test]
+fn nt_to_sign_covers_release_documented_labels() {
+    // Excitatory.
+    for raw in ["ACH", "GLUT", "ACETYLCHOLINE", "Glutamate"] {
+        let nt = parse_nt(raw, 0).expect(raw);
+        assert_eq!(nt_to_sign(nt), Sign::Excitatory);
+    }
+    // Inhibitory.
+    for raw in ["GABA", "HIST", "histamine"] {
+        let nt = parse_nt(raw, 0).expect(raw);
+        assert_eq!(nt_to_sign(nt), Sign::Inhibitory);
+    }
+    // Neuromodulatory — mapped to excitatory in the fast path per
+    // research doc §4 (slow pool lives outside the fast path).
+    for raw in ["DOP", "SER", "OCT", "5-HT", "DA", "OA"] {
+        let nt = parse_nt(raw, 0).expect(raw);
+        assert_eq!(nt_to_sign(nt), Sign::Excitatory);
+    }
+}
+
+#[test]
+fn unknown_nt_type_is_a_named_error_not_silent_default() {
+    let err = parse_nt("PANIC", 42).expect_err("must reject unknown NT");
+    match err {
+        FlywireError::UnknownNtType { raw, neuron_id } => {
+            assert_eq!(raw, "PANIC");
+            assert_eq!(neuron_id, 42);
+        }
+        other => panic!("wrong variant: {other:?}"),
+    }
+}
+
+#[test]
+fn cell_type_coverage_hits_key_classes() {
+    let (dir, _paths) = setup_fixture();
+    let c = load_flywire(dir.path()).expect("load fixture");
+    // Every coarse class that exists in the fixture must be populated.
+    // The fixture is authored to cover these explicitly.
+    for cls in [
+        NeuronClass::PhotoReceptor,
+        NeuronClass::Chemosensory,
+        NeuronClass::Mechanosensory,
+        NeuronClass::OpticLocal,
+        NeuronClass::KenyonCell,
+        NeuronClass::MbOutput,
+        NeuronClass::CentralComplex,
+        NeuronClass::LateralAccessory,
+        NeuronClass::Descending,
+        NeuronClass::Ascending,
+        NeuronClass::Motor,
+        NeuronClass::LocalInter,
+        NeuronClass::Projection,
+        NeuronClass::Modulatory,
+    ] {
+        assert!(
+            !c.by_class()[cls as usize].is_empty(),
+            "class {cls:?} unexpectedly empty after fixture load",
+        );
+    }
+    // Sensory + motor indices must also be populated (ADR §3.4 AC
+    // stimulus / readout needs them).
+    assert!(!c.sensory_neurons().is_empty());
+    assert!(!c.motor_neurons().is_empty());
+}
+
+#[test]
+fn classify_cell_type_known_prefixes() {
+    assert_eq!(
+        classify_cell_type(Some("KC_g"), None).unwrap(),
+        NeuronClass::KenyonCell,
+    );
+    assert_eq!(
+        classify_cell_type(Some("MBON05"), None).unwrap(),
+        NeuronClass::MbOutput,
+    );
+    assert_eq!(
+        classify_cell_type(Some("DNp01"), None).unwrap(),
+        NeuronClass::Descending,
+    );
+    assert_eq!(
+        classify_cell_type(Some("Motor_leg_1"), None).unwrap(),
+        NeuronClass::Motor,
+    );
+    assert_eq!(
+        classify_cell_type(Some("LN_GABA_A"), None).unwrap(),
+        NeuronClass::LocalInter,
+    );
+    // Flow fallback when cell type is missing.
+    assert_eq!(
+        classify_cell_type(None, Some("efferent")).unwrap(),
+        NeuronClass::Motor,
+    );
+    // Both missing falls through to Other.
+    assert_eq!(classify_cell_type(None, None).unwrap(), NeuronClass::Other);
+}
+
+#[test]
+fn malformed_tsv_surfaces_row_level_error() {
+    let dir = TempDir::new().expect("temp");
+    // Valid neurons + classification files.
+    fs::write(dir.path().join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
+    fs::write(
+        dir.path().join("classification.tsv"),
+        fixture::classification_tsv(),
+    )
+    .unwrap();
+    // Broken connections file: header is valid, but the second data
+    // row has a non-integer pre_id.
+    let broken = "pre_id\tpost_id\tneuropil\tsyn_count\tsyn_weight\tnt_type\n\
+                  10000005\t10000013\tMB_CA_L\t12\t12.0\tACH\n\
+                  BROKEN\t10000013\tMB_CA_L\t12\t12.0\tACH\n";
+    fs::write(dir.path().join("connections.tsv"), broken).unwrap();
+
+    let err = load_flywire(dir.path()).expect_err("must fail on BROKEN row");
+    match err {
+        FlywireError::MalformedRow { file, line, .. } => {
+            assert_eq!(file, "connections.tsv");
+            assert_eq!(line, 3, "expected line 3 (header=1, first data=2)");
+        }
+        other => panic!("wrong variant: {other:?}"),
+    }
+}
+
+#[test]
+fn unknown_cell_type_folds_to_other_in_default_mode() {
+    // Default classify_cell_type: unmapped -> Other. FlyWire has ~8k
+    // cell types and the coarse bucket is the v1 contract.
+    let class = classify_cell_type(Some("ZZZ_novel_type"), None).unwrap();
+    assert_eq!(class, NeuronClass::Other);
+}
+
+#[test]
+fn unknown_cell_type_is_a_named_error_in_strict_mode() {
+    // Strict path surfaces `FlywireError::UnknownCellType` so callers
+    // that want to audit prefix coverage can opt in.
+    let err = classify_cell_type_strict(Some("ZZZ_novel_type"), None, 99)
+        .expect_err("strict must reject unknown cell type");
+    match err {
+        FlywireError::UnknownCellType { raw, neuron_id } => {
+            assert_eq!(raw, "ZZZ_novel_type");
+            assert_eq!(neuron_id, 99);
+        }
+        other => panic!("wrong variant: {other:?}"),
+    }
+    // Known types still pass under strict mode.
+    assert_eq!(
+        classify_cell_type_strict(Some("KC_g"), None, 1).unwrap(),
+        NeuronClass::KenyonCell,
+    );
+}
+
+#[test]
+fn unknown_nt_type_in_neurons_file_fails_load() {
+    let dir = TempDir::new().expect("temp");
+    // Replace the very first NT label with a bogus one.
+    let bad_neurons = fixture::neurons_tsv().replacen(
+        "10000001\t9000001\tPR_R1\tHIST\t",
+        "10000001\t9000001\tPR_R1\tBOGUS\t",
+        1,
+    );
+    fs::write(dir.path().join("neurons.tsv"), bad_neurons).unwrap();
+    fs::write(
+        dir.path().join("classification.tsv"),
+        fixture::classification_tsv(),
+    )
+    .unwrap();
+    fs::write(
+        dir.path().join("connections.tsv"),
+        fixture::connections_tsv(),
+    )
+    .unwrap();
+
+    let err = load_flywire(dir.path()).expect_err("must fail on BOGUS nt_type");
+    match err {
+        FlywireError::UnknownNtType { raw, neuron_id } => {
+            assert_eq!(raw, "BOGUS");
+            assert_eq!(neuron_id, 10_000_001);
+        }
+        other => panic!("wrong variant: {other:?}"),
+    }
+}
+
+#[test]
+fn dangling_synapse_reference_is_a_named_error() {
+    let dir = TempDir::new().expect("temp");
+    fs::write(dir.path().join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
+    fs::write(
+        dir.path().join("classification.tsv"),
+        fixture::classification_tsv(),
+    )
+    .unwrap();
+    // Append a synapse pointing at a nonexistent post_id.
+    let mut connections = fixture::connections_tsv();
+    connections.push_str("10000005\t99999999\tSMP_L\t3\t3.0\tACH\n");
+    fs::write(dir.path().join("connections.tsv"), connections).unwrap();
+
+    let err = load_flywire(dir.path()).expect_err("must fail on dangling post_id");
+    match err {
+        FlywireError::UnknownPostNeuron(id) => assert_eq!(id, 99_999_999),
+        other => panic!("wrong variant: {other:?}"),
+    }
+}
+
+#[test]
+fn duplicate_neuron_id_is_a_named_error() {
+    let dir = TempDir::new().expect("temp");
+    // Duplicate the first neuron row at the tail.
+    let mut neurons = fixture::neurons_tsv();
+    neurons.push_str("10000001\t9000001\tPR_R1\tHIST\tleft\tOCN\tafferent\tsensory\n");
+    fs::write(dir.path().join("neurons.tsv"), neurons).unwrap();
+    fs::write(
+        dir.path().join("classification.tsv"),
+        fixture::classification_tsv(),
+    )
+    .unwrap();
+    fs::write(
+        dir.path().join("connections.tsv"),
+        fixture::connections_tsv(),
+    )
+    .unwrap();
+
+    let err = load_flywire(dir.path()).expect_err("must fail on duplicate neuron_id");
+    match err {
+        FlywireError::DuplicateNeuron(id) => assert_eq!(id, 10_000_001),
+        other => panic!("wrong variant: {other:?}"),
+    }
+}
+
+#[test]
+fn classification_file_is_optional() {
+    // No classification.tsv — cell-type is taken from neurons.tsv
+    // directly. The loader must still succeed.
+    let dir = TempDir::new().expect("temp");
+    fs::write(dir.path().join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
+    fs::write(
+        dir.path().join("connections.tsv"),
+        fixture::connections_tsv(),
+    )
+    .unwrap();
+    // Intentionally do NOT write classification.tsv.
+    let c = load_flywire(dir.path()).expect("load without classification");
+    assert_eq!(c.num_neurons(), fixture::EXPECTED_NEURONS);
+}
+
+#[test]
+fn missing_neurons_file_surfaces_io_error() {
+    let dir = TempDir::new().expect("temp");
+    // No neurons.tsv at all.
+    let err = load_flywire(dir.path()).expect_err("must fail without neurons.tsv");
+    match err {
+        FlywireError::Io { file, .. } => {
+            assert_eq!(file, "neurons.tsv");
+        }
+        other => panic!("wrong variant: {other:?}"),
+    }
+}
+
+#[test]
+fn synapse_signs_follow_nt_mapping_in_fixture() {
+    let (dir, _paths) = setup_fixture();
+    let c = load_flywire(dir.path()).expect("load fixture");
+    // Fixture includes several GABA and HIST edges — expect inhibitory
+    // synapses to be a non-zero fraction but bounded above by the
+    // balance of excitatory ACH / GLUT edges.
+    let mut inh = 0_usize;
+    let mut exc = 0_usize;
+    for s in c.synapses() {
+        match s.sign {
+            Sign::Inhibitory => inh += 1,
+            Sign::Excitatory => exc += 1,
+        }
+    }
+    assert!(inh > 0, "fixture has no inhibitory edges: unexpected");
+    assert!(exc > 0, "fixture has no excitatory edges: unexpected");
+    let frac = inh as f32 / c.num_synapses() as f32;
+    assert!(
+        (0.05..0.5).contains(&frac),
+        "inhibitory fraction {frac:.3} out of expected band [0.05, 0.5)",
+    );
+}
+
+#[test]
+fn dir_label_on_io_error_uses_filename_only() {
+    // Defensive: the Io variant reports a short filename, not a full
+    // path. This keeps the error deterministic across tempdir roots.
+    let bogus = PathBuf::from("/nonexistent/__connectome_fly_test__");
+    let err = load_flywire(&bogus).expect_err("must fail on missing dir");
+    match err {
+        FlywireError::Io { file, .. } => assert_eq!(file, "neurons.tsv"),
+        other => panic!("wrong variant: {other:?}"),
+    }
+}