mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-28 01:44:41 +00:00
feat(connectome-fly): FlyWire v783 ingest module + fixture tests
Implements src/connectome/flywire/{mod,schema,loader,fixture}.rs and
tests/flywire_ingest.rs — the ingest path named as the first follow-up
in ADR-154 §13. Parses the published FlyWire v783 TSV format (neurons,
synapses, cell types) into our Connectome struct without touching any
existing analysis, LIF, or observer code.
Fixture: 100-neuron hand-authored FlyWire-format TSV exercises the
full parse path without requiring a ~2 GB data download.
NT → sign mapping: ACH/GLUT/GABA/SER/OCT/DOP/HIST follow the Lin et al.
2024 Nature supplementary table mapping; unknown NT produces a
named error variant rather than a silent default.
File sizes: max file = 437 lines (fixture.rs); src = 1048 lines,
tests = 359 lines, + ~93 edit lines on existing files (≤ 1500 LOC
budget).
Tests: 17 new flywire_ingest tests pass; 10 lib + 28 pre-existing
integration tests still green.
Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
bd26c4ee41
commit
cf21327c96
11 changed files with 1505 additions and 7 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -1587,6 +1587,7 @@ dependencies = [
|
|||
"bincode 1.3.3",
|
||||
"bytemuck",
|
||||
"criterion 0.5.1",
|
||||
"csv",
|
||||
"cudarc",
|
||||
"rand 0.8.5",
|
||||
"rand_distr 0.4.3",
|
||||
|
|
@ -1597,6 +1598,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"smallvec 1.15.1",
|
||||
"tempfile",
|
||||
"thiserror 1.0.69",
|
||||
"wide",
|
||||
]
|
||||
|
|
|
|||
|
|
@ -46,12 +46,17 @@ bincode = "1.3"
|
|||
bytemuck = { version = "1.16", features = ["derive"] }
|
||||
thiserror = "1.0"
|
||||
|
||||
# FlyWire v783 TSV ingest (connectome::flywire). Column-named streaming
|
||||
# parser; sibling ruvector-graph and ruvector-cli already pin 1.3.
|
||||
csv = "1.3"
|
||||
|
||||
# Optional — gated by feature flags.
|
||||
wide = { version = "0.7", optional = true }
|
||||
cudarc = { version = "0.13", optional = true, default-features = false, features = ["cuda-12050", "driver", "std"] }
|
||||
|
||||
[dev-dependencies]
|
||||
criterion = { version = "0.5", features = ["html_reports"] }
|
||||
tempfile = "3"
|
||||
|
||||
[[bench]]
|
||||
name = "lif_throughput"
|
||||
|
|
|
|||
437
examples/connectome-fly/src/connectome/flywire/fixture.rs
Normal file
437
examples/connectome-fly/src/connectome/flywire/fixture.rs
Normal file
|
|
@ -0,0 +1,437 @@
|
|||
//! Hand-authored 100-neuron fixture in FlyWire v783 TSV format.
|
||||
//!
|
||||
//! The fixture lives as three `&'static str` constants so the ingest
|
||||
//! tests can materialize temp TSV files without any network download
|
||||
//! or large on-disk asset. The composition targets:
|
||||
//!
|
||||
//! - **Cell-type coverage**: KC, MBON, PN, DN, Motor, PR, LN, optic
|
||||
//! intrinsic — the classes the outer `NeuronClass` enum can map to.
|
||||
//! - **NT coverage**: ACH, GLUT, GABA, HIST, SER, DOP, OCT — every
|
||||
//! entry in the research-doc §4 NT table at least once.
|
||||
//! - **Side / flow coverage**: left + right, afferent + efferent +
|
||||
//! intrinsic.
|
||||
//! - **Synapse shape**: 159 directed edges, file-declared ordering, no
|
||||
//! dangling references and no authored self-loops.
|
||||
//!
|
||||
//! `EXPECTED_*` constants capture the counts so tests can assert
|
||||
//! structural invariants without re-counting rows by hand.
|
||||
|
||||
/// Number of neuron rows emitted by [`neurons_tsv`].
|
||||
pub const EXPECTED_NEURONS: usize = 100;
|
||||
|
||||
/// Number of synapse rows emitted by [`connections_tsv`].
|
||||
pub const EXPECTED_SYNAPSES: usize = 159;
|
||||
|
||||
/// Number of classification rows emitted by [`classification_tsv`]. A
|
||||
/// strict subset of neurons — the loader must still function when a
|
||||
/// neuron has no classification override.
|
||||
pub const EXPECTED_CLASSIFICATIONS: usize = 40;
|
||||
|
||||
// ---------------------------------------------------------------------
|
||||
// Fixture payloads.
|
||||
//
|
||||
// Split into const `&str` slices, joined at runtime by the `*_tsv()` builders, so each const
|
||||
// stays under ~100 lines of source. Data is hand-authored; the 8-digit
|
||||
// neuron ids are arbitrary but unique.
|
||||
// ---------------------------------------------------------------------
|
||||
|
||||
const NEURONS_HEADER: &str =
|
||||
"neuron_id\tsupervoxel_id\tcell_type\tnt_type\tside\tnerve\tflow\tsuper_class\n";
|
||||
|
||||
const NEURONS_A: &str = "\
|
||||
10000001\t9000001\tPR_R1\tHIST\tleft\tOCN\tafferent\tsensory\n\
|
||||
10000002\t9000002\tPR_R1\tHIST\tright\tOCN\tafferent\tsensory\n\
|
||||
10000003\t9000003\tPR_R7\tHIST\tleft\tOCN\tafferent\tsensory\n\
|
||||
10000004\t9000004\tPR_R8\tHIST\tright\tOCN\tafferent\tsensory\n\
|
||||
10000005\t9000005\tPN_glom_DA1\tACH\tleft\tAN\tafferent\tsensory\n\
|
||||
10000006\t9000006\tPN_glom_DL3\tACH\tright\tAN\tafferent\tsensory\n\
|
||||
10000007\t9000007\tPN_glom_VM7\tACH\tleft\tAN\tafferent\tsensory\n\
|
||||
10000008\t9000008\tORN_chm_A\tACH\tleft\tAN\tafferent\tsensory\n\
|
||||
10000009\t9000009\tORN_chm_B\tACH\tright\tAN\tafferent\tsensory\n\
|
||||
10000010\t9000010\tJO_mech_a\tACH\tleft\tJN\tafferent\tsensory\n\
|
||||
10000011\t9000011\tJO_mech_b\tACH\tright\tJN\tafferent\tsensory\n\
|
||||
10000012\t9000012\tML_mech_c\tACH\tleft\tLN\tafferent\tsensory\n\
|
||||
10000013\t9000013\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000014\t9000014\tKC_g\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000015\t9000015\tKC_ab\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000016\t9000016\tKC_ab\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000017\t9000017\tKC_apbp\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000018\t9000018\tKC_apbp\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000019\t9000019\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000020\t9000020\tKC_ab\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
";
|
||||
|
||||
const NEURONS_B: &str = "\
|
||||
10000021\t9000021\tKC_apbp\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000022\t9000022\tKC_g\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000023\t9000023\tKC_ab\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000024\t9000024\tKC_apbp\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000025\t9000025\tKC_g\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000026\t9000026\tMBON01\tGLUT\tleft\t\tintrinsic\tcentral\n\
|
||||
10000027\t9000027\tMBON02\tGLUT\tright\t\tintrinsic\tcentral\n\
|
||||
10000028\t9000028\tMBON03\tGABA\tleft\t\tintrinsic\tcentral\n\
|
||||
10000029\t9000029\tMBON04\tGABA\tright\t\tintrinsic\tcentral\n\
|
||||
10000030\t9000030\tMBON05\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000031\t9000031\tMBON06\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000032\t9000032\tDAN_PPL1\tDOP\tleft\t\tintrinsic\tcentral\n\
|
||||
10000033\t9000033\tDAN_PPL1\tDOP\tright\t\tintrinsic\tcentral\n\
|
||||
10000034\t9000034\tDAN_PAM\tDOP\tleft\t\tintrinsic\tcentral\n\
|
||||
10000035\t9000035\tDAN_PAM\tDOP\tright\t\tintrinsic\tcentral\n\
|
||||
10000036\t9000036\tOAN_VPM3\tOCT\tleft\t\tintrinsic\tcentral\n\
|
||||
10000037\t9000037\tOAN_VPM3\tOCT\tright\t\tintrinsic\tcentral\n\
|
||||
10000038\t9000038\tSER_DRN\tSER\tcenter\t\tintrinsic\tcentral\n\
|
||||
10000039\t9000039\tSER_DRN\tSER\tcenter\t\tintrinsic\tcentral\n\
|
||||
10000040\t9000040\tEPG_ring\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
";
|
||||
|
||||
const NEURONS_C: &str = "\
|
||||
10000041\t9000041\tEPG_ring\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000042\t9000042\tEPG_ring\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000043\t9000043\tPEN_fan\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000044\t9000044\tPEN_fan\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000045\t9000045\tFB_col\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000046\t9000046\tFB_col\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000047\t9000047\tLAL_loc\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000048\t9000048\tLAL_loc\tGABA\tleft\t\tintrinsic\tcentral\n\
|
||||
10000049\t9000049\tDNp01\tACH\tleft\tCN\tefferent\tdescending\n\
|
||||
10000050\t9000050\tDNp02\tACH\tright\tCN\tefferent\tdescending\n\
|
||||
10000051\t9000051\tDNp03\tACH\tleft\tCN\tefferent\tdescending\n\
|
||||
10000052\t9000052\tDNg01\tACH\tright\tCN\tefferent\tdescending\n\
|
||||
10000053\t9000053\tDNg02\tACH\tleft\tCN\tefferent\tdescending\n\
|
||||
10000054\t9000054\tMotor_leg_1\tACH\tleft\tLN\tefferent\tmotor\n\
|
||||
10000055\t9000055\tMotor_leg_2\tACH\tright\tLN\tefferent\tmotor\n\
|
||||
10000056\t9000056\tMotor_leg_3\tACH\tleft\tLN\tefferent\tmotor\n\
|
||||
10000057\t9000057\tMotor_wing_1\tACH\tright\tWN\tefferent\tmotor\n\
|
||||
10000058\t9000058\tMotor_wing_2\tACH\tleft\tWN\tefferent\tmotor\n\
|
||||
10000059\t9000059\tMotor_wing_3\tACH\tright\tWN\tefferent\tmotor\n\
|
||||
10000060\t9000060\tMotor_hlt\tACH\tleft\tHN\tefferent\tmotor\n\
|
||||
";
|
||||
|
||||
const NEURONS_D: &str = "\
|
||||
10000061\t9000061\tLN_GABA_A\tGABA\tleft\t\tintrinsic\tcentral\n\
|
||||
10000062\t9000062\tLN_GABA_B\tGABA\tright\t\tintrinsic\tcentral\n\
|
||||
10000063\t9000063\tLN_GABA_C\tGABA\tleft\t\tintrinsic\tcentral\n\
|
||||
10000064\t9000064\tLN_GABA_D\tGABA\tright\t\tintrinsic\tcentral\n\
|
||||
10000065\t9000065\tLN_GABA_E\tGABA\tleft\t\tintrinsic\tcentral\n\
|
||||
10000066\t9000066\tLN_GABA_F\tGABA\tright\t\tintrinsic\tcentral\n\
|
||||
10000067\t9000067\tLN_mix_G\tGLUT\tleft\t\tintrinsic\tcentral\n\
|
||||
10000068\t9000068\tLN_mix_H\tGLUT\tright\t\tintrinsic\tcentral\n\
|
||||
10000069\t9000069\tLN_mix_I\tGLUT\tleft\t\tintrinsic\tcentral\n\
|
||||
10000070\t9000070\tLN_mix_J\tGLUT\tright\t\tintrinsic\tcentral\n\
|
||||
10000071\t9000071\tLoc_opt_A\tACH\tleft\t\tintrinsic\toptic\n\
|
||||
10000072\t9000072\tLoc_opt_B\tACH\tright\t\tintrinsic\toptic\n\
|
||||
10000073\t9000073\tLoc_opt_C\tACH\tleft\t\tintrinsic\toptic\n\
|
||||
10000074\t9000074\tLoc_opt_D\tGABA\tright\t\tintrinsic\toptic\n\
|
||||
10000075\t9000075\tLoc_opt_E\tGABA\tleft\t\tintrinsic\toptic\n\
|
||||
10000076\t9000076\tLoc_opt_F\tACH\tright\t\tintrinsic\toptic\n\
|
||||
10000077\t9000077\tLoc_opt_G\tGLUT\tleft\t\tintrinsic\toptic\n\
|
||||
10000078\t9000078\tLoc_opt_H\tGLUT\tright\t\tintrinsic\toptic\n\
|
||||
10000079\t9000079\tLoc_opt_I\tACH\tleft\t\tintrinsic\toptic\n\
|
||||
10000080\t9000080\tLoc_opt_J\tGABA\tright\t\tintrinsic\toptic\n\
|
||||
";
|
||||
|
||||
const NEURONS_E: &str = "\
|
||||
10000081\t9000081\tPN_glom_DM1\tACH\tleft\tAN\tafferent\tsensory\n\
|
||||
10000082\t9000082\tPN_glom_DM2\tACH\tright\tAN\tafferent\tsensory\n\
|
||||
10000083\t9000083\tPN_glom_DM3\tACH\tleft\tAN\tafferent\tsensory\n\
|
||||
10000084\t9000084\tAscending_A\tACH\tright\t\tintrinsic\tascending\n\
|
||||
10000085\t9000085\tAscending_B\tACH\tleft\t\tintrinsic\tascending\n\
|
||||
10000086\t9000086\tAscending_C\tACH\tright\t\tintrinsic\tascending\n\
|
||||
10000087\t9000087\tAscending_D\tACH\tleft\t\tintrinsic\tascending\n\
|
||||
10000088\t9000088\tProj_lcb_A\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000089\t9000089\tProj_lcb_B\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000090\t9000090\tProj_lcb_C\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000091\t9000091\tProj_lcb_D\tACH\tright\t\tintrinsic\tcentral\n\
|
||||
10000092\t9000092\tProj_lcb_E\tACH\tleft\t\tintrinsic\tcentral\n\
|
||||
10000093\t9000093\tMisc_X_A\tACH\tleft\t\tintrinsic\tother\n\
|
||||
10000094\t9000094\tMisc_X_B\tACH\tright\t\tintrinsic\tother\n\
|
||||
10000095\t9000095\tMisc_X_C\tACH\tleft\t\tintrinsic\tother\n\
|
||||
10000096\t9000096\tMisc_X_D\tACH\tright\t\tintrinsic\tother\n\
|
||||
10000097\t9000097\tMisc_X_E\tACH\tleft\t\tintrinsic\tother\n\
|
||||
10000098\t9000098\tMisc_X_F\tACH\tright\t\tintrinsic\tother\n\
|
||||
10000099\t9000099\tMisc_X_G\tACH\tleft\t\tintrinsic\tother\n\
|
||||
10000100\t9000100\tMisc_X_H\tACH\tright\t\tintrinsic\tother\n\
|
||||
";
|
||||
|
||||
/// Return the full neurons TSV payload (header + 100 data rows).
|
||||
pub fn neurons_tsv() -> String {
|
||||
let mut s = String::with_capacity(12 * 1024);
|
||||
s.push_str(NEURONS_HEADER);
|
||||
s.push_str(NEURONS_A);
|
||||
s.push_str(NEURONS_B);
|
||||
s.push_str(NEURONS_C);
|
||||
s.push_str(NEURONS_D);
|
||||
s.push_str(NEURONS_E);
|
||||
s
|
||||
}
|
||||
|
||||
const CONNECTIONS_HEADER: &str = "pre_id\tpost_id\tneuropil\tsyn_count\tsyn_weight\tnt_type\n";
|
||||
|
||||
const CONNECTIONS_A: &str = "\
|
||||
10000001\t10000071\tME_L\t12\t12.0\tHIST\n\
|
||||
10000001\t10000072\tME_L\t8\t8.0\tHIST\n\
|
||||
10000002\t10000071\tME_R\t10\t10.0\tHIST\n\
|
||||
10000002\t10000073\tME_R\t7\t7.0\tHIST\n\
|
||||
10000003\t10000074\tME_L\t9\t9.0\tHIST\n\
|
||||
10000003\t10000075\tME_L\t11\t11.0\tHIST\n\
|
||||
10000004\t10000076\tME_R\t5\t5.0\tHIST\n\
|
||||
10000004\t10000077\tME_R\t6\t6.0\tHIST\n\
|
||||
10000005\t10000013\tMB_CA_L\t14\t14.0\tACH\n\
|
||||
10000005\t10000015\tMB_CA_L\t9\t9.0\tACH\n\
|
||||
10000005\t10000017\tMB_CA_L\t7\t7.0\tACH\n\
|
||||
10000006\t10000014\tMB_CA_R\t13\t13.0\tACH\n\
|
||||
10000006\t10000016\tMB_CA_R\t11\t11.0\tACH\n\
|
||||
10000006\t10000018\tMB_CA_R\t8\t8.0\tACH\n\
|
||||
10000007\t10000013\tMB_CA_L\t6\t6.0\tACH\n\
|
||||
10000007\t10000019\tMB_CA_L\t5\t5.0\tACH\n\
|
||||
10000008\t10000013\tMB_CA_L\t10\t10.0\tACH\n\
|
||||
10000008\t10000020\tMB_CA_R\t4\t4.0\tACH\n\
|
||||
10000009\t10000014\tMB_CA_R\t12\t12.0\tACH\n\
|
||||
10000009\t10000021\tMB_CA_L\t3\t3.0\tACH\n\
|
||||
10000010\t10000022\tMB_CA_R\t8\t8.0\tACH\n\
|
||||
10000010\t10000025\tMB_CA_L\t4\t4.0\tACH\n\
|
||||
10000011\t10000023\tMB_CA_L\t7\t7.0\tACH\n\
|
||||
10000011\t10000024\tMB_CA_R\t6\t6.0\tACH\n\
|
||||
10000012\t10000025\tMB_CA_L\t5\t5.0\tACH\n\
|
||||
10000081\t10000013\tMB_CA_L\t9\t9.0\tACH\n\
|
||||
10000081\t10000015\tMB_CA_L\t6\t6.0\tACH\n\
|
||||
10000082\t10000014\tMB_CA_R\t11\t11.0\tACH\n\
|
||||
10000082\t10000016\tMB_CA_R\t8\t8.0\tACH\n\
|
||||
10000083\t10000017\tMB_CA_L\t5\t5.0\tACH\n\
|
||||
10000083\t10000019\tMB_CA_L\t7\t7.0\tACH\n\
|
||||
";
|
||||
|
||||
const CONNECTIONS_B: &str = "\
|
||||
10000013\t10000026\tMB_LH_L\t4\t4.0\tACH\n\
|
||||
10000013\t10000030\tMB_LH_L\t3\t3.0\tACH\n\
|
||||
10000014\t10000027\tMB_LH_R\t5\t5.0\tACH\n\
|
||||
10000014\t10000031\tMB_LH_R\t4\t4.0\tACH\n\
|
||||
10000015\t10000026\tMB_LH_L\t6\t6.0\tACH\n\
|
||||
10000015\t10000028\tMB_LH_L\t3\t3.0\tACH\n\
|
||||
10000016\t10000027\tMB_LH_R\t5\t5.0\tACH\n\
|
||||
10000016\t10000029\tMB_LH_R\t4\t4.0\tACH\n\
|
||||
10000017\t10000030\tMB_LH_L\t3\t3.0\tACH\n\
|
||||
10000018\t10000031\tMB_LH_R\t5\t5.0\tACH\n\
|
||||
10000019\t10000028\tMB_LH_L\t6\t6.0\tACH\n\
|
||||
10000020\t10000029\tMB_LH_R\t4\t4.0\tACH\n\
|
||||
10000021\t10000030\tMB_LH_L\t5\t5.0\tACH\n\
|
||||
10000022\t10000031\tMB_LH_R\t7\t7.0\tACH\n\
|
||||
10000023\t10000026\tMB_LH_L\t3\t3.0\tACH\n\
|
||||
10000024\t10000027\tMB_LH_R\t4\t4.0\tACH\n\
|
||||
10000025\t10000030\tMB_LH_L\t6\t6.0\tACH\n\
|
||||
10000032\t10000013\tMB_PPL1_L\t3\t3.0\tDOP\n\
|
||||
10000033\t10000014\tMB_PPL1_R\t4\t4.0\tDOP\n\
|
||||
10000034\t10000015\tMB_PAM_L\t3\t3.0\tDOP\n\
|
||||
10000035\t10000016\tMB_PAM_R\t4\t4.0\tDOP\n\
|
||||
10000036\t10000017\tMB_OA_L\t2\t2.0\tOCT\n\
|
||||
10000037\t10000018\tMB_OA_R\t3\t3.0\tOCT\n\
|
||||
10000038\t10000040\tEB_L\t2\t2.0\tSER\n\
|
||||
10000039\t10000041\tEB_R\t2\t2.0\tSER\n\
|
||||
10000040\t10000044\tEB_L\t5\t5.0\tACH\n\
|
||||
10000041\t10000043\tEB_R\t4\t4.0\tACH\n\
|
||||
10000042\t10000044\tEB_L\t6\t6.0\tACH\n\
|
||||
10000043\t10000045\tFB_L\t4\t4.0\tACH\n\
|
||||
10000044\t10000046\tFB_L\t5\t5.0\tACH\n\
|
||||
10000045\t10000047\tLAL_L\t6\t6.0\tACH\n\
|
||||
10000046\t10000048\tLAL_R\t4\t4.0\tACH\n\
|
||||
";
|
||||
|
||||
const CONNECTIONS_C: &str = "\
|
||||
10000047\t10000049\tLAL_L\t5\t5.0\tACH\n\
|
||||
10000048\t10000050\tLAL_R\t4\t4.0\tGABA\n\
|
||||
10000026\t10000049\tSMP_L\t6\t6.0\tGLUT\n\
|
||||
10000027\t10000050\tSMP_R\t5\t5.0\tGLUT\n\
|
||||
10000028\t10000049\tSMP_L\t3\t3.0\tGABA\n\
|
||||
10000029\t10000050\tSMP_R\t4\t4.0\tGABA\n\
|
||||
10000030\t10000051\tSMP_L\t5\t5.0\tACH\n\
|
||||
10000031\t10000052\tSMP_R\t4\t4.0\tACH\n\
|
||||
10000049\t10000054\tGNG_L\t8\t8.0\tACH\n\
|
||||
10000049\t10000056\tGNG_L\t5\t5.0\tACH\n\
|
||||
10000050\t10000055\tGNG_R\t7\t7.0\tACH\n\
|
||||
10000050\t10000057\tGNG_R\t4\t4.0\tACH\n\
|
||||
10000051\t10000058\tGNG_L\t5\t5.0\tACH\n\
|
||||
10000052\t10000059\tGNG_R\t4\t4.0\tACH\n\
|
||||
10000053\t10000060\tGNG_L\t6\t6.0\tACH\n\
|
||||
10000051\t10000054\tGNG_L\t3\t3.0\tACH\n\
|
||||
10000052\t10000055\tGNG_R\t3\t3.0\tACH\n\
|
||||
10000053\t10000057\tGNG_R\t4\t4.0\tACH\n\
|
||||
10000061\t10000013\tMB_CA_L\t2\t2.0\tGABA\n\
|
||||
10000062\t10000014\tMB_CA_R\t3\t3.0\tGABA\n\
|
||||
10000063\t10000015\tMB_CA_L\t2\t2.0\tGABA\n\
|
||||
10000064\t10000016\tMB_CA_R\t3\t3.0\tGABA\n\
|
||||
10000065\t10000017\tMB_CA_L\t2\t2.0\tGABA\n\
|
||||
10000066\t10000018\tMB_CA_R\t3\t3.0\tGABA\n\
|
||||
10000067\t10000019\tAL_L\t4\t4.0\tGLUT\n\
|
||||
10000068\t10000020\tAL_R\t5\t5.0\tGLUT\n\
|
||||
10000069\t10000021\tAL_L\t3\t3.0\tGLUT\n\
|
||||
10000070\t10000022\tAL_R\t4\t4.0\tGLUT\n\
|
||||
10000005\t10000061\tAL_L\t3\t3.0\tACH\n\
|
||||
10000006\t10000062\tAL_R\t3\t3.0\tACH\n\
|
||||
10000007\t10000063\tAL_L\t2\t2.0\tACH\n\
|
||||
10000008\t10000064\tAL_R\t2\t2.0\tACH\n\
|
||||
";
|
||||
|
||||
const CONNECTIONS_D: &str = "\
|
||||
10000009\t10000065\tAL_L\t3\t3.0\tACH\n\
|
||||
10000010\t10000066\tAL_R\t3\t3.0\tACH\n\
|
||||
10000081\t10000067\tAL_L\t2\t2.0\tACH\n\
|
||||
10000082\t10000068\tAL_R\t2\t2.0\tACH\n\
|
||||
10000083\t10000069\tAL_L\t3\t3.0\tACH\n\
|
||||
10000071\t10000013\tLO_L\t4\t4.0\tACH\n\
|
||||
10000072\t10000014\tLO_R\t4\t4.0\tACH\n\
|
||||
10000073\t10000015\tLO_L\t3\t3.0\tACH\n\
|
||||
10000074\t10000016\tLO_R\t3\t3.0\tGABA\n\
|
||||
10000075\t10000017\tLO_L\t2\t2.0\tGABA\n\
|
||||
10000076\t10000018\tLO_R\t3\t3.0\tACH\n\
|
||||
10000077\t10000019\tLO_L\t2\t2.0\tGLUT\n\
|
||||
10000078\t10000020\tLO_R\t2\t2.0\tGLUT\n\
|
||||
10000079\t10000040\tLO_L\t3\t3.0\tACH\n\
|
||||
10000080\t10000041\tLO_R\t3\t3.0\tGABA\n\
|
||||
10000054\t10000084\tVNC_L\t6\t6.0\tACH\n\
|
||||
10000055\t10000085\tVNC_R\t5\t5.0\tACH\n\
|
||||
10000056\t10000086\tVNC_L\t4\t4.0\tACH\n\
|
||||
10000057\t10000087\tVNC_R\t5\t5.0\tACH\n\
|
||||
10000084\t10000049\tSMP_L\t3\t3.0\tACH\n\
|
||||
10000085\t10000050\tSMP_R\t3\t3.0\tACH\n\
|
||||
10000086\t10000051\tSMP_L\t2\t2.0\tACH\n\
|
||||
10000087\t10000052\tSMP_R\t2\t2.0\tACH\n\
|
||||
10000088\t10000026\tSMP_L\t4\t4.0\tACH\n\
|
||||
10000088\t10000049\tSMP_L\t3\t3.0\tACH\n\
|
||||
10000089\t10000027\tSMP_R\t4\t4.0\tACH\n\
|
||||
10000089\t10000050\tSMP_R\t3\t3.0\tACH\n\
|
||||
10000090\t10000028\tSMP_L\t3\t3.0\tACH\n\
|
||||
10000090\t10000040\tSMP_L\t2\t2.0\tACH\n\
|
||||
10000091\t10000029\tSMP_R\t3\t3.0\tACH\n\
|
||||
10000091\t10000041\tSMP_R\t2\t2.0\tACH\n\
|
||||
10000092\t10000030\tSMP_L\t3\t3.0\tACH\n\
|
||||
";
|
||||
|
||||
const CONNECTIONS_E: &str = "\
|
||||
10000092\t10000043\tSMP_L\t2\t2.0\tACH\n\
|
||||
10000093\t10000013\tGNG_L\t1\t1.0\tACH\n\
|
||||
10000094\t10000014\tGNG_R\t1\t1.0\tACH\n\
|
||||
10000095\t10000015\tGNG_L\t1\t1.0\tACH\n\
|
||||
10000096\t10000016\tGNG_R\t1\t1.0\tACH\n\
|
||||
10000097\t10000017\tGNG_L\t1\t1.0\tACH\n\
|
||||
10000098\t10000018\tGNG_R\t1\t1.0\tACH\n\
|
||||
10000099\t10000019\tGNG_L\t1\t1.0\tACH\n\
|
||||
10000100\t10000020\tGNG_R\t1\t1.0\tACH\n\
|
||||
10000032\t10000026\tMB_MBON_L\t2\t2.0\tDOP\n\
|
||||
10000033\t10000027\tMB_MBON_R\t2\t2.0\tDOP\n\
|
||||
10000034\t10000028\tMB_MBON_L\t2\t2.0\tDOP\n\
|
||||
10000035\t10000029\tMB_MBON_R\t2\t2.0\tDOP\n\
|
||||
10000036\t10000030\tMB_MBON_L\t1\t1.0\tOCT\n\
|
||||
10000037\t10000031\tMB_MBON_R\t1\t1.0\tOCT\n\
|
||||
10000058\t10000084\tVNC_L\t3\t3.0\tACH\n\
|
||||
10000059\t10000085\tVNC_R\t3\t3.0\tACH\n\
|
||||
10000060\t10000086\tVNC_L\t2\t2.0\tACH\n\
|
||||
10000026\t10000040\tSMP_L\t3\t3.0\tGLUT\n\
|
||||
10000027\t10000041\tSMP_R\t3\t3.0\tGLUT\n\
|
||||
10000028\t10000040\tSMP_L\t2\t2.0\tGABA\n\
|
||||
10000029\t10000041\tSMP_R\t2\t2.0\tGABA\n\
|
||||
10000030\t10000042\tSMP_L\t3\t3.0\tACH\n\
|
||||
10000031\t10000043\tSMP_R\t3\t3.0\tACH\n\
|
||||
10000067\t10000026\tAL_L\t2\t2.0\tGLUT\n\
|
||||
10000068\t10000027\tAL_R\t2\t2.0\tGLUT\n\
|
||||
10000069\t10000028\tAL_L\t2\t2.0\tGLUT\n\
|
||||
10000070\t10000029\tAL_R\t2\t2.0\tGLUT\n\
|
||||
10000071\t10000026\tLO_L\t2\t2.0\tACH\n\
|
||||
10000072\t10000027\tLO_R\t2\t2.0\tACH\n\
|
||||
10000073\t10000028\tLO_L\t2\t2.0\tACH\n\
|
||||
10000074\t10000029\tLO_R\t2\t2.0\tGABA\n\
|
||||
";
|
||||
|
||||
/// FlyWire-format connections TSV (header + 159 data rows).
|
||||
pub fn connections_tsv() -> String {
|
||||
let mut s = String::with_capacity(16 * 1024);
|
||||
s.push_str(CONNECTIONS_HEADER);
|
||||
s.push_str(CONNECTIONS_A);
|
||||
s.push_str(CONNECTIONS_B);
|
||||
s.push_str(CONNECTIONS_C);
|
||||
s.push_str(CONNECTIONS_D);
|
||||
s.push_str(CONNECTIONS_E);
|
||||
s
|
||||
}
|
||||
|
||||
const CLASSIFICATION_HEADER: &str = "neuron_id\tcell_type\tsuper_class\n";
|
||||
|
||||
/// FlyWire-format classification TSV (40 authoritative overrides).
|
||||
const CLASSIFICATION_BODY: &str = "\
|
||||
10000013\tKC_g\tcentral\n\
|
||||
10000014\tKC_g\tcentral\n\
|
||||
10000015\tKC_ab\tcentral\n\
|
||||
10000016\tKC_ab\tcentral\n\
|
||||
10000017\tKC_apbp\tcentral\n\
|
||||
10000018\tKC_apbp\tcentral\n\
|
||||
10000019\tKC_g\tcentral\n\
|
||||
10000020\tKC_ab\tcentral\n\
|
||||
10000021\tKC_apbp\tcentral\n\
|
||||
10000022\tKC_g\tcentral\n\
|
||||
10000026\tMBON01\tcentral\n\
|
||||
10000027\tMBON02\tcentral\n\
|
||||
10000028\tMBON03\tcentral\n\
|
||||
10000029\tMBON04\tcentral\n\
|
||||
10000030\tMBON05\tcentral\n\
|
||||
10000031\tMBON06\tcentral\n\
|
||||
10000049\tDNp01\tdescending\n\
|
||||
10000050\tDNp02\tdescending\n\
|
||||
10000051\tDNp03\tdescending\n\
|
||||
10000052\tDNg01\tdescending\n\
|
||||
10000053\tDNg02\tdescending\n\
|
||||
10000054\tMotor_leg_1\tmotor\n\
|
||||
10000055\tMotor_leg_2\tmotor\n\
|
||||
10000056\tMotor_leg_3\tmotor\n\
|
||||
10000057\tMotor_wing_1\tmotor\n\
|
||||
10000058\tMotor_wing_2\tmotor\n\
|
||||
10000059\tMotor_wing_3\tmotor\n\
|
||||
10000060\tMotor_hlt\tmotor\n\
|
||||
10000001\tPR_R1\tsensory\n\
|
||||
10000002\tPR_R1\tsensory\n\
|
||||
10000003\tPR_R7\tsensory\n\
|
||||
10000004\tPR_R8\tsensory\n\
|
||||
10000032\tDAN_PPL1\tcentral\n\
|
||||
10000033\tDAN_PPL1\tcentral\n\
|
||||
10000034\tDAN_PAM\tcentral\n\
|
||||
10000035\tDAN_PAM\tcentral\n\
|
||||
10000036\tOAN_VPM3\tcentral\n\
|
||||
10000037\tOAN_VPM3\tcentral\n\
|
||||
10000038\tSER_DRN\tcentral\n\
|
||||
10000039\tSER_DRN\tcentral\n\
|
||||
";
|
||||
|
||||
/// FlyWire-format classification TSV (header + 40 override rows).
|
||||
pub fn classification_tsv() -> String {
|
||||
let mut s = String::with_capacity(2 * 1024);
|
||||
s.push_str(CLASSIFICATION_HEADER);
|
||||
s.push_str(CLASSIFICATION_BODY);
|
||||
s
|
||||
}
|
||||
|
||||
/// Write the three fixture TSVs to `dir`, returning the paths of
|
||||
/// `(neurons, connections, classification)`. The files are named
|
||||
/// `neurons.tsv`, `connections.tsv`, `classification.tsv` — the same
|
||||
/// names used on the FlyWire release.
|
||||
pub fn write_fixture(dir: &std::path::Path) -> std::io::Result<FixturePaths> {
|
||||
let neurons = dir.join("neurons.tsv");
|
||||
let connections = dir.join("connections.tsv");
|
||||
let classification = dir.join("classification.tsv");
|
||||
std::fs::write(&neurons, neurons_tsv())?;
|
||||
std::fs::write(&connections, connections_tsv())?;
|
||||
std::fs::write(&classification, classification_tsv())?;
|
||||
Ok(FixturePaths {
|
||||
neurons,
|
||||
connections,
|
||||
classification,
|
||||
})
|
||||
}
|
||||
|
||||
/// Paths to a materialized fixture, as returned by [`write_fixture`].
///
/// Each path is the directory passed to [`write_fixture`] joined with a
/// fixed file name.
#[derive(Clone, Debug)]
pub struct FixturePaths {
    /// Path of the written `neurons.tsv`.
    pub neurons: std::path::PathBuf,
    /// Path of the written `connections.tsv`.
    pub connections: std::path::PathBuf,
    /// Path of the written `classification.tsv`.
    pub classification: std::path::PathBuf,
}
|
||||
369
examples/connectome-fly/src/connectome/flywire/loader.rs
Normal file
369
examples/connectome-fly/src/connectome/flywire/loader.rs
Normal file
|
|
@ -0,0 +1,369 @@
|
|||
//! FlyWire v783 TSV → `Connectome` loader.
|
||||
//!
|
||||
//! Streaming parse: one pass over `neurons.tsv`, one pass over
|
||||
//! `classification.tsv` (optional override), one pass over
|
||||
//! `connections.tsv`. Dense `NeuronId`s are assigned in the order neurons
|
||||
//! are first seen in the neuron file; parallel arrays of `FlyWireNeuronId`
|
||||
//! and `NeuronMeta` are preserved alongside the CSR.
|
||||
//!
|
||||
//! The loader is deterministic: given a byte-identical TSV input, the
|
||||
//! output `Connectome` (synapses, row_ptr, meta, flywire_ids) is
|
||||
//! bit-identical. Synapses within a neuron are stored in the order they
|
||||
//! appear in `connections.tsv`.
|
||||
//!
|
||||
//! Errors are surfaced through the crate-level [`FlywireError`] so
|
||||
//! callers can distinguish "bad CSV syntax" from "unknown cell type"
|
||||
//! from "dangling synapse reference".
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use super::schema::{CellTypeRecord, NeuroTransmitter, NeuronRecord, SynapseRecord};
|
||||
use super::FlywireError;
|
||||
use crate::connectome::generator::Connectome;
|
||||
use crate::connectome::schema::{
|
||||
ConnectomeSerCfg, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
|
||||
};
|
||||
|
||||
/// Load a FlyWire v783 release from `dir`.
|
||||
///
|
||||
/// Expects three TSV files under `dir`: `neurons.tsv`,
|
||||
/// `connections.tsv`, `classification.tsv`. The classification file is
|
||||
/// optional; if absent, the cell-type column on `neurons.tsv` is used
|
||||
/// directly.
|
||||
///
|
||||
/// See [`FlywireError`] for the failure modes.
|
||||
pub fn load_flywire(dir: &Path) -> Result<Connectome, FlywireError> {
|
||||
let neurons_path = dir.join("neurons.tsv");
|
||||
let connections_path = dir.join("connections.tsv");
|
||||
let classification_path = dir.join("classification.tsv");
|
||||
let neurons = read_neurons(&neurons_path)?;
|
||||
let class_overrides = if classification_path.exists() {
|
||||
read_classifications(&classification_path)?
|
||||
} else {
|
||||
HashMap::new()
|
||||
};
|
||||
let synapses = read_synapses(&connections_path)?;
|
||||
assemble_connectome(neurons, class_overrides, synapses)
|
||||
}
|
||||
|
||||
/// Parse `neurons.tsv` into a vector of [`NeuronRecord`]s. Duplicate
|
||||
/// `neuron_id` entries yield [`FlywireError::DuplicateNeuron`].
|
||||
pub fn read_neurons(path: &Path) -> Result<Vec<NeuronRecord>, FlywireError> {
|
||||
let mut rdr = open_tsv(path)?;
|
||||
let mut out: Vec<NeuronRecord> = Vec::new();
|
||||
let mut seen: HashMap<u64, usize> = HashMap::new();
|
||||
for (i, result) in rdr.deserialize::<NeuronRecord>().enumerate() {
|
||||
let rec: NeuronRecord = result.map_err(|e| FlywireError::MalformedRow {
|
||||
file: label_of(path),
|
||||
line: (i + 2) as u64, // +1 for header, +1 for 1-based
|
||||
detail: e.to_string(),
|
||||
})?;
|
||||
if seen.insert(rec.neuron_id, i).is_some() {
|
||||
return Err(FlywireError::DuplicateNeuron(rec.neuron_id));
|
||||
}
|
||||
out.push(rec);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Parse `classification.tsv` into a `neuron_id → record` map.
|
||||
pub fn read_classifications(path: &Path) -> Result<HashMap<u64, CellTypeRecord>, FlywireError> {
|
||||
let mut rdr = open_tsv(path)?;
|
||||
let mut out: HashMap<u64, CellTypeRecord> = HashMap::new();
|
||||
for (i, result) in rdr.deserialize::<CellTypeRecord>().enumerate() {
|
||||
let rec: CellTypeRecord = result.map_err(|e| FlywireError::MalformedRow {
|
||||
file: label_of(path),
|
||||
line: (i + 2) as u64,
|
||||
detail: e.to_string(),
|
||||
})?;
|
||||
out.insert(rec.neuron_id, rec);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
/// Parse `connections.tsv` into a vector of [`SynapseRecord`]s. Order
|
||||
/// is preserved; the loader relies on file-declared order for CSR
|
||||
/// determinism.
|
||||
pub fn read_synapses(path: &Path) -> Result<Vec<SynapseRecord>, FlywireError> {
|
||||
let mut rdr = open_tsv(path)?;
|
||||
let mut out: Vec<SynapseRecord> = Vec::new();
|
||||
for (i, result) in rdr.deserialize::<SynapseRecord>().enumerate() {
|
||||
let rec: SynapseRecord = result.map_err(|e| FlywireError::MalformedRow {
|
||||
file: label_of(path),
|
||||
line: (i + 2) as u64,
|
||||
detail: e.to_string(),
|
||||
})?;
|
||||
out.push(rec);
|
||||
}
|
||||
Ok(out)
|
||||
}
|
||||
|
||||
fn open_tsv(path: &Path) -> Result<csv::Reader<std::fs::File>, FlywireError> {
|
||||
csv::ReaderBuilder::new()
|
||||
.delimiter(b'\t')
|
||||
.has_headers(true)
|
||||
.flexible(false)
|
||||
.from_path(path)
|
||||
.map_err(|e| FlywireError::Io {
|
||||
file: label_of(path),
|
||||
detail: e.to_string(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Short label for `path` in error messages: the final path component
/// (lossily converted to UTF-8), or the whole path as displayed when
/// there is no final component (e.g. `/` or `..`).
fn label_of(path: &Path) -> String {
    match path.file_name() {
        Some(name) => name.to_string_lossy().into_owned(),
        None => path.display().to_string(),
    }
}
|
||||
|
||||
fn assemble_connectome(
|
||||
neurons: Vec<NeuronRecord>,
|
||||
class_overrides: HashMap<u64, CellTypeRecord>,
|
||||
synapses: Vec<SynapseRecord>,
|
||||
) -> Result<Connectome, FlywireError> {
|
||||
// Dense id assignment in TSV declaration order.
|
||||
let mut id_of: HashMap<u64, NeuronId> = HashMap::with_capacity(neurons.len());
|
||||
let mut flywire_ids: Vec<FlyWireNeuronId> = Vec::with_capacity(neurons.len());
|
||||
let mut meta: Vec<NeuronMeta> = Vec::with_capacity(neurons.len());
|
||||
let mut nt_per_neuron: Vec<NeuroTransmitter> = Vec::with_capacity(neurons.len());
|
||||
|
||||
for (idx, n) in neurons.iter().enumerate() {
|
||||
id_of.insert(n.neuron_id, NeuronId(idx as u32));
|
||||
flywire_ids.push(FlyWireNeuronId(n.neuron_id));
|
||||
let class_override = class_overrides.get(&n.neuron_id);
|
||||
let effective_cell_type =
|
||||
n.effective_cell_type(class_override.map(|c| c.cell_type.as_str()));
|
||||
let class = classify_cell_type(effective_cell_type.as_deref(), n.flow.as_deref())?;
|
||||
let nt = parse_nt(&n.nt_type, n.neuron_id)?;
|
||||
nt_per_neuron.push(nt);
|
||||
meta.push(NeuronMeta {
|
||||
class,
|
||||
module: 0,
|
||||
bias_pa: default_bias_for(class),
|
||||
});
|
||||
}
|
||||
|
||||
// Partition synapses by pre-id in file-declared order.
|
||||
let n = neurons.len();
|
||||
let mut per_pre: Vec<Vec<Synapse>> = vec![Vec::new(); n];
|
||||
|
||||
for syn in &synapses {
|
||||
let pre = *id_of
|
||||
.get(&syn.pre_id)
|
||||
.ok_or(FlywireError::UnknownPreNeuron(syn.pre_id))?;
|
||||
let post = *id_of
|
||||
.get(&syn.post_id)
|
||||
.ok_or(FlywireError::UnknownPostNeuron(syn.post_id))?;
|
||||
if pre == post {
|
||||
continue; // drop self-loops; matches SBM generator
|
||||
}
|
||||
let nt = if let Some(s) = &syn.nt_type {
|
||||
parse_nt(s, syn.pre_id)?
|
||||
} else {
|
||||
nt_per_neuron[pre.idx()]
|
||||
};
|
||||
let sign = nt_to_sign(nt);
|
||||
let count = syn.syn_count.max(1);
|
||||
let weight = derive_weight(syn, count);
|
||||
per_pre[pre.idx()].push(Synapse {
|
||||
post,
|
||||
weight,
|
||||
delay_ms: default_delay_ms(),
|
||||
sign,
|
||||
});
|
||||
}
|
||||
|
||||
// CSR flatten (row_ptr + synapses), preserving per-pre order.
|
||||
let mut row_ptr: Vec<u32> = Vec::with_capacity(n + 1);
|
||||
let total: usize = per_pre.iter().map(|v| v.len()).sum();
|
||||
let mut flat: Vec<Synapse> = Vec::with_capacity(total);
|
||||
row_ptr.push(0);
|
||||
for bucket in per_pre {
|
||||
flat.extend(bucket);
|
||||
row_ptr.push(flat.len() as u32);
|
||||
}
|
||||
|
||||
let cfg = ConnectomeSerCfg {
|
||||
num_neurons: n as u32,
|
||||
num_modules: 1,
|
||||
num_hub_modules: 0,
|
||||
seed: 0,
|
||||
};
|
||||
Ok(Connectome::from_parts(
|
||||
cfg,
|
||||
meta,
|
||||
flat,
|
||||
row_ptr,
|
||||
Some(flywire_ids),
|
||||
))
|
||||
}
|
||||
|
||||
/// Normalize a raw NT-type string to the typed enum. Case-insensitive
|
||||
/// match against the seven release-documented labels. Anything else is
|
||||
/// [`FlywireError::UnknownNtType`] — no silent default.
|
||||
pub fn parse_nt(raw: &str, context_id: u64) -> Result<NeuroTransmitter, FlywireError> {
|
||||
let upper = raw.trim().to_ascii_uppercase();
|
||||
match upper.as_str() {
|
||||
"ACH" | "ACETYLCHOLINE" => Ok(NeuroTransmitter::Acetylcholine),
|
||||
"GLUT" | "GLUTAMATE" => Ok(NeuroTransmitter::Glutamate),
|
||||
"GABA" => Ok(NeuroTransmitter::Gaba),
|
||||
"HIST" | "HISTAMINE" => Ok(NeuroTransmitter::Histamine),
|
||||
"SER" | "SEROTONIN" | "5-HT" | "5HT" => Ok(NeuroTransmitter::Serotonin),
|
||||
"DOP" | "DOPAMINE" | "DA" => Ok(NeuroTransmitter::Dopamine),
|
||||
"OCT" | "OCTOPAMINE" | "OA" => Ok(NeuroTransmitter::Octopamine),
|
||||
_ => Err(FlywireError::UnknownNtType {
|
||||
raw: raw.to_owned(),
|
||||
neuron_id: context_id,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// NT → fast-path sign mapping (research doc §4 table).
|
||||
///
|
||||
/// - ACH, GLUT → +1 (Excitatory)
|
||||
/// - GABA, HIST → -1 (Inhibitory)
|
||||
/// - SER, DOP, OCT (modulatory) → +1 in the fast path; analyses that
|
||||
/// need to exclude slow edges must consult the NT side-channel.
|
||||
pub fn nt_to_sign(nt: NeuroTransmitter) -> Sign {
|
||||
match nt {
|
||||
NeuroTransmitter::Acetylcholine | NeuroTransmitter::Glutamate => Sign::Excitatory,
|
||||
NeuroTransmitter::Gaba | NeuroTransmitter::Histamine => Sign::Inhibitory,
|
||||
NeuroTransmitter::Serotonin | NeuroTransmitter::Dopamine | NeuroTransmitter::Octopamine => {
|
||||
Sign::Excitatory
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Map a FlyWire cell-type string to our coarse [`NeuronClass`].
|
||||
///
|
||||
/// Unknown cell types fall into `NeuronClass::Other` — this is
|
||||
/// intentional: the FlyWire release documents ~8,000 cell types, and
|
||||
/// the coarse bucket is the correct v1 behavior per the research doc.
|
||||
/// Empty cell-type with a non-empty `flow` column still resolves via
|
||||
/// the flow hint. If *both* are missing the entry is `Other`, not an
|
||||
/// error (matches the release's "unresolved" neurons).
|
||||
pub fn classify_cell_type(
|
||||
cell_type: Option<&str>,
|
||||
flow: Option<&str>,
|
||||
) -> Result<NeuronClass, FlywireError> {
|
||||
if let Some(ct) = cell_type {
|
||||
if let Some(class) = classify_by_prefix(ct) {
|
||||
return Ok(class);
|
||||
}
|
||||
}
|
||||
if let Some(f) = flow {
|
||||
return Ok(classify_by_flow(f));
|
||||
}
|
||||
Ok(NeuronClass::Other)
|
||||
}
|
||||
|
||||
/// Strict variant of [`classify_cell_type`]. Unmapped cell types yield
|
||||
/// [`FlywireError::UnknownCellType`] instead of folding to
|
||||
/// [`NeuronClass::Other`]. Intended for callers that want to audit
|
||||
/// prefix-table coverage on a specific release.
|
||||
pub fn classify_cell_type_strict(
|
||||
cell_type: Option<&str>,
|
||||
flow: Option<&str>,
|
||||
neuron_id: u64,
|
||||
) -> Result<NeuronClass, FlywireError> {
|
||||
if let Some(ct) = cell_type {
|
||||
if let Some(class) = classify_by_prefix(ct) {
|
||||
return Ok(class);
|
||||
}
|
||||
return Err(FlywireError::UnknownCellType {
|
||||
raw: ct.to_owned(),
|
||||
neuron_id,
|
||||
});
|
||||
}
|
||||
if let Some(f) = flow {
|
||||
return Ok(classify_by_flow(f));
|
||||
}
|
||||
Ok(NeuronClass::Other)
|
||||
}
|
||||
|
||||
fn classify_by_prefix(ct: &str) -> Option<NeuronClass> {
|
||||
// Order matters: more-specific prefixes first.
|
||||
let t = ct.trim();
|
||||
if t.starts_with("PR_") || t.starts_with("R1") || t.starts_with("R7") || t.starts_with("R8") {
|
||||
return Some(NeuronClass::PhotoReceptor);
|
||||
}
|
||||
if t.starts_with("ORN") || t.starts_with("PN_glom") || t.starts_with("PN_") {
|
||||
return Some(NeuronClass::Chemosensory);
|
||||
}
|
||||
if t.starts_with("JO") || t.starts_with("ML_mech") {
|
||||
return Some(NeuronClass::Mechanosensory);
|
||||
}
|
||||
if t.starts_with("KC") {
|
||||
return Some(NeuronClass::KenyonCell);
|
||||
}
|
||||
if t.starts_with("MBON") {
|
||||
return Some(NeuronClass::MbOutput);
|
||||
}
|
||||
if t.starts_with("EPG") || t.starts_with("PEN") || t.starts_with("FB_") || t.starts_with("PB_")
|
||||
{
|
||||
return Some(NeuronClass::CentralComplex);
|
||||
}
|
||||
if t.starts_with("LAL") {
|
||||
return Some(NeuronClass::LateralAccessory);
|
||||
}
|
||||
if t.starts_with("DNp") || t.starts_with("DNg") || t.starts_with("DN_") {
|
||||
return Some(NeuronClass::Descending);
|
||||
}
|
||||
if t.starts_with("Ascending") || t.starts_with("AN_") {
|
||||
return Some(NeuronClass::Ascending);
|
||||
}
|
||||
if t.starts_with("Motor") {
|
||||
return Some(NeuronClass::Motor);
|
||||
}
|
||||
if t.starts_with("LN_") || t.starts_with("LocalInter") {
|
||||
return Some(NeuronClass::LocalInter);
|
||||
}
|
||||
if t.starts_with("Proj") || t.starts_with("Projection") {
|
||||
return Some(NeuronClass::Projection);
|
||||
}
|
||||
if t.starts_with("DAN") || t.starts_with("SER_") || t.starts_with("OAN") {
|
||||
return Some(NeuronClass::Modulatory);
|
||||
}
|
||||
if t.starts_with("Loc_opt") || t.starts_with("LoOpt") || t.starts_with("Lo_") {
|
||||
return Some(NeuronClass::OpticLocal);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn classify_by_flow(flow: &str) -> NeuronClass {
|
||||
match flow.trim().to_ascii_lowercase().as_str() {
|
||||
"afferent" => NeuronClass::Other,
|
||||
"efferent" => NeuronClass::Motor,
|
||||
"intrinsic" => NeuronClass::Other,
|
||||
"ascending" => NeuronClass::Ascending,
|
||||
"descending" => NeuronClass::Descending,
|
||||
_ => NeuronClass::Other,
|
||||
}
|
||||
}
|
||||
|
||||
fn default_bias_for(class: NeuronClass) -> f32 {
|
||||
if class.is_sensory() {
|
||||
-0.5
|
||||
} else if class.is_motor() {
|
||||
0.5
|
||||
} else {
|
||||
0.0
|
||||
}
|
||||
}
|
||||
|
||||
fn derive_weight(syn: &SynapseRecord, count: u32) -> f32 {
|
||||
if syn.syn_weight > 0.0 {
|
||||
syn.syn_weight
|
||||
} else {
|
||||
count as f32
|
||||
}
|
||||
}
|
||||
|
||||
/// Constant synaptic delay, in milliseconds, applied to every ingested
/// edge.
///
/// Per the research doc §3.2 the release does not publish conduction
/// delays; a distance-scaled estimate would need soma coordinates,
/// which are optional in the release and absent from the fixture, so
/// a flat 2.0 ms fallback is used.
fn default_delay_ms() -> f32 {
    const FALLBACK_DELAY_MS: f32 = 2.0;
    FALLBACK_DELAY_MS
}
|
||||
101
examples/connectome-fly/src/connectome/flywire/mod.rs
Normal file
101
examples/connectome-fly/src/connectome/flywire/mod.rs
Normal file
|
|
@ -0,0 +1,101 @@
|
|||
//! FlyWire v783 ingest: TSV release → `Connectome`.
|
||||
//!
|
||||
//! This module is the first follow-up named in ADR-154 §13. It moves
|
||||
//! the connectome-fly demonstrator from its synthetic stochastic-block
|
||||
//! model onto the real FlyWire v783 wiring, one file at a time, without
|
||||
//! touching any analysis, LIF, or observer code.
|
||||
//!
|
||||
//! ## Public API
|
||||
//!
|
||||
//! - [`load_flywire`] — parse `neurons.tsv`, `classification.tsv`, and
|
||||
//! `connections.tsv` from a directory; return a fully-populated
|
||||
//! [`crate::Connectome`] with parallel `FlyWireNeuronId`s.
|
||||
//! - [`FlywireError`] — structured error type with one variant per
|
||||
//! named failure mode (malformed row, dangling reference, unknown
|
||||
//! NT, unknown cell type, IO failure, …).
|
||||
//! - [`schema`] — serde record structs matching the release TSV
|
||||
//! columns.
|
||||
//! - [`fixture`] — hand-authored 100-neuron fixture used by tests.
|
||||
//!
|
||||
//! ## Hard constraints
|
||||
//!
|
||||
//! - No `unsafe`. No Python, shell, or JS/TS.
|
||||
//! - Deterministic: byte-identical TSV input produces bit-identical
|
||||
//! `Connectome` output across runs.
|
||||
//! - No download path; `load_flywire` reads whatever TSVs are under
|
||||
//! the path the caller hands it.
|
||||
|
||||
pub mod fixture;
|
||||
pub mod loader;
|
||||
pub mod schema;
|
||||
|
||||
pub use loader::{
|
||||
classify_cell_type, classify_cell_type_strict, load_flywire, nt_to_sign, parse_nt,
|
||||
};
|
||||
pub use schema::{CellTypeRecord, NeuroTransmitter, NeuronRecord, SynapseRecord};
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
/// Errors produced by the FlyWire ingest path. Each variant maps to a
|
||||
/// distinct test case in `tests/flywire_ingest.rs`.
|
||||
#[derive(Debug, Error)]
|
||||
pub enum FlywireError {
|
||||
/// A row failed to deserialize against the [`NeuronRecord`],
|
||||
/// [`SynapseRecord`], or [`CellTypeRecord`] schema.
|
||||
#[error("malformed row in {file} at line {line}: {detail}")]
|
||||
MalformedRow {
|
||||
/// File name (not full path), e.g. `"neurons.tsv"`.
|
||||
file: String,
|
||||
/// 1-based row number (header is line 1).
|
||||
line: u64,
|
||||
/// Underlying parser message.
|
||||
detail: String,
|
||||
},
|
||||
|
||||
/// IO or CSV-framing failure before per-row dispatch.
|
||||
#[error("io error on {file}: {detail}")]
|
||||
Io {
|
||||
/// File name.
|
||||
file: String,
|
||||
/// Underlying error.
|
||||
detail: String,
|
||||
},
|
||||
|
||||
/// A synapse referenced a `pre_id` that is not present in
|
||||
/// `neurons.tsv`.
|
||||
#[error("synapse pre_id {0} not in neurons.tsv")]
|
||||
UnknownPreNeuron(u64),
|
||||
|
||||
/// A synapse referenced a `post_id` that is not present in
|
||||
/// `neurons.tsv`.
|
||||
#[error("synapse post_id {0} not in neurons.tsv")]
|
||||
UnknownPostNeuron(u64),
|
||||
|
||||
/// A neuron id appeared twice in `neurons.tsv`.
|
||||
#[error("duplicate neuron_id {0} in neurons.tsv")]
|
||||
DuplicateNeuron(u64),
|
||||
|
||||
/// An NT-type string did not match the seven release-documented
|
||||
/// labels (ACH / GLUT / GABA / HIST / SER / DOP / OCT).
|
||||
#[error("unknown nt_type {raw:?} on neuron_id {neuron_id}")]
|
||||
UnknownNtType {
|
||||
/// Raw column value.
|
||||
raw: String,
|
||||
/// Context id (neuron or pre-neuron of the offending synapse).
|
||||
neuron_id: u64,
|
||||
},
|
||||
|
||||
/// A cell-type string did not match any known prefix. Only
|
||||
/// surfaced from the strict classification path
|
||||
/// ([`loader::classify_cell_type_strict`]); the default
|
||||
/// [`loader::classify_cell_type`] folds unknown cell types into
|
||||
/// [`crate::NeuronClass::Other`] because FlyWire v783 documents
|
||||
/// ~8 000 cell types and the ingest loader is coarse by design.
|
||||
#[error("unknown cell_type {raw:?} on neuron_id {neuron_id}")]
|
||||
UnknownCellType {
|
||||
/// Raw column value.
|
||||
raw: String,
|
||||
/// Context neuron id.
|
||||
neuron_id: u64,
|
||||
},
|
||||
}
|
||||
141
examples/connectome-fly/src/connectome/flywire/schema.rs
Normal file
141
examples/connectome-fly/src/connectome/flywire/schema.rs
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
//! FlyWire v783 on-disk record schema.
|
||||
//!
|
||||
//! Three serde structs, one per published TSV file in the release:
|
||||
//!
|
||||
//! - [`NeuronRecord`] — one row per neuron; union of fields across
|
||||
//! `neurons.tsv` plus the parts of `classification.tsv` / NT tables
|
||||
//! that the loader consumes in a single pass.
|
||||
//! - [`SynapseRecord`] — one row per directed pre→post edge in
|
||||
//! `connections.tsv`.
|
||||
//! - [`CellTypeRecord`] — one row per neuron in
|
||||
//! `classification.tsv`; used as an override table when the primary
|
||||
//! `neurons.tsv` lacks a cell-type assignment.
|
||||
//!
|
||||
//! The column names match the published v783 schema (see
|
||||
//! `docs/research/connectome-ruvector/02-connectome-layer.md` §2).
|
||||
//! Unknown columns are ignored by the CSV reader so adding downstream
|
||||
//! fields (e.g. `hemilineage`) does not require a schema version bump.
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// One row of the neurons TSV.
|
||||
///
|
||||
/// Columns mirror the FlyWire v783 release. `neuron_id` is the stable
|
||||
/// 64-bit root id; `supervoxel_id` is the coarse segmentation handle
|
||||
/// (kept for provenance, not used by the loader in v1); `cell_type`,
|
||||
/// `nt_type`, `side`, `nerve`, and `flow` are all string-enum encoded.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NeuronRecord {
|
||||
/// Stable FlyWire root id.
|
||||
pub neuron_id: u64,
|
||||
/// Supervoxel id (provenance only).
|
||||
#[serde(default)]
|
||||
pub supervoxel_id: u64,
|
||||
/// Cell type, e.g. "KC_g", "MBON01", "DNp01". Empty string
|
||||
/// (deserialized to `None`) is allowed when the classification is
|
||||
/// unresolved.
|
||||
#[serde(default)]
|
||||
pub cell_type: Option<String>,
|
||||
/// Dominant predicted neurotransmitter: "ACH", "GLUT", "GABA",
|
||||
/// "SER", "OCT", "DOP", "HIST".
|
||||
pub nt_type: String,
|
||||
/// Anatomical side: "left", "right", "center".
|
||||
#[serde(default)]
|
||||
pub side: Option<String>,
|
||||
/// Peripheral nerve id (Wikipedia naming), if afferent / efferent.
|
||||
#[serde(default)]
|
||||
pub nerve: Option<String>,
|
||||
/// Flow class: "afferent", "efferent", "intrinsic".
|
||||
#[serde(default)]
|
||||
pub flow: Option<String>,
|
||||
/// Optional super-class label (e.g. "optic", "central", "motor").
|
||||
#[serde(default)]
|
||||
pub super_class: Option<String>,
|
||||
}
|
||||
|
||||
/// One row of the connections TSV.
|
||||
///
|
||||
/// `pre_id` and `post_id` are stable FlyWire root ids; both must resolve
|
||||
/// to a row in the neurons TSV or the loader errors.
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct SynapseRecord {
|
||||
/// Pre-synaptic neuron id.
|
||||
pub pre_id: u64,
|
||||
/// Post-synaptic neuron id.
|
||||
pub post_id: u64,
|
||||
/// Neuropil region label (e.g. "MB_CA_L").
|
||||
#[serde(default)]
|
||||
pub neuropil: Option<String>,
|
||||
/// Aggregated synapse count for this directed pair.
|
||||
pub syn_count: u32,
|
||||
/// Effective weight reported by the release; loader uses
|
||||
/// `syn_count` when this field is absent or zero.
|
||||
#[serde(default)]
|
||||
pub syn_weight: f32,
|
||||
/// Per-edge NT prediction (optional; falls back to the pre
|
||||
/// neuron's dominant NT when unset).
|
||||
#[serde(default)]
|
||||
pub nt_type: Option<String>,
|
||||
}
|
||||
|
||||
/// One row of the classification TSV.
|
||||
///
|
||||
/// Provides authoritative cell-type / super-class labels that can
|
||||
/// override or fill in the fields on [`NeuronRecord`].
|
||||
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CellTypeRecord {
|
||||
/// Stable FlyWire root id.
|
||||
pub neuron_id: u64,
|
||||
/// Primary cell-type label.
|
||||
pub cell_type: String,
|
||||
/// Optional coarse super-class.
|
||||
#[serde(default)]
|
||||
pub super_class: Option<String>,
|
||||
}
|
||||
|
||||
impl NeuronRecord {
|
||||
/// Effective cell-type string after folding in the classification
|
||||
/// override. `class_override` wins over `self.cell_type` when both
|
||||
/// are present.
|
||||
pub fn effective_cell_type(&self, class_override: Option<&str>) -> Option<String> {
|
||||
class_override
|
||||
.map(str::to_owned)
|
||||
.or_else(|| self.cell_type.clone())
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalized neurotransmitter tag produced by parsing an `nt_type`
/// column.
///
/// Kept separate from the outer schema's `Sign` because dopamine,
/// serotonin and octopamine are neuromodulators without a natural
/// fast-path sign. The loader renders them excitatory (research doc §4
/// table); [`NeuroTransmitter::is_modulatory`] lets callers still tell
/// them apart while they hold the parsed value.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum NeuroTransmitter {
    /// Acetylcholine — fast excitation.
    Acetylcholine,
    /// Glutamate — treated as excitatory (v1 default).
    Glutamate,
    /// GABA — fast inhibition.
    Gaba,
    /// Histamine — photoreceptor output; inhibitory.
    Histamine,
    /// Serotonin — neuromodulator; excitatory in the fast path.
    Serotonin,
    /// Dopamine — neuromodulator; excitatory in the fast path.
    Dopamine,
    /// Octopamine — neuromodulator; excitatory in the fast path.
    Octopamine,
}

impl NeuroTransmitter {
    /// `true` for the neuromodulatory transmitters (serotonin,
    /// dopamine, octopamine), `false` for the fast transmitters.
    ///
    /// The fast path still assigns these a sign so the LIF engine has
    /// something to integrate; this flag exposes the category so
    /// analysis code can exclude slow edges.
    pub fn is_modulatory(self) -> bool {
        match self {
            Self::Serotonin | Self::Dopamine | Self::Octopamine => true,
            Self::Acetylcholine | Self::Glutamate | Self::Gaba | Self::Histamine => false,
        }
    }
}
|
||||
|
|
@ -13,7 +13,8 @@ use smallvec::SmallVec;
|
|||
|
||||
use super::persist::ConnectomeError;
|
||||
use super::schema::{
|
||||
ConnectomeConfig, ConnectomeSerCfg, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
|
||||
ConnectomeConfig, ConnectomeSerCfg, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign,
|
||||
Synapse,
|
||||
};
|
||||
|
||||
/// A synthetic fly-like connectome. Stores neuron metadata and a
|
||||
|
|
@ -35,6 +36,12 @@ pub struct Connectome {
|
|||
pub(super) motor: Vec<NeuronId>,
|
||||
/// Pre-computed index grouped by class.
|
||||
pub(super) by_class: Vec<Vec<NeuronId>>,
|
||||
/// Stable FlyWire root ids, parallel to `meta` / dense ids.
|
||||
/// `None` for SBM-generated connectomes; `Some` when loaded via the
|
||||
/// `flywire` module. Serialized at the tail of the bincode blob so
|
||||
/// existing synthetic blobs remain round-trippable.
|
||||
#[serde(default)]
|
||||
pub(super) flywire_ids: Option<Vec<FlyWireNeuronId>>,
|
||||
}
|
||||
|
||||
impl Connectome {
|
||||
|
|
@ -134,9 +141,64 @@ impl Connectome {
|
|||
sensory,
|
||||
motor,
|
||||
by_class,
|
||||
flywire_ids: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Construct a `Connectome` directly from already-assembled parts.
|
||||
///
|
||||
/// Used by the `flywire` loader to install parsed FlyWire v783
|
||||
/// records without going through the synthetic SBM path. Callers
|
||||
/// are responsible for supplying a CSR-consistent `(row_ptr,
|
||||
/// synapses)` pair: `row_ptr.len() == meta.len() + 1` and
|
||||
/// `row_ptr[i] <= row_ptr[i+1] <= synapses.len()`.
|
||||
///
|
||||
/// Sensory / motor / by-class indices are derived from `meta`.
|
||||
/// `flywire_ids`, if provided, must be parallel to `meta`.
|
||||
pub(super) fn from_parts(
|
||||
cfg: ConnectomeSerCfg,
|
||||
meta: Vec<NeuronMeta>,
|
||||
synapses: Vec<Synapse>,
|
||||
row_ptr: Vec<u32>,
|
||||
flywire_ids: Option<Vec<FlyWireNeuronId>>,
|
||||
) -> Self {
|
||||
debug_assert_eq!(row_ptr.len(), meta.len() + 1);
|
||||
debug_assert_eq!(*row_ptr.last().unwrap_or(&0) as usize, synapses.len());
|
||||
if let Some(ids) = &flywire_ids {
|
||||
debug_assert_eq!(ids.len(), meta.len());
|
||||
}
|
||||
let mut by_class: Vec<Vec<NeuronId>> = vec![Vec::new(); 15];
|
||||
let mut sensory: Vec<NeuronId> = Vec::new();
|
||||
let mut motor: Vec<NeuronId> = Vec::new();
|
||||
for (i, m) in meta.iter().enumerate() {
|
||||
by_class[m.class as usize].push(NeuronId(i as u32));
|
||||
if m.class.is_sensory() {
|
||||
sensory.push(NeuronId(i as u32));
|
||||
}
|
||||
if m.class.is_motor() {
|
||||
motor.push(NeuronId(i as u32));
|
||||
}
|
||||
}
|
||||
Self {
|
||||
cfg,
|
||||
meta,
|
||||
synapses,
|
||||
row_ptr,
|
||||
sensory,
|
||||
motor,
|
||||
by_class,
|
||||
flywire_ids,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parallel array of stable FlyWire root ids, if this connectome
|
||||
/// was loaded from a FlyWire v783 release. `None` for SBM-generated
|
||||
/// connectomes.
|
||||
#[inline]
|
||||
pub fn flywire_ids(&self) -> Option<&[FlyWireNeuronId]> {
|
||||
self.flywire_ids.as_deref()
|
||||
}
|
||||
|
||||
/// Total number of neurons.
|
||||
#[inline]
|
||||
pub fn num_neurons(&self) -> usize {
|
||||
|
|
|
|||
|
|
@ -1,22 +1,28 @@
|
|||
//! Connectome schema, stochastic-block-model generator, and compact
|
||||
//! binary serialization. Split across three submodules:
|
||||
//! binary serialization. Split across four submodules:
|
||||
//!
|
||||
//! - `schema` — public types (`NeuronId`, `Sign`, `NeuronClass`,
|
||||
//! `Synapse`, `NeuronMeta`, `ConnectomeConfig`).
|
||||
//! - `schema` — public types (`NeuronId`, `FlyWireNeuronId`, `Sign`,
|
||||
//! `NeuronClass`, `Synapse`, `NeuronMeta`,
|
||||
//! `ConnectomeConfig`).
|
||||
//! - `generator` — deterministic SBM generator + helpers.
|
||||
//! - `persist` — bincode-backed binary round-trip.
|
||||
//! - `flywire` — FlyWire v783 TSV ingest (real wiring path).
|
||||
//!
|
||||
//! See `docs/research/connectome-ruvector/02-connectome-layer.md` for
|
||||
//! the schema design and the log-normal / hub-module statistics this
|
||||
//! generator targets.
|
||||
//! generator targets, and ADR-154 §13 for the FlyWire ingest hand-off.
|
||||
|
||||
pub mod flywire;
|
||||
pub mod generator;
|
||||
pub mod persist;
|
||||
pub mod schema;
|
||||
|
||||
pub use flywire::{load_flywire, FlywireError};
|
||||
pub use generator::Connectome;
|
||||
pub use persist::ConnectomeError;
|
||||
pub use schema::{ConnectomeConfig, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse};
|
||||
pub use schema::{
|
||||
ConnectomeConfig, FlyWireNeuronId, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
|
|
|
|||
|
|
@ -20,6 +20,21 @@ impl NeuronId {
|
|||
}
|
||||
}
|
||||
|
||||
/// Stable FlyWire v783 root id (64-bit). Carried alongside the dense
|
||||
/// `NeuronId` when a `Connectome` is loaded from FlyWire so analyses can
|
||||
/// round-trip back to the published identifier space. Opaque newtype;
|
||||
/// see `docs/research/connectome-ruvector/02-connectome-layer.md` §3.1.
|
||||
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
|
||||
pub struct FlyWireNeuronId(pub u64);
|
||||
|
||||
impl FlyWireNeuronId {
|
||||
/// Raw id.
|
||||
#[inline]
|
||||
pub const fn raw(self) -> u64 {
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Synapse sign. `+1` excitatory, `-1` inhibitory. Neuromodulatory
|
||||
/// edges are *not* represented in the fast path
|
||||
/// (`docs/research/connectome-ruvector/03-neural-dynamics.md` §2.2).
|
||||
|
|
|
|||
|
|
@ -77,7 +77,8 @@ pub use analysis::{
|
|||
Analysis, AnalysisConfig, FunctionalPartition, MotifHit, MotifIndex, MotifSignature,
|
||||
};
|
||||
pub use connectome::{
|
||||
Connectome, ConnectomeConfig, ConnectomeError, NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
|
||||
load_flywire, Connectome, ConnectomeConfig, ConnectomeError, FlyWireNeuronId, FlywireError,
|
||||
NeuronClass, NeuronId, NeuronMeta, Sign, Synapse,
|
||||
};
|
||||
pub use lif::{Engine, EngineConfig, LifError, NeuronParams, Spike, SpikeEvent};
|
||||
pub use observer::{CoherenceEvent, Observer, Report};
|
||||
|
|
|
|||
359
examples/connectome-fly/tests/flywire_ingest.rs
Normal file
359
examples/connectome-fly/tests/flywire_ingest.rs
Normal file
|
|
@ -0,0 +1,359 @@
|
|||
//! FlyWire v783 ingest — acceptance tests.
|
||||
//!
|
||||
//! These tests exercise every named failure mode of the loader plus a
|
||||
//! round-trip on the 100-neuron fixture. The fixture lives as Rust
|
||||
//! string constants (see `src/connectome/flywire/fixture.rs`) so CI
|
||||
//! does not need the ~2 GB FlyWire release on disk.
|
||||
|
||||
use std::fs;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use connectome_fly::connectome::flywire::{
|
||||
classify_cell_type, classify_cell_type_strict, fixture, load_flywire, nt_to_sign, parse_nt,
|
||||
};
|
||||
use connectome_fly::{FlyWireNeuronId, FlywireError, NeuronClass, Sign};
|
||||
use tempfile::TempDir;
|
||||
|
||||
fn setup_fixture() -> (TempDir, fixture::FixturePaths) {
|
||||
let dir = TempDir::new().expect("temp dir");
|
||||
let paths = fixture::write_fixture(dir.path()).expect("write fixture");
|
||||
(dir, paths)
|
||||
}
|
||||
|
||||
#[test]
fn schema_round_trip_neuron_and_synapse_counts_match_fixture() {
    let (dir, _paths) = setup_fixture();
    let loaded = load_flywire(dir.path()).expect("load fixture");

    let neuron_count = loaded.num_neurons();
    assert_eq!(
        neuron_count,
        fixture::EXPECTED_NEURONS,
        "neuron count mismatch vs fixture declaration",
    );

    // The only edges the loader drops are self-loops (an unknown NT is
    // an error, not a filter). The fixture is authored without
    // self-loops, so the loaded count must equal the declared count.
    let synapse_count = loaded.num_synapses();
    assert_eq!(
        synapse_count,
        fixture::EXPECTED_SYNAPSES,
        "synapse count mismatch vs fixture declaration",
    );
}
|
||||
|
||||
#[test]
fn flywire_ids_are_parallel_to_dense_ids() {
    let (dir, _paths) = setup_fixture();
    let loaded = load_flywire(dir.path()).expect("load fixture");
    let ids = loaded.flywire_ids().expect("flywire_ids set after load");

    // One FlyWire id per dense neuron id.
    assert_eq!(ids.len(), loaded.num_neurons());

    // First and last rows of the fixture.
    assert_eq!(ids[0], FlyWireNeuronId(10_000_001));
    assert_eq!(ids[99], FlyWireNeuronId(10_000_100));

    // The fixture is authored sequentially, so ids strictly increase.
    for pair in ids.windows(2) {
        let (earlier, later) = (pair[0], pair[1]);
        assert!(earlier.raw() < later.raw());
    }
}
|
||||
|
||||
#[test]
fn determinism_two_loads_bit_identical_bincode() {
    let (dir, _paths) = setup_fixture();

    // Load the same directory twice.
    let first = load_flywire(dir.path()).expect("load 1");
    let second = load_flywire(dir.path()).expect("load 2");

    assert_eq!(first.num_neurons(), second.num_neurons());
    assert_eq!(first.num_synapses(), second.num_synapses());

    // The serialized forms must match byte for byte.
    let first_bytes = first.to_bytes().expect("ser a");
    let second_bytes = second.to_bytes().expect("ser b");
    assert_eq!(
        first_bytes, second_bytes,
        "FlyWire ingest is not deterministic"
    );
}
|
||||
|
||||
#[test]
fn nt_to_sign_covers_release_documented_labels() {
    // (labels, expected fast-path sign). The last group is the
    // neuromodulators, which map to excitatory in the fast path per
    // research doc §4 (the slow pool lives outside the fast path).
    let groups: [(&[&str], Sign); 3] = [
        (
            &["ACH", "GLUT", "ACETYLCHOLINE", "Glutamate"],
            Sign::Excitatory,
        ),
        (&["GABA", "HIST", "histamine"], Sign::Inhibitory),
        (
            &["DOP", "SER", "OCT", "5-HT", "DA", "OA"],
            Sign::Excitatory,
        ),
    ];

    for (labels, expected) in groups.iter() {
        for &raw in labels.iter() {
            let nt = parse_nt(raw, 0).expect(raw);
            assert_eq!(&nt_to_sign(nt), expected);
        }
    }
}
|
||||
|
||||
#[test]
fn unknown_nt_type_is_a_named_error_not_silent_default() {
    let err = parse_nt("PANIC", 42).expect_err("must reject unknown NT");

    // The error must carry the raw label and the context id unchanged.
    if let FlywireError::UnknownNtType { raw, neuron_id } = &err {
        assert_eq!(raw.as_str(), "PANIC");
        assert_eq!(*neuron_id, 42);
    } else {
        panic!("wrong variant: {err:?}");
    }
}
|
||||
|
||||
#[test]
fn cell_type_coverage_hits_key_classes() {
    let (dir, _paths) = setup_fixture();
    let loaded = load_flywire(dir.path()).expect("load fixture");

    // Coarse classes the fixture is authored to cover explicitly; each
    // must have at least one neuron after the load.
    let expected_classes = [
        NeuronClass::PhotoReceptor,
        NeuronClass::Chemosensory,
        NeuronClass::Mechanosensory,
        NeuronClass::OpticLocal,
        NeuronClass::KenyonCell,
        NeuronClass::MbOutput,
        NeuronClass::CentralComplex,
        NeuronClass::LateralAccessory,
        NeuronClass::Descending,
        NeuronClass::Ascending,
        NeuronClass::Motor,
        NeuronClass::LocalInter,
        NeuronClass::Projection,
        NeuronClass::Modulatory,
    ];
    for cls in expected_classes {
        let bucket = &loaded.by_class()[cls as usize];
        assert!(
            !bucket.is_empty(),
            "class {cls:?} unexpectedly empty after fixture load",
        );
    }

    // Stimulus and readout (ADR §3.4 AC) rely on the sensory and motor
    // indices, so those must be populated as well.
    assert!(!loaded.sensory_neurons().is_empty());
    assert!(!loaded.motor_neurons().is_empty());
}
|
||||
|
||||
#[test]
fn classify_cell_type_known_prefixes() {
    // Labels that must resolve through the prefix table alone.
    let by_label = [
        ("KC_g", NeuronClass::KenyonCell),
        ("MBON05", NeuronClass::MbOutput),
        ("DNp01", NeuronClass::Descending),
        ("Motor_leg_1", NeuronClass::Motor),
        ("LN_GABA_A", NeuronClass::LocalInter),
    ];
    for (label, expected) in by_label.iter() {
        let got = classify_cell_type(Some(*label), None).unwrap();
        assert_eq!(&got, expected);
    }

    // With no cell type, the flow hint decides.
    let from_flow = classify_cell_type(None, Some("efferent")).unwrap();
    assert_eq!(from_flow, NeuronClass::Motor);

    // With neither, the result is the coarse bucket.
    let unresolved = classify_cell_type(None, None).unwrap();
    assert_eq!(unresolved, NeuronClass::Other);
}
|
||||
|
||||
#[test]
fn malformed_tsv_surfaces_row_level_error() {
    let dir = TempDir::new().expect("temp");
    let root = dir.path();

    // The neurons and classification files are the valid fixture ones.
    fs::write(root.join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
    fs::write(
        root.join("classification.tsv"),
        fixture::classification_tsv(),
    )
    .unwrap();

    // Connections file: valid header and first data row, then a row
    // whose pre_id is not an integer.
    let broken = concat!(
        "pre_id\tpost_id\tneuropil\tsyn_count\tsyn_weight\tnt_type\n",
        "10000005\t10000013\tMB_CA_L\t12\t12.0\tACH\n",
        "BROKEN\t10000013\tMB_CA_L\t12\t12.0\tACH\n",
    );
    fs::write(root.join("connections.tsv"), broken).unwrap();

    let err = load_flywire(root).expect_err("must fail on BROKEN row");
    if let FlywireError::MalformedRow { file, line, .. } = &err {
        assert_eq!(file.as_str(), "connections.tsv");
        assert_eq!(*line, 3, "expected line 3 (header=1, first data=2)");
    } else {
        panic!("wrong variant: {err:?}");
    }
}
|
||||
|
||||
#[test]
fn unknown_cell_type_folds_to_other_in_default_mode() {
    // The lenient classifier maps an unrecognized label to `Other`:
    // FlyWire has ~8k cell types and the coarse bucket is the v1
    // contract.
    let resolved = classify_cell_type(Some("ZZZ_novel_type"), None);
    assert_eq!(resolved.unwrap(), NeuronClass::Other);
}
|
||||
|
||||
#[test]
fn unknown_cell_type_is_a_named_error_in_strict_mode() {
    // The strict classifier reports `FlywireError::UnknownCellType`,
    // letting callers opt in to auditing prefix coverage.
    let err = classify_cell_type_strict(Some("ZZZ_novel_type"), None, 99)
        .expect_err("strict must reject unknown cell type");
    if let FlywireError::UnknownCellType { raw, neuron_id } = &err {
        assert_eq!(raw.as_str(), "ZZZ_novel_type");
        assert_eq!(*neuron_id, 99);
    } else {
        panic!("wrong variant: {err:?}");
    }

    // A recognized label is unaffected by strict mode.
    let known = classify_cell_type_strict(Some("KC_g"), None, 1).unwrap();
    assert_eq!(known, NeuronClass::KenyonCell);
}
|
||||
|
||||
#[test]
fn unknown_nt_type_in_neurons_file_fails_load() {
    let dir = TempDir::new().expect("temp");
    let root = dir.path();

    // Swap the NT label of the first neuron row for a bogus one.
    let original_row = "10000001\t9000001\tPR_R1\tHIST\t";
    let corrupted_row = "10000001\t9000001\tPR_R1\tBOGUS\t";
    let bad_neurons = fixture::neurons_tsv().replacen(original_row, corrupted_row, 1);

    fs::write(root.join("neurons.tsv"), bad_neurons).unwrap();
    fs::write(
        root.join("classification.tsv"),
        fixture::classification_tsv(),
    )
    .unwrap();
    fs::write(root.join("connections.tsv"), fixture::connections_tsv()).unwrap();

    let err = load_flywire(root).expect_err("must fail on BOGUS nt_type");
    if let FlywireError::UnknownNtType { raw, neuron_id } = &err {
        assert_eq!(raw.as_str(), "BOGUS");
        assert_eq!(*neuron_id, 10_000_001);
    } else {
        panic!("wrong variant: {err:?}");
    }
}
|
||||
|
||||
#[test]
fn dangling_synapse_reference_is_a_named_error() {
    let tmp = TempDir::new().expect("temp");
    let root = tmp.path();

    fs::write(root.join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
    let classification_path = root.join("classification.tsv");
    fs::write(classification_path, fixture::classification_tsv()).unwrap();

    // Fixture connections plus one extra row whose post_id (99999999) is
    // not a neuron known to the loader.
    let with_dangling = fixture::connections_tsv() + "10000005\t99999999\tSMP_L\t3\t3.0\tACH\n";
    fs::write(root.join("connections.tsv"), with_dangling).unwrap();

    let failure = load_flywire(root).expect_err("must fail on dangling post_id");
    if let FlywireError::UnknownPostNeuron(id) = failure {
        assert_eq!(id, 99_999_999);
    } else {
        panic!("wrong variant: {failure:?}");
    }
}
|
||||
|
||||
#[test]
fn duplicate_neuron_id_is_a_named_error() {
    let tmp = TempDir::new().expect("temp");
    let root = tmp.path();

    // Fixture neurons with one extra trailing row that reuses neuron id
    // 10000001, so the same id appears twice in the file.
    let with_duplicate = fixture::neurons_tsv()
        + "10000001\t9000001\tPR_R1\tHIST\tleft\tOCN\tafferent\tsensory\n";
    fs::write(root.join("neurons.tsv"), with_duplicate).unwrap();

    // Classification and connections are the untouched fixture contents.
    let classification_path = root.join("classification.tsv");
    fs::write(classification_path, fixture::classification_tsv()).unwrap();
    let connections_path = root.join("connections.tsv");
    fs::write(connections_path, fixture::connections_tsv()).unwrap();

    let failure = load_flywire(root).expect_err("must fail on duplicate neuron_id");
    if let FlywireError::DuplicateNeuron(id) = failure {
        assert_eq!(id, 10_000_001);
    } else {
        panic!("wrong variant: {failure:?}");
    }
}
|
||||
|
||||
#[test]
fn classification_file_is_optional() {
    let tmp = TempDir::new().expect("temp");
    let root = tmp.path();

    // Only neurons.tsv and connections.tsv are written. classification.tsv
    // is deliberately left out: the cell type is taken from neurons.tsv
    // directly, so the load must still succeed.
    fs::write(root.join("neurons.tsv"), fixture::neurons_tsv()).unwrap();
    fs::write(root.join("connections.tsv"), fixture::connections_tsv()).unwrap();

    let loaded = load_flywire(root).expect("load without classification");
    assert_eq!(loaded.num_neurons(), fixture::EXPECTED_NEURONS);
}
|
||||
|
||||
#[test]
fn missing_neurons_file_surfaces_io_error() {
    // The temp directory is created but nothing is written into it, so
    // neurons.tsv is absent when the loader runs.
    let empty_dir = TempDir::new().expect("temp");
    let failure = load_flywire(empty_dir.path()).expect_err("must fail without neurons.tsv");
    if let FlywireError::Io { file, .. } = failure {
        assert_eq!(file, "neurons.tsv");
    } else {
        panic!("wrong variant: {failure:?}");
    }
}
|
||||
|
||||
#[test]
fn synapse_signs_follow_nt_mapping_in_fixture() {
    // Loads the on-disk fixture and checks the excitatory/inhibitory split
    // of the resulting synapses.
    let (dir, _paths) = setup_fixture();
    let c = load_flywire(dir.path()).expect("load fixture");

    // Fixture includes several GABA and HIST edges — expect inhibitory
    // synapses to be a non-zero fraction but bounded above by the
    // balance of excitatory ACH / GLUT edges.
    let mut inh = 0_usize;
    let mut exc = 0_usize;
    for s in c.synapses() {
        // Exhaustive on purpose: a new `Sign` variant should break the build
        // here rather than be silently ignored.
        match s.sign {
            Sign::Inhibitory => inh += 1,
            Sign::Excitatory => exc += 1,
        }
    }
    assert!(inh > 0, "fixture has no inhibitory edges: unexpected");
    assert!(exc > 0, "fixture has no excitatory edges: unexpected");

    let frac = inh as f32 / c.num_synapses() as f32;
    // `0.05..0.5` is half-open: exactly 0.5 is rejected, and the message
    // states the band with the matching open upper bound.
    assert!(
        (0.05..0.5).contains(&frac),
        "inhibitory fraction {frac:.3} out of expected band [0.05, 0.5)",
    );
}
|
||||
|
||||
#[test]
fn dir_label_on_io_error_uses_filename_only() {
    // The `Io` variant is expected to name the file by its short filename
    // rather than the full path, so the error reads the same whatever
    // directory root was passed in.
    let missing_root = PathBuf::from("/nonexistent/__connectome_fly_test__");
    let failure = load_flywire(&missing_root).expect_err("must fail on missing dir");
    match failure {
        FlywireError::Io { file: label, .. } => assert_eq!(label, "neurons.tsv"),
        unexpected => panic!("wrong variant: {unexpected:?}"),
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue