mirror of
https://github.com/ruvnet/RuView.git
synced 2026-04-28 05:59:32 +00:00
feat(ruvector): ADR-084 Pass 1 — sketch module foundation
Implements Pass 1 of ADR-084 (RaBitQ similarity sensor): a thin
RuView-flavored API over `ruvector_core::quantization::BinaryQuantized`,
exposed at `wifi_densepose_ruvector::{Sketch, SketchBank, SketchError}`.
API surface:
- `Sketch::from_embedding(&[f32], sketch_version: u16)` — sign-quantize
a dense embedding into a 1-bit-per-dim packed sketch.
- `Sketch::distance` — hamming distance with schema-mismatch error.
- `Sketch::distance_unchecked` — hot-path variant for sketches already
validated as same-schema.
- `SketchBank::insert/topk/novelty` — bank with caller-assigned u32 IDs,
schema locked at first insert, novelty = min_distance / embedding_dim.
Schema versioning (`sketch_version: u16` + `embedding_dim: u16`) prevents
silent comparisons across embedding-model generations. Bumping the model
forces re-sketch of the candidate bank.
Pass 1 establishes the API and unit-test foundation. Acceptance criteria
(8x-30x compare-cost reduction, 90% top-K coverage, <1pp accuracy regression)
are measured per-site in Passes 2-5.
Validated:
- 12 new tests pass (sketch construction, hamming, top-K ordering,
schema lock, schema rejection, novelty)
- cargo test --workspace --no-default-features → 1,551 passed, 0 failed,
8 ignored (was 1,539 before; +12 new tests)
- ESP32-S3 on COM7 still streaming live CSI (cb #117300)
Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
c19a33ee1c
commit
6fd5b7dad5
5 changed files with 964 additions and 47 deletions
534
v2/Cargo.lock
generated
534
v2/Cargo.lock
generated
|
|
@ -64,6 +64,23 @@ version = "0.1.6"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||
|
||||
[[package]]
|
||||
name = "anndists"
|
||||
version = "0.1.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9a8396b473aa0bceed68fb32462505387ea39fa47c7029417e0a49f10592b036"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"cfg-if",
|
||||
"cpu-time",
|
||||
"env_logger",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi-str"
|
||||
version = "0.8.0"
|
||||
|
|
@ -90,7 +107,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse",
|
||||
"anstyle-parse 0.2.7",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
"is_terminal_polyfill",
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstream"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
|
||||
dependencies = [
|
||||
"anstyle",
|
||||
"anstyle-parse 1.0.0",
|
||||
"anstyle-query",
|
||||
"anstyle-wincon",
|
||||
"colorchoice",
|
||||
|
|
@ -113,6 +145,15 @@ dependencies = [
|
|||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-parse"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
|
||||
dependencies = [
|
||||
"utf8parse",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "anstyle-query"
|
||||
version = "1.1.5"
|
||||
|
|
@ -257,10 +298,10 @@ dependencies = [
|
|||
"base64 0.22.1",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper 1.8.1",
|
||||
"hyper-util",
|
||||
"itoa",
|
||||
"matchit",
|
||||
|
|
@ -274,7 +315,7 @@ dependencies = [
|
|||
"serde_path_to_error",
|
||||
"serde_urlencoded",
|
||||
"sha1",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tokio",
|
||||
"tokio-tungstenite",
|
||||
"tower",
|
||||
|
|
@ -292,13 +333,13 @@ dependencies = [
|
|||
"async-trait",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"mime",
|
||||
"pin-project-lite",
|
||||
"rustversion",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
|
|
@ -333,6 +374,15 @@ version = "1.8.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2af50177e190e07a26ab74f8b1efbfe2ef87da2116221318cb1c2e82baf7de06"
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "1.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bincode"
|
||||
version = "2.0.1"
|
||||
|
|
@ -771,7 +821,7 @@ version = "4.5.60"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "24a241312cea5059b13574bb9b3861cabf758b879c15190b37b6d6fd63ab6876"
|
||||
dependencies = [
|
||||
"anstream",
|
||||
"anstream 0.6.21",
|
||||
"anstyle",
|
||||
"clap_lex",
|
||||
"strsim",
|
||||
|
|
@ -875,6 +925,16 @@ dependencies = [
|
|||
"version_check",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.9.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "core-foundation"
|
||||
version = "0.10.1"
|
||||
|
|
@ -898,7 +958,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "064badf302c3194842cf2c5d61f56cc88e54a759313879cdf03abdd27d0c3b97"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"core-foundation",
|
||||
"core-foundation 0.10.1",
|
||||
"core-graphics-types",
|
||||
"foreign-types 0.5.0",
|
||||
"libc",
|
||||
|
|
@ -911,10 +971,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "3d44a101f213f6c4cdc1853d4b78aef6db6bdfa3468798cc1d9912f4735013eb"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"core-foundation",
|
||||
"core-foundation 0.10.1",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpu-time"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e9e393a7668fe1fad3075085b86c781883000b4ede868f43627b34a87c8b7ded"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cpufeatures"
|
||||
version = "0.2.17"
|
||||
|
|
@ -1371,7 +1441,7 @@ dependencies = [
|
|||
"rustc_version",
|
||||
"toml 0.9.12+spec-1.1.0",
|
||||
"vswhom",
|
||||
"winreg",
|
||||
"winreg 0.55.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -1407,6 +1477,29 @@ dependencies = [
|
|||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_filter"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "32e90c2accc4b07a8456ea0debdc2e7587bdd890680d71173a15d4ae604f6eef"
|
||||
dependencies = [
|
||||
"log",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "env_logger"
|
||||
version = "0.11.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0621c04f2196ac3f488dd583365b9c09be011a4ab8b9f37248ffcc8f6198b56a"
|
||||
dependencies = [
|
||||
"anstream 1.0.0",
|
||||
"anstyle",
|
||||
"env_filter",
|
||||
"jiff",
|
||||
"log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "equivalent"
|
||||
version = "1.0.2"
|
||||
|
|
@ -1867,7 +1960,7 @@ dependencies = [
|
|||
"raw-cpuid",
|
||||
"rayon",
|
||||
"seq-macro",
|
||||
"sysctl",
|
||||
"sysctl 0.5.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2188,6 +2281,25 @@ dependencies = [
|
|||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "h2"
|
||||
version = "0.3.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fnv",
|
||||
"futures-core",
|
||||
"futures-sink",
|
||||
"futures-util",
|
||||
"http 0.2.12",
|
||||
"indexmap 2.13.0",
|
||||
"slab",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tracing",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "2.7.1"
|
||||
|
|
@ -2333,6 +2445,31 @@ version = "1.1.14"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec9d92d097f4749b64e8cc33d924d9f40a2d4eb91402b458014b781f5733d60f"
|
||||
|
||||
[[package]]
|
||||
name = "hnsw_rs"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43a5258f079b97bf2e8311ff9579e903c899dcbac0d9a138d62e9a066778bd07"
|
||||
dependencies = [
|
||||
"anndists",
|
||||
"anyhow",
|
||||
"bincode 1.3.3",
|
||||
"cfg-if",
|
||||
"cpu-time",
|
||||
"env_logger",
|
||||
"hashbrown 0.15.5",
|
||||
"indexmap 2.13.0",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"mmap-rs",
|
||||
"num-traits",
|
||||
"num_cpus",
|
||||
"parking_lot",
|
||||
"rand 0.9.2",
|
||||
"rayon",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "html5ever"
|
||||
version = "0.29.1"
|
||||
|
|
@ -2345,6 +2482,17 @@ dependencies = [
|
|||
"match_token",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "0.2.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"fnv",
|
||||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http"
|
||||
version = "1.4.0"
|
||||
|
|
@ -2355,6 +2503,17 @@ dependencies = [
|
|||
"itoa",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-body"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"http 0.2.12",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "http-body"
|
||||
version = "1.0.1"
|
||||
|
|
@ -2362,7 +2521,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -2373,8 +2532,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
|
|||
dependencies = [
|
||||
"bytes",
|
||||
"futures-core",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"pin-project-lite",
|
||||
]
|
||||
|
||||
|
|
@ -2396,6 +2555,30 @@ version = "1.0.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "0.14.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7"
|
||||
dependencies = [
|
||||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2",
|
||||
"http 0.2.12",
|
||||
"http-body 0.4.6",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
"pin-project-lite",
|
||||
"socket2 0.5.10",
|
||||
"tokio",
|
||||
"tower-service",
|
||||
"tracing",
|
||||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper"
|
||||
version = "1.8.1"
|
||||
|
|
@ -2406,8 +2589,8 @@ dependencies = [
|
|||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-core",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"httparse",
|
||||
"httpdate",
|
||||
"itoa",
|
||||
|
|
@ -2418,6 +2601,20 @@ dependencies = [
|
|||
"want",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-rustls"
|
||||
version = "0.24.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
|
||||
dependencies = [
|
||||
"futures-util",
|
||||
"http 0.2.12",
|
||||
"hyper 0.14.32",
|
||||
"rustls 0.21.12",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hyper-tls"
|
||||
version = "0.6.0"
|
||||
|
|
@ -2426,7 +2623,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0"
|
|||
dependencies = [
|
||||
"bytes",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper 1.8.1",
|
||||
"hyper-util",
|
||||
"native-tls",
|
||||
"tokio",
|
||||
|
|
@ -2444,9 +2641,9 @@ dependencies = [
|
|||
"bytes",
|
||||
"futures-channel",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"hyper",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"hyper 1.8.1",
|
||||
"ipnet",
|
||||
"libc",
|
||||
"percent-encoding",
|
||||
|
|
@ -2778,6 +2975,30 @@ dependencies = [
|
|||
"system-deps",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiff"
|
||||
version = "0.2.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f00b5dbd620d61dfdcb6007c9c1f6054ebd75319f163d886a9055cec1155073d"
|
||||
dependencies = [
|
||||
"jiff-static",
|
||||
"log",
|
||||
"portable-atomic",
|
||||
"portable-atomic-util",
|
||||
"serde_core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jiff-static"
|
||||
version = "0.2.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e000de030ff8022ea1da3f466fbb0f3a809f5e51ed31f6dd931c35181ad8e6d7"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn 2.0.117",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jni"
|
||||
version = "0.21.1"
|
||||
|
|
@ -3270,6 +3491,23 @@ dependencies = [
|
|||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "mmap-rs"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4ecce9d566cb9234ae3db9e249c8b55665feaaf32b0859ff1e27e310d2beb3d8"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"combine",
|
||||
"libc",
|
||||
"mach2",
|
||||
"nix 0.30.1",
|
||||
"sysctl 0.6.0",
|
||||
"thiserror 2.0.18",
|
||||
"widestring",
|
||||
"windows 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "muda"
|
||||
version = "0.17.1"
|
||||
|
|
@ -3501,6 +3739,18 @@ dependencies = [
|
|||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nix"
|
||||
version = "0.30.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "74523f3a35e05aba87a1d978330aef40f67b0304ac79c1c00b294c9830543db6"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"cfg-if",
|
||||
"cfg_aliases",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nodrop"
|
||||
version = "0.1.14"
|
||||
|
|
@ -4837,6 +5087,15 @@ version = "0.5.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "03251193000f4bd3b042892be858ee50e8b3719f2b08e5833ac4353724632430"
|
||||
|
||||
[[package]]
|
||||
name = "redb"
|
||||
version = "2.6.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8eca1e9d98d5a7e9002d0013e18d5a9b000aee942eb134883a82f06ebffb6c01"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.18"
|
||||
|
|
@ -4935,6 +5194,47 @@ dependencies = [
|
|||
"bytecheck",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.11.27"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd67538700a17451e7cba03ac727fb961abb7607553461627b97de0b89cf4a62"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
"bytes",
|
||||
"encoding_rs",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"h2",
|
||||
"http 0.2.12",
|
||||
"http-body 0.4.6",
|
||||
"hyper 0.14.32",
|
||||
"hyper-rustls",
|
||||
"ipnet",
|
||||
"js-sys",
|
||||
"log",
|
||||
"mime",
|
||||
"once_cell",
|
||||
"percent-encoding",
|
||||
"pin-project-lite",
|
||||
"rustls 0.21.12",
|
||||
"rustls-pemfile",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper 0.1.2",
|
||||
"system-configuration",
|
||||
"tokio",
|
||||
"tokio-rustls",
|
||||
"tower-service",
|
||||
"url",
|
||||
"wasm-bindgen",
|
||||
"wasm-bindgen-futures",
|
||||
"web-sys",
|
||||
"webpki-roots",
|
||||
"winreg 0.50.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "reqwest"
|
||||
version = "0.12.28"
|
||||
|
|
@ -4945,10 +5245,10 @@ dependencies = [
|
|||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper 1.8.1",
|
||||
"hyper-tls",
|
||||
"hyper-util",
|
||||
"js-sys",
|
||||
|
|
@ -4961,7 +5261,7 @@ dependencies = [
|
|||
"serde",
|
||||
"serde_json",
|
||||
"serde_urlencoded",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tokio",
|
||||
"tokio-native-tls",
|
||||
"tower",
|
||||
|
|
@ -4983,10 +5283,10 @@ dependencies = [
|
|||
"bytes",
|
||||
"futures-core",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"hyper",
|
||||
"hyper 1.8.1",
|
||||
"hyper-util",
|
||||
"js-sys",
|
||||
"log",
|
||||
|
|
@ -4994,7 +5294,7 @@ dependencies = [
|
|||
"pin-project-lite",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tokio",
|
||||
"tokio-util",
|
||||
"tower",
|
||||
|
|
@ -5194,6 +5494,18 @@ dependencies = [
|
|||
"windows-sys 0.61.2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.21.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
|
||||
dependencies = [
|
||||
"log",
|
||||
"ring",
|
||||
"rustls-webpki 0.101.7",
|
||||
"sct",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls"
|
||||
version = "0.22.4"
|
||||
|
|
@ -5234,6 +5546,15 @@ dependencies = [
|
|||
"security-framework",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pemfile"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c"
|
||||
dependencies = [
|
||||
"base64 0.21.7",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-pki-types"
|
||||
version = "1.14.0"
|
||||
|
|
@ -5250,7 +5571,7 @@ version = "0.6.2"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1d99feebc72bae7ab76ba994bb5e121b8d83d910ca40b36e0921f53becc41784"
|
||||
dependencies = [
|
||||
"core-foundation",
|
||||
"core-foundation 0.10.1",
|
||||
"core-foundation-sys",
|
||||
"jni",
|
||||
"log",
|
||||
|
|
@ -5271,6 +5592,16 @@ version = "0.1.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f87165f0995f63a9fbeea62b64d10b4d9d8e78ec6d7d51fb2125fda7bb36788f"
|
||||
|
||||
[[package]]
|
||||
name = "rustls-webpki"
|
||||
version = "0.101.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rustls-webpki"
|
||||
version = "0.102.8"
|
||||
|
|
@ -5353,17 +5684,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "dc7bc95e3682430c27228d7bc694ba9640cd322dde1bd5e7c9cf96a16afb4ca1"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"bincode",
|
||||
"bincode 2.0.1",
|
||||
"chrono",
|
||||
"crossbeam",
|
||||
"dashmap",
|
||||
"hnsw_rs",
|
||||
"memmap2",
|
||||
"ndarray 0.16.1",
|
||||
"once_cell",
|
||||
"parking_lot",
|
||||
"rand 0.8.5",
|
||||
"rand_distr 0.4.3",
|
||||
"rayon",
|
||||
"redb",
|
||||
"reqwest 0.11.27",
|
||||
"rkyv",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"simsimd",
|
||||
"thiserror 2.0.18",
|
||||
"tracing",
|
||||
"uuid",
|
||||
|
|
@ -5556,6 +5894,16 @@ version = "1.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
|
||||
|
||||
[[package]]
|
||||
name = "sct"
|
||||
version = "0.7.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414"
|
||||
dependencies = [
|
||||
"ring",
|
||||
"untrusted",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "security-framework"
|
||||
version = "3.7.0"
|
||||
|
|
@ -5563,7 +5911,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "b7f4bc775c73d9a02cde8bf7b2ec4c9d12743edf609006c7facc23998404cd1d"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"core-foundation",
|
||||
"core-foundation 0.10.1",
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
"security-framework-sys",
|
||||
|
|
@ -5803,7 +6151,7 @@ checksum = "2acaf3f973e8616d7ceac415f53fc60e190b2a686fbcf8d27d0256c741c5007b"
|
|||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"cfg-if",
|
||||
"core-foundation",
|
||||
"core-foundation 0.10.1",
|
||||
"core-foundation-sys",
|
||||
"io-kit-sys",
|
||||
"libudev",
|
||||
|
|
@ -5928,6 +6276,15 @@ version = "0.1.5"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
|
||||
|
||||
[[package]]
|
||||
name = "simsimd"
|
||||
version = "5.9.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9638f2829f4887c62a01958903b58fa1b740a64d5dc2bbc4a75a33827ee1bd53"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "siphasher"
|
||||
version = "0.3.11"
|
||||
|
|
@ -6134,6 +6491,12 @@ dependencies = [
|
|||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
|
||||
|
||||
[[package]]
|
||||
name = "sync_wrapper"
|
||||
version = "1.0.2"
|
||||
|
|
@ -6168,6 +6531,20 @@ dependencies = [
|
|||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysctl"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc"
|
||||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"byteorder",
|
||||
"enum-as-inner",
|
||||
"libc",
|
||||
"thiserror 1.0.69",
|
||||
"walkdir",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sysinfo"
|
||||
version = "0.32.1"
|
||||
|
|
@ -6182,6 +6559,27 @@ dependencies = [
|
|||
"windows 0.57.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
|
||||
dependencies = [
|
||||
"bitflags 1.3.2",
|
||||
"core-foundation 0.9.4",
|
||||
"system-configuration-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-configuration-sys"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a75fb188eb626b924683e3b95e3a48e63551fcfb51949de2f06a9d91dbee93c9"
|
||||
dependencies = [
|
||||
"core-foundation-sys",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "system-deps"
|
||||
version = "6.2.2"
|
||||
|
|
@ -6229,7 +6627,7 @@ checksum = "6e06d52c379e63da659a483a958110bbde891695a0ecb53e48cc7786d5eda7bb"
|
|||
dependencies = [
|
||||
"bitflags 2.11.0",
|
||||
"block2",
|
||||
"core-foundation",
|
||||
"core-foundation 0.10.1",
|
||||
"core-graphics",
|
||||
"crossbeam-channel",
|
||||
"dispatch2",
|
||||
|
|
@ -6303,7 +6701,7 @@ dependencies = [
|
|||
"glob",
|
||||
"gtk",
|
||||
"heck 0.5.0",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"jni",
|
||||
"libc",
|
||||
"log",
|
||||
|
|
@ -6488,7 +6886,7 @@ dependencies = [
|
|||
"cookie",
|
||||
"dpi",
|
||||
"gtk",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"jni",
|
||||
"objc2",
|
||||
"objc2-ui-kit",
|
||||
|
|
@ -6511,7 +6909,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "e11ea2e6f801d275fdd890d6c9603736012742a1c33b96d0db788c9cdebf7f9e"
|
||||
dependencies = [
|
||||
"gtk",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"jni",
|
||||
"log",
|
||||
"objc2",
|
||||
|
|
@ -6543,7 +6941,7 @@ dependencies = [
|
|||
"dunce",
|
||||
"glob",
|
||||
"html5ever",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"infer",
|
||||
"json-patch",
|
||||
"kuchikiki",
|
||||
|
|
@ -6779,6 +7177,16 @@ dependencies = [
|
|||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-rustls"
|
||||
version = "0.24.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
|
||||
dependencies = [
|
||||
"rustls 0.21.12",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tokio-serial"
|
||||
version = "5.4.5"
|
||||
|
|
@ -6966,7 +7374,7 @@ dependencies = [
|
|||
"futures-core",
|
||||
"futures-util",
|
||||
"pin-project-lite",
|
||||
"sync_wrapper",
|
||||
"sync_wrapper 1.0.2",
|
||||
"tokio",
|
||||
"tower-layer",
|
||||
"tower-service",
|
||||
|
|
@ -6982,8 +7390,8 @@ dependencies = [
|
|||
"bitflags 2.11.0",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"http-body-util",
|
||||
"http-range-header",
|
||||
"httpdate",
|
||||
|
|
@ -7007,8 +7415,8 @@ dependencies = [
|
|||
"bitflags 2.11.0",
|
||||
"bytes",
|
||||
"futures-util",
|
||||
"http",
|
||||
"http-body",
|
||||
"http 1.4.0",
|
||||
"http-body 1.0.1",
|
||||
"iri-string",
|
||||
"pin-project-lite",
|
||||
"tower",
|
||||
|
|
@ -7150,7 +7558,7 @@ dependencies = [
|
|||
"byteorder",
|
||||
"bytes",
|
||||
"data-encoding",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"httparse",
|
||||
"log",
|
||||
"rand 0.8.5",
|
||||
|
|
@ -7306,7 +7714,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
checksum = "d81f9efa9df032be5934a46a068815a10a042b494b6a58cb0a1a97bb5467ed6f"
|
||||
dependencies = [
|
||||
"base64 0.22.1",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"httparse",
|
||||
"log",
|
||||
]
|
||||
|
|
@ -7724,6 +8132,12 @@ dependencies = [
|
|||
"rustls-pki-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "webpki-roots"
|
||||
version = "0.25.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
|
||||
|
||||
[[package]]
|
||||
name = "webview2-com"
|
||||
version = "0.38.2"
|
||||
|
|
@ -7770,6 +8184,12 @@ dependencies = [
|
|||
"safe_arch",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "widestring"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471"
|
||||
|
||||
[[package]]
|
||||
name = "wifi-densepose-api"
|
||||
version = "0.3.0"
|
||||
|
|
@ -7961,6 +8381,7 @@ dependencies = [
|
|||
"criterion",
|
||||
"ruvector-attention 2.0.4",
|
||||
"ruvector-attn-mincut",
|
||||
"ruvector-core",
|
||||
"ruvector-crv",
|
||||
"ruvector-gnn",
|
||||
"ruvector-mincut",
|
||||
|
|
@ -8139,6 +8560,15 @@ dependencies = [
|
|||
"windows-version",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.48.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f"
|
||||
dependencies = [
|
||||
"windows-targets 0.48.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.57.0"
|
||||
|
|
@ -8664,6 +9094,16 @@ dependencies = [
|
|||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.50.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "524e57b2c537c0f9b1e69f1965311ec12182b4122e45035b1508cd24d2adadb1"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"windows-sys 0.48.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winreg"
|
||||
version = "0.55.0"
|
||||
|
|
@ -8784,7 +9224,7 @@ dependencies = [
|
|||
"gdkx11",
|
||||
"gtk",
|
||||
"html5ever",
|
||||
"http",
|
||||
"http 1.4.0",
|
||||
"javascriptcore-rs",
|
||||
"jni",
|
||||
"kuchikiki",
|
||||
|
|
|
|||
|
|
@ -120,6 +120,7 @@ midstreamer-attractor = "0.1.0"
|
|||
|
||||
# ruvector integration (published on crates.io)
|
||||
# Vendored at v2.1.0 in vendor/ruvector; using crates.io versions until published.
|
||||
ruvector-core = "2.0.4"
|
||||
ruvector-mincut = "2.0.4"
|
||||
ruvector-attn-mincut = "2.0.4"
|
||||
ruvector-temporal-tensor = "2.0.4"
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ default = []
|
|||
crv = ["dep:ruvector-crv", "dep:ruvector-gnn", "dep:serde", "dep:serde_json"]
|
||||
|
||||
[dependencies]
|
||||
ruvector-core = { workspace = true }
|
||||
ruvector-mincut = { workspace = true }
|
||||
ruvector-attn-mincut = { workspace = true }
|
||||
ruvector-temporal-tensor = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -30,4 +30,7 @@
|
|||
pub mod crv;
|
||||
pub mod mat;
|
||||
pub mod signal;
|
||||
pub mod sketch;
|
||||
pub mod viewpoint;
|
||||
|
||||
pub use sketch::{Sketch, SketchBank, SketchError};
|
||||
|
|
|
|||
472
v2/crates/wifi-densepose-ruvector/src/sketch.rs
Normal file
472
v2/crates/wifi-densepose-ruvector/src/sketch.rs
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
//! RaBitQ-style binary sketch — cheap similarity sensor for CSI/pose embeddings.
|
||||
//!
|
||||
//! Implements **Pass 1** of [ADR-084](../../../../../docs/adr/ADR-084-rabitq-similarity-sensor.md):
|
||||
//! a thin RuView-flavored API over `ruvector_core::quantization::BinaryQuantized`.
|
||||
//!
|
||||
//! # Why a sketch
|
||||
//!
|
||||
//! Every "have I seen something like this before?" comparison in the RuView
|
||||
//! pipeline (AETHER re-ID, room fingerprinting, mincut prefilter, novelty
|
||||
//! detection, mesh-exchange compression, privacy event log) shares the same
|
||||
//! shape: dense float embedding → similarity score → top-K candidates.
|
||||
//! The full-precision compare is expensive — `O(d)` float operations per pair,
|
||||
//! cache-unfriendly because every dimension is a 4-byte load.
|
||||
//!
|
||||
//! A 1-bit sketch (one bit per embedding dimension, packed into bytes) collapses
|
||||
//! the compare to a hardware-accelerated POPCNT/NEON-vcnt over ~32× less
|
||||
//! memory. The published *RaBitQ* algorithm (Gao & Long, SIGMOD 2024) wraps
|
||||
//! this with a randomized rotation for theoretical error bounds; we ship the
|
||||
//! pure sign-quantization variant first and add the rotation later if
|
||||
//! benchmark-measured top-K coverage drops below the ADR-084 acceptance
|
||||
//! threshold of 90%.
|
||||
//!
|
||||
//! # Acceptance criteria (ADR-084 §"Acceptance test")
|
||||
//!
|
||||
//! - Sketch compare cost reduction: **8×–30×** vs full-float compare.
|
||||
//! - Top-K coverage: **≥ 90%** agreement with full-float top-K.
|
||||
//! - End-to-end accuracy regression: **< 1 percentage point**.
|
||||
//!
|
||||
//! Pass 1 establishes the API and the unit-test foundation. Pass 2+ wires it
|
||||
//! into specific pipeline sites and measures the criteria there.
|
||||
//!
|
||||
//! # Use sites (ADR-084)
|
||||
//!
|
||||
//! 1. AETHER re-ID hot-cache filter (`signal::ruvsense::pose_tracker`)
|
||||
//! 2. Cluster-Pi novelty sensor (`sensing-server` `SketchBank`)
|
||||
//! 3. Mesh-exchange compression (ADR-066 swarm bridge)
|
||||
//! 4. Privacy-preserving event log (cluster Pi)
|
||||
//! 5. Mincut prefilter (`ruvector::signal::subcarrier`)
|
||||
//!
|
||||
//! All sites take a `&Sketch` instead of an `&[f32]`; the bridge to dense
|
||||
//! embeddings is `Sketch::from_embedding`.
|
||||
|
||||
use ruvector_core::quantization::{BinaryQuantized, QuantizedVector};
|
||||
|
||||
/// Errors raised by the sketch API.
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
pub enum SketchError {
|
||||
/// The sketch's `sketch_version` does not match the `SketchBank`'s.
|
||||
/// This guards against silently comparing sketches produced by different
|
||||
/// embedding-model generations.
|
||||
#[error("sketch_version mismatch: bank={bank}, query={query}")]
|
||||
SketchVersionMismatch {
|
||||
/// Version stored in the bank.
|
||||
bank: u16,
|
||||
/// Version on the incoming sketch.
|
||||
query: u16,
|
||||
},
|
||||
|
||||
/// The sketch's embedding dimension does not match the bank's.
|
||||
/// Two sketches of different dimensions cannot be compared.
|
||||
#[error("embedding_dim mismatch: bank={bank}, query={query}")]
|
||||
EmbeddingDimMismatch {
|
||||
/// Dimension stored in the bank.
|
||||
bank: u16,
|
||||
/// Dimension on the incoming sketch.
|
||||
query: u16,
|
||||
},
|
||||
}
|
||||
|
||||
/// A 1-bit binary sketch of a dense embedding vector.
|
||||
///
|
||||
/// 32× smaller than the source `[f32]` and compared via SIMD-accelerated
|
||||
/// hamming distance (NEON `vcnt` on aarch64, POPCNT on x86_64). Use as a
|
||||
/// cheap pre-filter before full-precision comparison.
|
||||
///
|
||||
/// # Versioning
|
||||
///
|
||||
/// `sketch_version` distinguishes sketches produced by different embedding
|
||||
/// generations. Bumping the embedding model invalidates all stored sketches;
|
||||
/// the `SketchBank` rejects mismatched versions at compare time so callers
|
||||
/// never silently compare incompatible sketches.
|
||||
///
|
||||
/// `embedding_dim` is the source vector's length (not the byte-packed size);
|
||||
/// kept as a check that two sketches are actually comparable.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Sketch {
|
||||
/// 1-bit-per-dimension packed bytes.
|
||||
inner: BinaryQuantized,
|
||||
/// Source-embedding dimension (e.g., 128 for AETHER).
|
||||
embedding_dim: u16,
|
||||
/// Schema version of the producing embedding model.
|
||||
sketch_version: u16,
|
||||
}
|
||||
|
||||
impl Sketch {
|
||||
/// Construct a sketch from a dense f32 embedding.
|
||||
///
|
||||
/// Each dimension contributes one bit: `1` if the value is `> 0.0`,
|
||||
/// `0` otherwise. This is the standard sign-quantization step.
|
||||
///
|
||||
/// `sketch_version` must be supplied by the caller and bumped whenever
|
||||
/// the embedding model that produced the input changes meaningfully
|
||||
/// (e.g., a re-trained AETHER head). Two sketches with different
|
||||
/// `sketch_version`s are not comparable.
|
||||
pub fn from_embedding(embedding: &[f32], sketch_version: u16) -> Self {
|
||||
debug_assert!(
|
||||
embedding.len() <= u16::MAX as usize,
|
||||
"embedding dimension exceeds u16::MAX"
|
||||
);
|
||||
Self {
|
||||
inner: BinaryQuantized::quantize(embedding),
|
||||
embedding_dim: embedding.len() as u16,
|
||||
sketch_version,
|
||||
}
|
||||
}
|
||||
|
||||
/// Hamming distance to another sketch in `[0, embedding_dim]`.
|
||||
///
|
||||
/// Returns `None` if the two sketches have different `embedding_dim` or
|
||||
/// `sketch_version` — comparing them would be semantically meaningless.
|
||||
/// Use [`Sketch::distance_unchecked`] when the caller has already
|
||||
/// validated the sketches come from the same producer.
|
||||
pub fn distance(&self, other: &Self) -> Result<u32, SketchError> {
|
||||
if self.embedding_dim != other.embedding_dim {
|
||||
return Err(SketchError::EmbeddingDimMismatch {
|
||||
bank: self.embedding_dim,
|
||||
query: other.embedding_dim,
|
||||
});
|
||||
}
|
||||
if self.sketch_version != other.sketch_version {
|
||||
return Err(SketchError::SketchVersionMismatch {
|
||||
bank: self.sketch_version,
|
||||
query: other.sketch_version,
|
||||
});
|
||||
}
|
||||
Ok(self.inner.distance(&other.inner) as u32)
|
||||
}
|
||||
|
||||
/// Hamming distance without compatibility checks.
|
||||
///
|
||||
/// Faster than [`Sketch::distance`] (no version/dim check) but the
|
||||
/// caller is responsible for guaranteeing both sketches come from the
|
||||
/// same embedding model and dimension. Use only on sketches retrieved
|
||||
/// from the same `SketchBank`.
|
||||
#[inline]
|
||||
pub fn distance_unchecked(&self, other: &Self) -> u32 {
|
||||
self.inner.distance(&other.inner) as u32
|
||||
}
|
||||
|
||||
/// Source-embedding dimension (number of dimensions in the original
|
||||
/// `[f32]`, not the packed byte length).
|
||||
#[inline]
|
||||
pub fn embedding_dim(&self) -> u16 {
|
||||
self.embedding_dim
|
||||
}
|
||||
|
||||
/// Schema version of the producing embedding model.
|
||||
#[inline]
|
||||
pub fn sketch_version(&self) -> u16 {
|
||||
self.sketch_version
|
||||
}
|
||||
|
||||
/// Borrow the inner ruvector-core `BinaryQuantized` for advanced use
|
||||
/// (e.g., serialisation through ruvector's existing infrastructure).
|
||||
/// Most callers should use [`Sketch::distance`] or [`SketchBank`].
|
||||
#[inline]
|
||||
pub fn as_inner(&self) -> &BinaryQuantized {
|
||||
&self.inner
|
||||
}
|
||||
}
|
||||
|
||||
/// A bank of sketches with stable IDs, queried for top-K nearest neighbours
|
||||
/// by hamming distance.
|
||||
///
|
||||
/// Used at every "have I seen this before" site in the pipeline. The bank
|
||||
/// enforces `sketch_version` and `embedding_dim` consistency at insertion
|
||||
/// time, so `topk` queries never need to re-check.
|
||||
///
|
||||
/// # Invariants
|
||||
///
|
||||
/// - All sketches in a bank share the same `embedding_dim` and `sketch_version`.
|
||||
/// - Bank IDs (`u32`) are caller-assigned and stable across `topk` calls;
|
||||
/// the bank does not renumber on insertion or removal.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct SketchBank {
|
||||
/// (id, sketch) pairs in insertion order.
|
||||
entries: Vec<(u32, Sketch)>,
|
||||
/// Locked at first insertion; all subsequent inserts must match.
|
||||
embedding_dim: Option<u16>,
|
||||
/// Locked at first insertion; all subsequent inserts must match.
|
||||
sketch_version: Option<u16>,
|
||||
}
|
||||
|
||||
impl SketchBank {
|
||||
/// Create an empty bank. Dimension and version are locked at the first
|
||||
/// `insert` call.
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
entries: Vec::new(),
|
||||
embedding_dim: None,
|
||||
sketch_version: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a bank with a pre-locked `embedding_dim` and `sketch_version`.
|
||||
/// Use when the bank's expected schema is known at construction.
|
||||
pub fn with_schema(embedding_dim: u16, sketch_version: u16) -> Self {
|
||||
Self {
|
||||
entries: Vec::new(),
|
||||
embedding_dim: Some(embedding_dim),
|
||||
sketch_version: Some(sketch_version),
|
||||
}
|
||||
}
|
||||
|
||||
/// Number of sketches in the bank.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.entries.len()
|
||||
}
|
||||
|
||||
/// True iff the bank has no sketches.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.entries.is_empty()
|
||||
}
|
||||
|
||||
/// Locked embedding dimension, or `None` if the bank is empty and
|
||||
/// no schema was pre-supplied.
|
||||
#[inline]
|
||||
pub fn embedding_dim(&self) -> Option<u16> {
|
||||
self.embedding_dim
|
||||
}
|
||||
|
||||
/// Locked sketch version, or `None` if the bank is empty and
|
||||
/// no schema was pre-supplied.
|
||||
#[inline]
|
||||
pub fn sketch_version(&self) -> Option<u16> {
|
||||
self.sketch_version
|
||||
}
|
||||
|
||||
/// Insert a sketch with caller-assigned ID. Locks the bank's schema on
|
||||
/// first insertion; rejects subsequent inserts that mismatch.
|
||||
pub fn insert(&mut self, id: u32, sketch: Sketch) -> Result<(), SketchError> {
|
||||
match self.embedding_dim {
|
||||
None => self.embedding_dim = Some(sketch.embedding_dim),
|
||||
Some(d) if d != sketch.embedding_dim => {
|
||||
return Err(SketchError::EmbeddingDimMismatch {
|
||||
bank: d,
|
||||
query: sketch.embedding_dim,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
match self.sketch_version {
|
||||
None => self.sketch_version = Some(sketch.sketch_version),
|
||||
Some(v) if v != sketch.sketch_version => {
|
||||
return Err(SketchError::SketchVersionMismatch {
|
||||
bank: v,
|
||||
query: sketch.sketch_version,
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
self.entries.push((id, sketch));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Top-K nearest neighbours by hamming distance, ascending.
|
||||
///
|
||||
/// Returns up to `k` `(id, distance)` pairs sorted by distance. If the
|
||||
/// bank has fewer than `k` entries, returns all of them. If `k == 0`,
|
||||
/// returns empty.
|
||||
///
|
||||
/// Returns `Err` if the query's `embedding_dim` or `sketch_version`
|
||||
/// disagrees with the bank's locked schema. (Cannot return `Err` if the
|
||||
/// bank is empty *and* no schema was pre-supplied — there's nothing to
|
||||
/// disagree with.)
|
||||
pub fn topk(&self, query: &Sketch, k: usize) -> Result<Vec<(u32, u32)>, SketchError> {
|
||||
if k == 0 || self.entries.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
if let Some(d) = self.embedding_dim {
|
||||
if d != query.embedding_dim {
|
||||
return Err(SketchError::EmbeddingDimMismatch {
|
||||
bank: d,
|
||||
query: query.embedding_dim,
|
||||
});
|
||||
}
|
||||
}
|
||||
if let Some(v) = self.sketch_version {
|
||||
if v != query.sketch_version {
|
||||
return Err(SketchError::SketchVersionMismatch {
|
||||
bank: v,
|
||||
query: query.sketch_version,
|
||||
});
|
||||
}
|
||||
}
|
||||
// O(n log k) using a partial sort; for small k (typical k = 8 to 64)
|
||||
// and bank sizes up to a few thousand sketches, the simple sort-all
|
||||
// approach is faster in practice (cache-friendly) and easier to audit.
|
||||
// Switch to a max-heap if profiling shows this becomes a hot spot.
|
||||
let mut scored: Vec<(u32, u32)> = self
|
||||
.entries
|
||||
.iter()
|
||||
.map(|(id, sk)| (*id, sk.distance_unchecked(query)))
|
||||
.collect();
|
||||
scored.sort_by_key(|&(_, d)| d);
|
||||
scored.truncate(k);
|
||||
Ok(scored)
|
||||
}
|
||||
|
||||
/// Compute the novelty score of a query against the bank in `[0.0, 1.0]`.
|
||||
///
|
||||
/// Defined as `min_distance / embedding_dim`, so 0.0 means "exact bit
|
||||
/// match exists in the bank" and 1.0 means "every bit differs from the
|
||||
/// nearest stored sketch." Returns 1.0 (max novelty) on an empty bank.
|
||||
/// Returns `Err` on schema mismatch.
|
||||
pub fn novelty(&self, query: &Sketch) -> Result<f32, SketchError> {
|
||||
if self.entries.is_empty() {
|
||||
return Ok(1.0);
|
||||
}
|
||||
let topk = self.topk(query, 1)?;
|
||||
let min_distance = topk.first().map(|&(_, d)| d).unwrap_or(u32::MAX);
|
||||
Ok(min_distance as f32 / query.embedding_dim as f32)
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for SketchBank {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand: sketch of `dims` tagged with schema version `version`.
    fn sk(dims: &[f32], version: u16) -> Sketch {
        Sketch::from_embedding(dims, version)
    }

    #[test]
    fn from_embedding_packs_one_bit_per_dim() {
        let alternating: [f32; 8] = [0.5, -0.5, 0.5, -0.5, 0.5, -0.5, 0.5, -0.5];
        let sketch = sk(&alternating, 1);
        assert_eq!(sketch.embedding_dim(), 8);
        assert_eq!(sketch.sketch_version(), 1);
        // A sketch is at distance zero from itself.
        assert_eq!(sketch.distance_unchecked(&sketch), 0);
    }

    #[test]
    fn distance_is_hamming_count() {
        let positive = sk(&[0.5, 0.5, 0.5, 0.5], 1);
        let negative = sk(&[-0.5, -0.5, -0.5, -0.5], 1);
        // Every one of the 4 signs is flipped, so 4 bits differ.
        assert_eq!(positive.distance(&negative).unwrap(), 4);
    }

    #[test]
    fn distance_rejects_mismatched_dims() {
        let two_dims = sk(&[0.5, 0.5], 1);
        let four_dims = sk(&[0.5, 0.5, 0.5, 0.5], 1);
        assert!(matches!(
            two_dims.distance(&four_dims),
            Err(SketchError::EmbeddingDimMismatch { .. })
        ));
    }

    #[test]
    fn distance_rejects_mismatched_versions() {
        let v1 = sk(&[0.5, 0.5, 0.5, 0.5], 1);
        let v2 = sk(&[0.5, 0.5, 0.5, 0.5], 2);
        assert!(matches!(
            v1.distance(&v2),
            Err(SketchError::SketchVersionMismatch { .. })
        ));
    }

    #[test]
    fn bank_topk_returns_sorted_by_distance() {
        let mut bank = SketchBank::new();
        // id 10 matches the query exactly, id 20 differs in the last
        // dimension only, id 30 differs in two dimensions.
        bank.insert(10, sk(&[0.5, 0.5, 0.5, 0.5], 1)).unwrap();
        bank.insert(20, sk(&[0.5, 0.5, 0.5, -0.5], 1)).unwrap();
        bank.insert(30, sk(&[-0.5, 0.5, -0.5, 0.5], 1)).unwrap();

        let hits = bank.topk(&sk(&[0.5, 0.5, 0.5, 0.5], 1), 3).unwrap();

        let ids: Vec<u32> = hits.iter().map(|&(id, _)| id).collect();
        assert_eq!(ids, [10, 20, 30]);
        // Distances must be non-decreasing along the result.
        assert!(hits.windows(2).all(|pair| pair[0].1 <= pair[1].1));
    }

    #[test]
    fn bank_topk_zero_returns_empty() {
        let mut bank = SketchBank::new();
        bank.insert(1, sk(&[0.5, 0.5], 1)).unwrap();
        let hits = bank.topk(&sk(&[0.5, 0.5], 1), 0).unwrap();
        assert!(hits.is_empty());
    }

    #[test]
    fn bank_topk_more_than_size_returns_all() {
        let mut bank = SketchBank::new();
        bank.insert(1, sk(&[0.5, 0.5], 1)).unwrap();
        bank.insert(2, sk(&[-0.5, 0.5], 1)).unwrap();
        let hits = bank.topk(&sk(&[0.5, 0.5], 1), 100).unwrap();
        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn bank_locks_schema_on_first_insert() {
        let mut bank = SketchBank::new();
        bank.insert(1, sk(&[0.5, 0.5, 0.5, 0.5], 1)).unwrap();

        // Same dimension, different version: rejected.
        let wrong_version = bank.insert(2, sk(&[0.5, 0.5, 0.5, 0.5], 2));
        assert!(matches!(
            wrong_version,
            Err(SketchError::SketchVersionMismatch { .. })
        ));

        // Same version, different dimension: rejected.
        let wrong_dim = bank.insert(3, sk(&[0.5, 0.5], 1));
        assert!(matches!(
            wrong_dim,
            Err(SketchError::EmbeddingDimMismatch { .. })
        ));
    }

    #[test]
    fn bank_with_schema_rejects_first_mismatching_insert() {
        let mut bank = SketchBank::with_schema(4, 7);
        let outcome = bank.insert(1, sk(&[0.5, 0.5], 7));
        assert!(matches!(
            outcome,
            Err(SketchError::EmbeddingDimMismatch { .. })
        ));
    }

    #[test]
    fn novelty_zero_for_exact_match_one_for_empty() {
        let query = sk(&[0.5, 0.5, 0.5, 0.5], 1);

        let empty = SketchBank::new();
        assert_eq!(empty.novelty(&query).unwrap(), 1.0);

        let mut seeded = SketchBank::new();
        seeded.insert(1, query.clone()).unwrap();
        assert_eq!(seeded.novelty(&query).unwrap(), 0.0);
    }

    #[test]
    fn novelty_is_proportional_to_min_distance() {
        let mut bank = SketchBank::new();
        // The single stored sketch has all 8 dimensions positive.
        bank.insert(1, sk(&[0.5; 8], 1)).unwrap();
        // The query flips the last four signs: 4 differing bits / 8 dims = 0.5.
        let query = sk(&[0.5, 0.5, 0.5, 0.5, -0.5, -0.5, -0.5, -0.5], 1);
        let score = bank.novelty(&query).unwrap();
        assert!((score - 0.5).abs() < 1e-6);
    }

    #[test]
    fn topk_rejects_query_with_wrong_schema() {
        let mut bank = SketchBank::with_schema(4, 1);
        bank.insert(1, sk(&[0.5, 0.5, 0.5, 0.5], 1)).unwrap();

        let bad_dim = sk(&[0.5, 0.5], 1);
        assert!(matches!(
            bank.topk(&bad_dim, 1),
            Err(SketchError::EmbeddingDimMismatch { .. })
        ));

        let bad_ver = sk(&[0.5, 0.5, 0.5, 0.5], 99);
        assert!(matches!(
            bank.topk(&bad_ver, 1),
            Err(SketchError::SketchVersionMismatch { .. })
        ));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue