ruvector/crates/ruvector-core/Cargo.toml
rUv 2b6c9c57d6
feat(ruvector-core): add OnnxEmbedding for real semantic embeddings (#265)
Add native ONNX Runtime integration for production-ready semantic embeddings.

## New Features
- `OnnxEmbedding` struct with `from_pretrained()` and `from_files()` methods
- Feature flag: `onnx-embeddings` (optional, not default)
- Auto-downloads models from HuggingFace Hub (~90MB for all-MiniLM-L6-v2)
- Supports sentence-transformers, BGE, E5 model families
- Thread-safe inference via RwLock<Session>
- Mean pooling and L2 normalization for sentence transformers

## Dependencies (optional)
- ort 2.0.0-rc.9 (ONNX Runtime)
- tokenizers 0.20 (HuggingFace tokenizers)
- hf-hub 0.3 (model downloads)

## Documentation
- Updated ADR-114 with implementation details
- Updated lib.rs deprecation warning to reference OnnxEmbedding

Closes #263

Co-authored-by: Reuven <cohen@ruv-mac-mini.local>
2026-03-16 11:46:47 -04:00

111 lines
3.1 KiB
TOML

[package]
# Keys written as `<key>.workspace = true` inherit their value from the
# workspace manifest; only the crate-specific keys are spelled out here.
name = "ruvector-core"
version.workspace = true
authors.workspace = true
edition.workspace = true
license.workspace = true
readme = "README.md"
repository.workspace = true
rust-version.workspace = true
description = "High-performance Rust vector database core with HNSW indexing"
[dependencies]
# Optional native backends. Each is enabled through an entry in [features]:
# `parallel` (rayon, crossbeam), `hnsw` (hnsw_rs), `storage` (redb, memmap2),
# `simd` (simsimd).
crossbeam = { workspace = true, optional = true }
hnsw_rs = { workspace = true, optional = true }
memmap2 = { workspace = true, optional = true }
rayon = { workspace = true, optional = true }
redb = { workspace = true, optional = true }
simsimd = { workspace = true, optional = true }
# Serialization
bincode = { workspace = true }
rkyv = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
# Error handling and diagnostics
anyhow = { workspace = true }
thiserror = { workspace = true }
tracing = { workspace = true }
# Math and numerics
ndarray = { workspace = true, features = ["serde"] }
rand = { workspace = true }
rand_distr = { workspace = true }
# Concurrency and lazy-initialization helpers
dashmap = { workspace = true }
once_cell = { workspace = true }
parking_lot = { workspace = true }
# Time and UUID (uuid is unconditional; see the deprecated `uuid-support` feature)
chrono = { workspace = true }
uuid = { workspace = true, features = ["v4"] }
# HTTP client for API embeddings (not available in WASM).
# Optional; pulled in by the `api-embeddings` feature.
reqwest = { version = "0.11", default-features = false, features = ["blocking", "json", "rustls-tls"], optional = true }
# ONNX Runtime for local semantic embeddings (not available in WASM).
# Optional; pulled in by the `onnx-embeddings` feature.
# Pinned with `=`: a bare "2.0.0-rc.9" is a caret requirement that also
# matches later 2.0.0 release candidates, and pre-releases carry no
# API-stability guarantee, so an unpinned build without a lockfile could
# resolve to an incompatible rc. Bump this deliberately when upgrading.
ort = { version = "=2.0.0-rc.9", optional = true }
# Tokenizers for ONNX models (optional; `onnx-embeddings`)
tokenizers = { version = "0.20", default-features = false, features = ["onig"], optional = true }
# HuggingFace Hub for model downloads (optional; `onnx-embeddings`)
hf-hub = { version = "0.3", optional = true }
[dev-dependencies]
# Used only by tests and benchmarks; never part of the published library build.
criterion = { workspace = true }
mockall = { workspace = true }
proptest = { workspace = true }
tempfile = "3.13"
tracing-subscriber = { workspace = true }
# Benchmark targets, listed alphabetically. `harness = false` turns off the
# built-in libtest bench harness so each target supplies its own entry point
# (presumably via Criterion, which is listed in [dev-dependencies]).
[[bench]]
name = "batch_operations"
harness = false

[[bench]]
name = "bench_memory"
harness = false

[[bench]]
name = "bench_simd"
harness = false

[[bench]]
name = "comprehensive_bench"
harness = false

[[bench]]
name = "distance_metrics"
harness = false

[[bench]]
name = "hnsw_search"
harness = false

[[bench]]
name = "quantization_bench"
harness = false

[[bench]]
name = "real_benchmark"
harness = false
[features]
# Enabled unless the consumer sets `default-features = false`. Every entry
# here is marked "not available in WASM" below.
default = [
    "simd",
    "storage",
    "hnsw",
    "api-embeddings",
    "parallel",
]
# SIMD acceleration (not available in WASM)
simd = ["simsimd"]
# Parallel processing (not available in WASM)
parallel = ["rayon", "crossbeam"]
# File-based storage (not available in WASM)
storage = ["redb", "memmap2"]
# HNSW indexing (not available in WASM due to mmap dependency)
hnsw = ["hnsw_rs"]
# Pure in-memory storage for WASM
memory-only = []
# Deprecated: uuid is now always included
uuid-support = []
# Feature flag for embedding provider API (use ApiEmbedding for production)
real-embeddings = []
# API-based embeddings (not available in WASM)
api-embeddings = ["reqwest"]
# ONNX-based local embeddings (not available in WASM)
onnx-embeddings = ["ort", "tokenizers", "hf-hub"]
[lib]
# The library target itself is not built as a benchmark; benchmarks are the
# dedicated [[bench]] targets declared in this manifest.
bench = false
# Plain Rust library artifact only.
crate-type = ["rlib"]