ruvector/crates/ruvector-sparse-inference/Cargo.toml
rUv 76cec5641e
feat: Add PowerInfer-style sparse inference engine with precision lanes (#106)
## Summary
- Add PowerInfer-style sparse inference engine with precision lanes
- Add memory module with QuantizedWeights and NeuronCache
- Fix compilation and test issues
- Demonstrated 2.9-8.7x speedup at typical sparsity levels
- Published to crates.io as ruvector-sparse-inference v0.1.30

## Key Features
- Low-rank predictor using P·Q matrix factorization for fast neuron selection
- Sparse FFN kernels that only compute active neurons
- SIMD optimization for AVX2, SSE4.1, NEON, and WASM SIMD
- GGUF parser with full quantization support (Q4_0 through Q6_K)
- Precision lanes (3/5/7-bit layered quantization)
- π integration for low-precision systems

🤖 Generated with [Claude Code](https://claude.com/claude-code)
2026-01-04 23:40:31 -05:00

53 lines
1.2 KiB
TOML

# Package metadata. Keys written as `<key>.workspace = true` defer to the value
# declared under `[workspace.package]` in the workspace root manifest; the
# remaining keys are specific to this crate.
[package]
name = "ruvector-sparse-inference"
version.workspace = true
authors.workspace = true
categories = ["science", "algorithms"]
edition.workspace = true
keywords = [
    "sparse-inference",
    "neural-network",
    "quantization",
    "simd",
    "edge-ai",
]
license.workspace = true
readme = "README.md"
repository.workspace = true
rust-version.workspace = true
description = "PowerInfer-style sparse inference engine for efficient neural network inference on edge devices"

# Runtime dependencies. Entries written as `<crate>.workspace = true` take their
# version requirement (and any shared settings) from `[workspace.dependencies]`
# in the workspace root; entries with an explicit version are declared here only.
[dependencies]
# Math and numerics (ndarray is declared locally, with its `serde` feature on)
ndarray = { version = "0.16", features = ["serde"] }
rand.workspace = true
rand_distr.workspace = true
# Serialization
serde.workspace = true
serde_json.workspace = true
rkyv.workspace = true
# Error handling
thiserror.workspace = true
anyhow.workspace = true
# Logging
tracing.workspace = true
# Performance
rayon.workspace = true
parking_lot.workspace = true
# Memory mapping for model loading
memmap2.workspace = true
# GGUF model loading support (both declared locally with explicit versions)
byteorder = "1.5"
half = "2.4"

# Dependencies used only when building tests, examples and benchmarks.
# All three inherit their version requirement from the workspace root.
[dev-dependencies]
criterion.workspace = true
mockall.workspace = true
proptest.workspace = true

# Explicit benchmark target. No `path` is given, so Cargo resolves the source
# file from the target name under the crate's `benches/` directory.
[[bench]]
name = "sparse_inference_bench"
# Disable the built-in libtest harness so the benchmark file provides its own
# entry point (presumably via the `criterion` dev-dependency — confirm).
harness = false
# Library target settings.
[lib]
# Do not benchmark the library target itself under `cargo bench`; only the
# explicit [[bench]] targets are run by default.
# NOTE(review): presumably set so that benchmark-specific command-line flags are
# not passed to the library's libtest harness — confirm intent.
bench = false