# ruvector/examples/scipix/Cargo.toml

[package]
name = "ruvector-scipix"
version.workspace = true
authors.workspace = true
# Benchmarks are not auto-discovered; only the [[bench]] targets declared in
# this manifest are built.
autobenches = false
categories = [
    "science",
    "text-processing",
    "multimedia::images",
    "command-line-utilities",
]
documentation = "https://docs.rs/ruvector-scipix"
edition.workspace = true
# Files left out of the published package.
exclude = [
    "assets/fonts/*.ttf",
    "models/*",
    "tests/fixtures/*",
    ".github/*",
    "benches/*",
]
homepage = "https://github.com/ruvnet/ruvector/tree/main/examples/scipix"
keywords = [
    "ocr",
    "latex",
    "mathml",
    "scientific-computing",
    "image-recognition",
]
license.workspace = true
readme = "README.md"
repository.workspace = true
rust-version = "1.77"
description = "Rust OCR engine for scientific documents - extract LaTeX, MathML from math equations, research papers, and technical diagrams with ONNX GPU acceleration"

[dependencies]
# Inherited from the workspace manifest
anyhow.workspace = true
serde.workspace = true
serde_json.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing.workspace = true
tracing-subscriber.workspace = true

# Command-line interface
clap = { workspace = true, features = ["derive", "cargo", "env", "unicode", "wrap_help"] }
clap_complete = "4.5"
colored = "2.1"
comfy-table = "7.1"
console.workspace = true
dialoguer = "0.11"
glob = "0.3"
indicatif.workspace = true
rand.workspace = true

# Configuration and file handling
dirs = "5.0"
dotenvy = "0.15"
toml = "0.8"

# HTTP server (axum-streams provides the SSE support)
axum = { version = "0.7", features = ["multipart", "macros"] }
axum-streams = { version = "0.15", features = ["json"] }
hyper = { version = "1.0", features = ["full"] }
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.5", features = ["fs", "trace", "cors", "compression-gzip", "limit"] }

# Validation
validator = { version = "0.18", features = ["derive"] }

# Rate limiting
governor = "0.6"
nonzero_ext = "0.3"

# Caching
moka = { version = "0.12", features = ["future"] }

# HTTP client
reqwest = { version = "0.12", features = ["multipart", "stream", "json"] }

# Time and UUID. Versions are pinned here rather than inherited with
# `workspace = true`.
chrono = { version = "0.4", features = ["serde"] }
uuid = { version = "1.11", features = ["v4", "serde"] }

# Async utilities
async-trait = "0.1"
futures = "0.3"

# Security
base64 = "0.22"
hmac = "0.12"
sha2 = "0.10"

# Image processing (for future OCR integration). Everything except `image`
# is optional and pulled in through the `preprocess` / `optimize` features.
image = "0.25"
imageproc = { version = "0.25", optional = true }
nalgebra = { version = "0.33", optional = true }
ndarray = { version = "0.16", optional = true }
rayon = { version = "1.10", optional = true }

# ML inference with ONNX Runtime (enabled by the `ocr` feature)
ort = { version = "2.0.0-rc.10", optional = true, features = ["load-dynamic"] }

# Concurrent data structures
dashmap = "6.1"
parking_lot = "0.12"

# Math parsing and processing
nom = "7.1"
once_cell = "1.19"

# Font rendering for benchmarks
rusttype = "0.9"

# System info
num_cpus = "1.16"

# Performance optimizations (enabled by the `optimize` feature)
memmap2 = { version = "0.9", optional = true }

# WebAssembly dependencies (optional, enabled by the `wasm` feature)
js-sys = { version = "0.3", optional = true }
wasm-bindgen = { version = "0.2", optional = true }
wasm-bindgen-futures = { version = "0.4", optional = true }
web-sys = { version = "0.3", features = [
    "console",
    "Window",
    "Document",
    "CanvasRenderingContext2d",
    "HtmlCanvasElement",
    "ImageData",
], optional = true }

[dev-dependencies]
ab_glyph = "0.2"
approx = "0.5"
assert_cmd = "2.0"
axum-test = "15.0"
criterion = { version = "0.5", features = ["html_reports"] }
env_logger = "0.11"
mockall = "0.13"
predicates = "3.1"
proptest = "1.5"
# Requests the `blocking` feature for dev targets, in addition to the
# features listed for reqwest in [dependencies].
reqwest = { version = "0.12", features = ["blocking"] }
# Also declared in [dependencies] with the same version requirement.
rusttype = "0.9"
tempfile = "3.8"
# Requests the `process` feature for dev targets, in addition to the
# features listed for tokio in [dependencies].
tokio = { workspace = true, features = ["process"] }

[features]
default = ["preprocess", "cache", "optimize"]

# Flag-only features: they enable no optional dependency and no other feature.
cache = []
math = []

# Each entry below names an optional dependency from [dependencies] or
# another feature of this crate.
ocr = ["ort", "preprocess"]
optimize = ["memmap2", "rayon"]
preprocess = ["imageproc", "rayon", "nalgebra", "ndarray"]
wasm = ["wasm-bindgen", "wasm-bindgen-futures", "js-sys", "web-sys"]

# Opt-in switch for the heavy integration test suite under tests/integration/.
# Those tests need a `scipix-ocr` binary (not currently built), real OCR
# models, and large fixture files. The feature is off by default so that
# `cargo test --workspace` stays green; pass
# `--features scipix-integration-tests` to run the suite once the missing
# binary and fixtures are in place.
scipix-integration-tests = []

# Binary targets. Each executable is built from its own entry point under
# src/bin/ and carries a `scipix-` prefixed name.
[[bin]]
name = "scipix-cli"
path = "src/bin/cli.rs"

[[bin]]
name = "scipix-server"
path = "src/bin/server.rs"

[[bin]]
name = "scipix-benchmark"
path = "src/bin/benchmark.rs"

# Library target. `rlib` is the regular Rust library format; `cdylib`
# additionally produces a dynamic library for consumption from outside Rust.
# NOTE(review): cdylib is presumably here for the `wasm` feature / wasm32
# build — confirm.
[lib]
name = "ruvector_scipix"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]

# Examples
# Every example below sets `required-features = ["ocr"]`, so Cargo only builds
# it when the `ocr` feature is enabled. `ocr` is not part of the default
# feature set, so pass it explicitly, e.g.
# `cargo run --example simple_ocr --features ocr`.
[[example]]
name = "simple_ocr"
path = "examples/simple_ocr.rs"
required-features = ["ocr"]

[[example]]
name = "batch_processing"
path = "examples/batch_processing.rs"
required-features = ["ocr"]

[[example]]
name = "api_server"
path = "examples/api_server.rs"
required-features = ["ocr"]

[[example]]
name = "streaming"
path = "examples/streaming.rs"
required-features = ["ocr"]

[[example]]
name = "custom_pipeline"
path = "examples/custom_pipeline.rs"
required-features = ["ocr"]

[[example]]
name = "lean_agentic"
path = "examples/lean_agentic.rs"
required-features = ["ocr"]

[[example]]
name = "accuracy_test"
path = "examples/accuracy_test.rs"
required-features = ["ocr"]

# Benchmark configurations
# `autobenches = false` in [package] turns off auto-discovery, so only the
# targets listed here are built. No `path` is given, so Cargo falls back to
# its default location, benches/<name>.rs. `harness = false` disables the
# built-in libtest harness; each bench must supply its own `main`
# (presumably via criterion from [dev-dependencies] — confirm).
[[bench]]
name = "ocr_latency"
harness = false

[[bench]]
name = "preprocessing"
harness = false

[[bench]]
name = "latex_generation"
harness = false

# Benches `inference`, `cache`, `api`, `memory`, `optimization_bench` removed:
# they reference deleted/renamed APIs and are out of scope for the workspace
# clippy cleanup pass. Restore them when the relevant code is reintroduced.

# Dependencies that apply only when compiling for a wasm32 target.
# wasm-bindgen, wasm-bindgen-futures, js-sys and web-sys are also declared as
# optional in [dependencies] (behind the `wasm` feature); here they are
# unconditional and web-sys requests a larger feature set.
[target.'cfg(target_arch = "wasm32")'.dependencies]
console_error_panic_hook = "0.1"
getrandom = { version = "0.3", features = ["wasm_js"] }
js-sys = "0.3"
serde-wasm-bindgen = "0.6"
tracing-wasm = "0.2"
wasm-bindgen = "0.2"
wasm-bindgen-futures = "0.4"
web-sys = { version = "0.3", features = [
    "Window",
    "Document",
    "HtmlCanvasElement",
    "CanvasRenderingContext2d",
    "ImageData",
    "Blob",
    "Url",
    "MessageEvent",
    "Worker",
    "DedicatedWorkerGlobalScope",
    "console",
] }

# Workspace cleanup pass: research-tier crate, doc/style churn deferred. Correctness + suspicious lints stay denied.
# rustc lints silenced for this crate. Only rustc's own lints belong in this
# table; `unused_unit` is a Clippy lint and is configured under
# [lints.clippy], so it is intentionally not listed here (rustc does not
# recognise that name).
[lints.rust]
unexpected_cfgs = { level = "allow", priority = -1 }
unused_imports = "allow"
dead_code = "allow"
unused_variables = "allow"
unused_mut = "allow"
unused_assignments = "allow"
unused_must_use = "allow"
missing_docs = "allow"
unsafe_op_in_unsafe_fn = "allow"
unused_parens = "allow"
unused_comparisons = "allow"
non_local_definitions = "allow"
static_mut_refs = "allow"
non_camel_case_types = "allow"
deprecated = "allow"
ambiguous_glob_reexports = "allow"
non_upper_case_globals = "allow"
unused_doc_comments = "allow"
unused_unsafe = "allow"
unreachable_patterns = "allow"
suspicious_double_ref_op = "allow"

# Clippy configuration. In Cargo's [lints] tables a higher `priority` value
# overrides a lower one, and an entry without an explicit priority has
# priority 0.
[lints.clippy]
# Lint groups get negative priorities so that the individual entries below
# take precedence over them: `pedantic` is allowed wholesale, while
# `correctness` and `suspicious` are denied as groups.
pedantic = { level = "allow", priority = -2 }
correctness = { level = "deny", priority = -1 }
suspicious = { level = "deny", priority = -1 }
# Individual lints allowed at the default priority (0).
needless_range_loop = "allow"
needless_borrow = "allow"
needless_borrows_for_generic_args = "allow"
needless_update = "allow"
needless_bool = "allow"
needless_pass_by_value = "allow"
manual_div_ceil = "allow"
manual_is_multiple_of = "allow"
manual_range_contains = "allow"
manual_clamp = "allow"
manual_checked_ops = "allow"
manual_let_else = "allow"
manual_memcpy = "allow"
manual_repeat_n = "allow"
manual_contains = "allow"
manual_flatten = "allow"
manual_abs_diff = "allow"
manual_slice_size_calculation = "allow"
redundant_closure = "allow"
redundant_closure_for_method_calls = "allow"
redundant_field_names = "allow"
len_zero = "allow"
get_first = "allow"
useless_vec = "allow"
too_many_arguments = "allow"
derivable_impls = "allow"
approx_constant = "allow"
assertions_on_constants = "allow"
field_reassign_with_default = "allow"
nonminimal_bool = "allow"
collapsible_if = "allow"
collapsible_match = "allow"
inconsistent_digit_grouping = "allow"
unnecessary_sort_by = "allow"
unnecessary_map_or = "allow"
unnecessary_filter_map = "allow"
unnecessary_lazy_evaluations = "allow"
unnecessary_cast = "allow"
unnecessary_to_owned = "allow"
unnecessary_wraps = "allow"
unnecessary_literal_unwrap = "allow"
unnecessary_struct_initialization = "allow"
should_implement_trait = "allow"
ptr_arg = "allow"
let_unit_value = "allow"
let_and_return = "allow"
type_complexity = "allow"
identity_op = "allow"
match_like_matches_macro = "allow"
match_same_arms = "allow"
match_single_binding = "allow"
vec_init_then_push = "allow"
absurd_extreme_comparisons = "allow"
incompatible_msrv = "allow"
unused_enumerate_index = "allow"
unused_self = "allow"
unused_unit = "allow"
map_clone = "allow"
map_unwrap_or = "allow"
result_map_or_into_option = "allow"
unusual_byte_groupings = "allow"
if_same_then_else = "allow"
unnested_or_patterns = "allow"
uninlined_format_args = "allow"
single_match_else = "allow"
single_char_pattern = "allow"
mixed_attributes_style = "allow"
arc_with_non_send_sync = "allow"
bool_assert_comparison = "allow"
bool_comparison = "allow"
bind_instead_of_map = "allow"
cloned_ref_to_slice_refs = "allow"
large_stack_arrays = "allow"
implicit_saturating_sub = "allow"
ignored_unit_patterns = "allow"
explicit_iter_loop = "allow"
elidable_lifetime_names = "allow"
doc_markdown = "allow"
doc_overindented_list_items = "allow"
comparison_chain = "allow"
clone_on_copy = "allow"
items_after_statements = "allow"
inline_always = "allow"
format_push_string = "allow"
format_collect = "allow"
for_kv_map = "allow"
float_cmp = "allow"
if_not_else = "allow"
return_self_not_must_use = "allow"
missing_fields_in_debug = "allow"
upper_case_acronyms = "allow"
wildcard_imports = "allow"
must_use_candidate = "allow"
cast_possible_truncation = "allow"
cast_possible_wrap = "allow"
cast_precision_loss = "allow"
cast_lossless = "allow"
cast_sign_loss = "allow"
unreadable_literal = "allow"
struct_excessive_bools = "allow"
trivially_copy_pass_by_ref = "allow"
missing_safety_doc = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "allow"
similar_names = "allow"
module_name_repetitions = "allow"
assign_op_pattern = "allow"
iter_cloned_collect = "allow"
excessive_precision = "allow"
await_holding_refcell_ref = "allow"
unnecessary_unwrap = "allow"
unit_arg = "allow"
redundant_pattern_matching = "allow"
question_mark = "allow"
partialeq_to_none = "allow"
new_without_default = "allow"
map_flatten = "allow"
manual_unwrap_or = "allow"
len_without_is_empty = "allow"
format_in_format_args = "allow"
single_char_add_str = "allow"
useless_conversion = "allow"
useless_format = "allow"
doc_lazy_continuation = "allow"
manual_strip = "allow"
double_ended_iterator_last = "allow"
unwrap_or_default = "allow"
single_component_path_imports = "allow"
needless_return = "allow"
int_plus_one = "allow"
needless_lifetimes = "allow"
explicit_counter_loop = "allow"
unnecessary_mut_passed = "allow"
module_inception = "allow"
option_as_ref_deref = "allow"
print_literal = "allow"
explicit_auto_deref = "allow"
manual_swap = "allow"
writeln_empty_string = "allow"
items_after_test_module = "allow"
no_effect = "allow"
non_canonical_partial_ord_impl = "allow"
wildcard_in_or_patterns = "allow"
large_enum_variant = "allow"
# The remaining entries carry an explicit priority of 1, which ranks them
# above both the group settings and the default-priority entries above.
# NOTE(review): presumably these are members of the denied `correctness` /
# `suspicious` groups that are being exempted — confirm. The default priority
# of 0 would already outrank the groups (-1).
not_unsafe_ptr_arg_deref = { level = "allow", priority = 1 }
erasing_op = { level = "allow", priority = 1 }
almost_swapped = { level = "allow", priority = 1 }
cast_abs_to_unsigned = { level = "allow", priority = 1 }
let_underscore_lock = { level = "allow", priority = 1 }
no_effect_replace = { level = "allow", priority = 1 }
await_holding_lock = { level = "allow", priority = 1 }
needless_character_iteration = { level = "allow", priority = 1 }
unnecessary_get_then_check = { level = "allow", priority = 1 }
let_underscore_future = { level = "allow", priority = 1 }
overly_complex_bool_expr = { level = "allow", priority = 1 }
zombie_processes = { level = "allow", priority = 1 }
repeat_vec_with_capacity = { level = "allow", priority = 1 }
missing_transmute_annotations = { level = "allow", priority = 1 }