# Manifest for the `ruvector-scipix` crate: library, three binaries, examples
# and benchmarks. Fields written as `<key>.workspace = true` are inherited
# from the workspace root manifest.
[package]
name = "ruvector-scipix"
version.workspace = true
authors.workspace = true
# Bench targets are not auto-discovered; only the `[[bench]]` entries listed
# further down are built.
autobenches = false
categories = [
    "science",
    "text-processing",
    "multimedia::images",
    "command-line-utilities",
]
documentation = "https://docs.rs/ruvector-scipix"
edition.workspace = true
# Paths left out of the packaged crate.
# NOTE(review): `benches/*` is excluded here while `[[bench]]` targets are
# still declared below — confirm packaging behaves as intended.
exclude = [
    "assets/fonts/*.ttf",
    "models/*",
    "tests/fixtures/*",
    ".github/*",
    "benches/*",
]
homepage = "https://github.com/ruvnet/ruvector/tree/main/examples/scipix"
keywords = [
    "ocr",
    "latex",
    "mathml",
    "scientific-computing",
    "image-recognition",
]
license.workspace = true
readme = "README.md"
repository.workspace = true
rust-version = "1.77"
description = "Rust OCR engine for scientific documents - extract LaTeX, MathML from math equations, research papers, and technical diagrams with ONNX GPU acceleration"

[dependencies]
# Inherited from the workspace
anyhow.workspace = true
thiserror.workspace = true
serde.workspace = true
serde_json.workspace = true
tokio = { workspace = true, features = ["signal"] }
tracing.workspace = true
tracing-subscriber.workspace = true

# Command-line interface
clap = { workspace = true, features = ["derive", "cargo", "env", "unicode", "wrap_help"] }
clap_complete = "4.5"
indicatif.workspace = true
console.workspace = true

# Further CLI helpers
comfy-table = "7.1"
colored = "2.1"
dialoguer = "0.11"
glob = "0.3"
rand.workspace = true

# Configuration files and filesystem locations
toml = "0.8"
dirs = "5.0"

# HTTP server stack
axum = { version = "0.7", features = ["multipart", "macros"] }
tower = { version = "0.4", features = ["full"] }
tower-http = { version = "0.5", features = ["fs", "trace", "cors", "compression-gzip", "limit"] }
hyper = { version = "1.0", features = ["full"] }

# Request validation
validator = { version = "0.18", features = ["derive"] }

# Rate limiting
governor = "0.6"
nonzero_ext = "0.3"

# In-memory caching
moka = { version = "0.12", features = ["future"] }

# Outbound HTTP client
reqwest = { version = "0.12", features = ["multipart", "stream", "json"] }

# Time and UUID (reportedly also declared at workspace level; the versions
# are pinned explicitly here rather than inherited)
chrono = { version = "0.4", features = ["serde"] }
uuid = { version = "1.11", features = ["v4", "serde"] }

# Environment-based configuration
dotenvy = "0.15"

# Async helpers
futures = "0.3"
async-trait = "0.1"

# Hashing, encoding and message authentication
sha2 = "0.10"
base64 = "0.22"
hmac = "0.12"

# Server-sent events / streaming responses
axum-streams = { version = "0.15", features = ["json"] }

# Image processing (for future OCR integration). Everything except `image`
# is optional and switched on through the `preprocess` / `optimize` features.
image = "0.25"
imageproc = { version = "0.25", optional = true }
rayon = { version = "1.10", optional = true }
nalgebra = { version = "0.33", optional = true }
ndarray = { version = "0.16", optional = true }

# ONNX Runtime inference; optional, enabled by the `ocr` feature
ort = { version = "2.0.0-rc.10", optional = true, features = ["load-dynamic"] }

# Concurrent data structures
parking_lot = "0.12"
dashmap = "6.1"

# Math parsing and processing
nom = "7.1"
once_cell = "1.19"

# Font rendering for benchmarks
rusttype = "0.9"

# System information
num_cpus = "1.16"

# Performance optimizations; optional, enabled by the `optimize` feature
memmap2 = { version = "0.9", optional = true }

# WebAssembly bindings; optional, enabled together by the `wasm` feature.
# NOTE(review): the same four crates are also listed, non-optionally, in the
# `cfg(target_arch = "wasm32")` dependency table — confirm both declarations
# are intended.
wasm-bindgen = { version = "0.2", optional = true }
wasm-bindgen-futures = { version = "0.4", optional = true }
js-sys = { version = "0.3", optional = true }
web-sys = { version = "0.3", features = ["console", "Window", "Document", "CanvasRenderingContext2d", "HtmlCanvasElement", "ImageData"], optional = true }

[dev-dependencies]
axum-test = "15.0"
mockall = "0.13"
proptest = "1.5"
tempfile = "3.8"
approx = "0.5"
criterion = { version = "0.5", features = ["html_reports"] }
rusttype = "0.9"
env_logger = "0.11"
predicates = "3.1"
assert_cmd = "2.0"
ab_glyph = "0.2"
# Adds the `process` feature on top of the regular `tokio` dependency.
tokio = { workspace = true, features = ["process"] }
# Adds the `blocking` feature on top of the regular `reqwest` dependency.
reqwest = { version = "0.12", features = ["blocking"] }

[features]
default = ["preprocess", "cache", "optimize"]
preprocess = ["imageproc", "rayon", "nalgebra", "ndarray"]
# `cache` and `math` enable no optional dependencies.
cache = []
ocr = ["ort", "preprocess"]
math = []
optimize = ["memmap2", "rayon"]
wasm = ["wasm-bindgen", "wasm-bindgen-futures", "js-sys", "web-sys"]
# Opt-in switch for the heavy integration test suite under tests/integration/.
# Those tests need a `scipix-ocr` binary (not currently built), real OCR
# models, and large fixture files. The feature is off by default so that
# `cargo test --workspace` stays green; pass
# `--features scipix-integration-tests` to run them once the missing binary
# and fixtures are in place.
scipix-integration-tests = []

[[bin]]
name = "scipix-cli"
path = "src/bin/cli.rs"

[[bin]]
name = "scipix-server"
path = "src/bin/server.rs"

[[bin]]
name = "scipix-benchmark"
path = "src/bin/benchmark.rs"

[lib]
name = "ruvector_scipix"
path = "src/lib.rs"
crate-type = ["cdylib", "rlib"]

# Examples — each one is only built when the `ocr` feature is enabled.
[[example]]
name = "simple_ocr"
path = "examples/simple_ocr.rs"
required-features = ["ocr"]

[[example]]
name = "batch_processing"
path = "examples/batch_processing.rs"
required-features = ["ocr"]

[[example]]
name = "api_server"
path = "examples/api_server.rs"
required-features = ["ocr"]

[[example]]
name = "streaming"
path = "examples/streaming.rs"
required-features = ["ocr"]

[[example]]
name = "custom_pipeline"
path = "examples/custom_pipeline.rs"
required-features = ["ocr"]

[[example]]
name = "lean_agentic"
path = "examples/lean_agentic.rs"
required-features = ["ocr"]

[[example]]
name = "accuracy_test"
path = "examples/accuracy_test.rs"
required-features = ["ocr"]

# Benchmark targets. `harness = false` turns off the built-in libtest bench
# harness so that each bench supplies its own entry point.
[[bench]]
name = "ocr_latency"
harness = false

[[bench]]
name = "preprocessing"
harness = false

[[bench]]
name = "latex_generation"
harness = false

# The benches `inference`, `cache`, `api`, `memory` and `optimization_bench`
# were removed: they reference deleted/renamed APIs and are out of scope for
# the workspace clippy cleanup pass. Restore them when the relevant code is
# reintroduced.
# Dependencies that are only resolved when compiling for the wasm32
# architecture.
[target.'cfg(target_arch = "wasm32")'.dependencies]
wasm-bindgen = "0.2"
wasm-bindgen-futures = "0.4"
js-sys = "0.3"
web-sys = { version = "0.3", features = [
    "Window",
    "Document",
    "HtmlCanvasElement",
    "CanvasRenderingContext2d",
    "ImageData",
    "Blob",
    "Url",
    "MessageEvent",
    "Worker",
    "DedicatedWorkerGlobalScope",
    "console",
] }
getrandom = { version = "0.3", features = ["wasm_js"] }
console_error_panic_hook = "0.1"
serde-wasm-bindgen = "0.6"
tracing-wasm = "0.2"

# Workspace cleanup pass: research-tier crate, doc/style churn deferred.
# Correctness + suspicious lints stay denied at the group level (see
# `[lints.clippy]`); individual lints are relaxed one by one below.
[lints.rust]
unexpected_cfgs = { level = "allow", priority = -1 }
unused_imports = "allow"
dead_code = "allow"
unused_variables = "allow"
unused_mut = "allow"
# `unused_unit` is a Clippy lint, not a rustc lint, so it is configured only
# under `[lints.clippy]` and intentionally not listed in this table.
unused_assignments = "allow"
unused_must_use = "allow"
missing_docs = "allow"
unsafe_op_in_unsafe_fn = "allow"
unused_parens = "allow"
unused_comparisons = "allow"
non_local_definitions = "allow"
static_mut_refs = "allow"
non_camel_case_types = "allow"
deprecated = "allow"
ambiguous_glob_reexports = "allow"
non_upper_case_globals = "allow"
unused_doc_comments = "allow"
unused_unsafe = "allow"
unreachable_patterns = "allow"
suspicious_double_ref_op = "allow"

[lints.clippy]
# Group-level settings. Their negative priorities place them below every
# per-lint entry that follows, so the per-lint levels take precedence.
pedantic = { level = "allow", priority = -2 }
correctness = { level = "deny", priority = -1 }
suspicious = { level = "deny", priority = -1 }
# Individually allowed lints (default priority).
needless_range_loop = "allow"
needless_borrow = "allow"
needless_borrows_for_generic_args = "allow"
needless_update = "allow"
needless_bool = "allow"
needless_pass_by_value = "allow"
manual_div_ceil = "allow"
manual_is_multiple_of = "allow"
manual_range_contains = "allow"
manual_clamp = "allow"
manual_checked_ops = "allow"
manual_let_else = "allow"
manual_memcpy = "allow"
manual_repeat_n = "allow"
manual_contains = "allow"
manual_flatten = "allow"
manual_abs_diff = "allow"
manual_slice_size_calculation = "allow"
redundant_closure = "allow"
redundant_closure_for_method_calls = "allow"
redundant_field_names = "allow"
len_zero = "allow"
get_first = "allow"
useless_vec = "allow"
too_many_arguments = "allow"
derivable_impls = "allow"
approx_constant = "allow"
assertions_on_constants = "allow"
field_reassign_with_default = "allow"
nonminimal_bool = "allow"
collapsible_if = "allow"
collapsible_match = "allow"
inconsistent_digit_grouping = "allow"
unnecessary_sort_by = "allow"
unnecessary_map_or = "allow"
unnecessary_filter_map = "allow"
unnecessary_lazy_evaluations = "allow"
unnecessary_cast = "allow"
unnecessary_to_owned = "allow"
unnecessary_wraps = "allow"
unnecessary_literal_unwrap = "allow"
unnecessary_struct_initialization = "allow"
should_implement_trait = "allow"
ptr_arg = "allow"
let_unit_value = "allow"
let_and_return = "allow"
type_complexity = "allow"
identity_op = "allow"
match_like_matches_macro = "allow"
match_same_arms = "allow"
match_single_binding = "allow"
vec_init_then_push = "allow"
absurd_extreme_comparisons = "allow"
incompatible_msrv = "allow"
unused_enumerate_index = "allow"
unused_self = "allow"
unused_unit = "allow"
map_clone = "allow"
map_unwrap_or = "allow"
result_map_or_into_option = "allow"
unusual_byte_groupings = "allow"
if_same_then_else = "allow"
unnested_or_patterns = "allow"
uninlined_format_args = "allow"
single_match_else = "allow"
single_char_pattern = "allow"
mixed_attributes_style = "allow"
arc_with_non_send_sync = "allow"
bool_assert_comparison = "allow"
bool_comparison = "allow"
bind_instead_of_map = "allow"
cloned_ref_to_slice_refs = "allow"
large_stack_arrays = "allow"
implicit_saturating_sub = "allow"
ignored_unit_patterns = "allow"
explicit_iter_loop = "allow"
elidable_lifetime_names = "allow"
doc_markdown = "allow"
doc_overindented_list_items = "allow"
comparison_chain = "allow"
clone_on_copy = "allow"
items_after_statements = "allow"
inline_always = "allow"
format_push_string = "allow"
format_collect = "allow"
for_kv_map = "allow"
float_cmp = "allow"
if_not_else = "allow"
return_self_not_must_use = "allow"
missing_fields_in_debug = "allow"
upper_case_acronyms = "allow"
wildcard_imports = "allow"
must_use_candidate = "allow"
cast_possible_truncation = "allow"
cast_possible_wrap = "allow"
cast_precision_loss = "allow"
cast_lossless = "allow"
cast_sign_loss = "allow"
unreadable_literal = "allow"
struct_excessive_bools = "allow"
trivially_copy_pass_by_ref = "allow"
missing_safety_doc = "allow"
missing_errors_doc = "allow"
missing_panics_doc = "allow"
similar_names = "allow"
module_name_repetitions = "allow"
assign_op_pattern = "allow"
iter_cloned_collect = "allow"
excessive_precision = "allow"
await_holding_refcell_ref = "allow"
unnecessary_unwrap = "allow"
unit_arg = "allow"
redundant_pattern_matching = "allow"
question_mark = "allow"
partialeq_to_none = "allow"
new_without_default = "allow"
map_flatten = "allow"
manual_unwrap_or = "allow"
len_without_is_empty = "allow"
format_in_format_args = "allow"
single_char_add_str = "allow"
useless_conversion = "allow"
useless_format = "allow"
doc_lazy_continuation = "allow"
manual_strip = "allow"
double_ended_iterator_last = "allow"
unwrap_or_default = "allow"
single_component_path_imports = "allow"
needless_return = "allow"
int_plus_one = "allow"
needless_lifetimes = "allow"
explicit_counter_loop = "allow"
unnecessary_mut_passed = "allow"
module_inception = "allow"
option_as_ref_deref = "allow"
print_literal = "allow"
explicit_auto_deref = "allow"
manual_swap = "allow"
writeln_empty_string = "allow"
items_after_test_module = "allow"
no_effect = "allow"
non_canonical_partial_ord_impl = "allow"
wildcard_in_or_patterns = "allow"
large_enum_variant = "allow"
# The remaining entries carry an explicit priority of 1, which ranks them
# above both the group settings and the default-priority entries above.
not_unsafe_ptr_arg_deref = { level = "allow", priority = 1 }
erasing_op = { level = "allow", priority = 1 }
almost_swapped = { level = "allow", priority = 1 }
cast_abs_to_unsigned = { level = "allow", priority = 1 }
let_underscore_lock = { level = "allow", priority = 1 }
no_effect_replace = { level = "allow", priority = 1 }
await_holding_lock = { level = "allow", priority = 1 }
needless_character_iteration = { level = "allow", priority = 1 }
unnecessary_get_then_check = { level = "allow", priority = 1 }
let_underscore_future = { level = "allow", priority = 1 }
overly_complex_bool_expr = { level = "allow", priority = 1 }
zombie_processes = { level = "allow", priority = 1 }
repeat_vec_with_capacity = { level = "allow", priority = 1 }
missing_transmute_annotations = { level = "allow", priority = 1 }