agent-zero/plugins/_document_query/default_config.yaml
Alessandro b2ead06a4e fix(document_query): isolate LiteParse parsing
Run LiteParse in a subprocess so native parser crashes cannot take down the Web UI process. Bound parser concurrency and LiteParse workers for multi-chat stability, seed Q&A context with leading document chunks for title/abstract grounding, and keep a small-document fallback when vector search returns no chunks.
2026-05-29 15:51:59 +02:00

34 lines
1.4 KiB
YAML

# Document Query Plugin Configuration
# All timeout values are in seconds.

# --- Timeouts and retries ---
# Connect/read timeout for an HTTP fetch.
fetch_timeout: 30
# Number of HTTP retry attempts.
fetch_retries: 3
# Delay between HTTP retry attempts.
fetch_retry_backoff: 1.0
# Maximum time allowed for parsing a single document.
per_document_timeout: 60
# Maximum time allowed for all documents combined in one call.
gather_timeout: 120
# --- Parser, chunking and search settings ---
# Maximum number of parser jobs running across all chats in this process.
parser_concurrency: 1
# Leading chunks per document that are always included, for title/abstract
# grounding.
context_intro_chunks: 2
# NOTE(review): the unit of chunk_size/chunk_overlap (characters vs. tokens)
# is not stated in this file - confirm against the chunking code.
chunk_size: 1000
chunk_overlap: 100
# NOTE(review): presumably the minimum score and the maximum number of hits
# for chunk search - confirm against the consumer.
search_threshold: 0.5
search_limit: 100
# 50 MiB (50 * 1024 * 1024 bytes).
max_remote_bytes: 52428800
# --- LiteParse settings ---
# Prefer LiteParse before the legacy parser fallbacks.
liteparse_enabled: true
liteparse_ocr_enabled: true
liteparse_ocr_language: eng
# The next two options are unset by default. They are written as an explicit
# null (same parsed value as a bare "key:") so the intent is unambiguous.
liteparse_ocr_server_url: null
liteparse_tessdata_path: null
liteparse_max_pages: 1000
# Unset by default (explicit null, see above).
liteparse_target_pages: null
liteparse_dpi: 150
liteparse_preserve_very_small_text: false
liteparse_output_format: text
# LiteParse defaults to CPU cores - 1 workers; cap it for web runtime
# stability.
liteparse_num_workers: 1
# Isolate LiteParse native runtime crashes from the Web UI process.
liteparse_subprocess: true

# --- Legacy parser settings ---
# Enable the legacy Tesseract fallback after PyMuPDF.
pdf_ocr_fallback: true
# Offload sync parsers to a thread pool.
thread_offload: true