ruvector/examples/apify/ruvector-postgres/.actor/input_schema.json
rUv c1f89de337 feat(apify): Add AI Synthetic Data Generator with MCP & Actor Integration
- Add agentic-synth actor with TRM/SONA self-learning
- Integrate 13 popular Apify scrapers for data grounding
- Add 6 use case templates (lead-intelligence, competitor-monitor, etc.)
- Include MCP server for AI agent integration
- Add comprehensive README with tutorials and SEO optimization
- Support generate/integrate/template modes
- Add webhook and embedding generation support

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-13 16:27:54 +00:00

469 lines
15 KiB
JSON

{
"title": "Self-Learning Postgres DB Input",
"description": "A distributed vector database that learns. 30+ operations including semantic search, batch operations, RAG queries, clustering, deduplication, and GNN training.",
"type": "object",
"schemaVersion": 1,
"properties": {
"action": {
"title": "Action",
"type": "string",
"description": "The operation to perform on the vector database",
"default": "full_workflow",
"enum": [
"full_workflow",
"search",
"insert",
"batch_insert",
"get",
"list",
"update",
"delete",
"upsert",
"hybrid_search",
"multi_query_search",
"mmr_search",
"graph_search",
"range_search",
"batch_search",
"create_table",
"drop_table",
"list_tables",
"table_stats",
"create_index",
"reindex",
"train_gnn",
"optimize_index",
"analyze_patterns",
"sona_learn",
"sona_status",
"cluster",
"find_duplicates",
"deduplicate",
"export",
"import",
"rag_query",
"summarize",
"ping",
"version",
"embedding_models",
"generate_embedding",
"similarity"
],
"enumTitles": [
"Full Workflow - Demo: create + insert + search",
"Search - Semantic similarity search",
"Insert - Add documents with embeddings",
"Batch Insert - Add many documents efficiently",
"Get - Retrieve single document by ID",
"List - List documents with filters",
"Update - Modify existing document",
"Delete - Remove documents",
"Upsert - Insert or update documents",
"Hybrid Search - Vector + keyword combined",
"Multi Query Search - Multiple queries aggregated",
"MMR Search - Diverse results (Maximal Marginal Relevance)",
"Graph Search - Graph-based similarity",
"Range Search - All results within distance",
"Batch Search - Multiple queries at once",
"Create Table - Create new collection",
"Drop Table - Delete collection",
"List Tables - Show all vector collections",
"Table Stats - Collection statistics",
"Create Index - Add HNSW/IVFFlat index",
"Reindex - Rebuild indexes",
"Train GNN - Train Graph Neural Network",
"Optimize Index - Auto-tune parameters",
"Analyze Patterns - Analyze data patterns",
"SONA Learn - Trigger TRM/SONA self-learning cycle",
"SONA Status - Check SONA learning status",
"Cluster - Group similar documents (k-means or hierarchical)",
"Find Duplicates - Detect similar documents",
"Deduplicate - Remove duplicates",
"Export - Export data to JSON/CSV",
"Import - Import data from JSON",
"RAG Query - Retrieval-Augmented Generation",
"Summarize - Document statistics",
"Ping - Test database connection",
"Version - Get version info",
"Embedding Models - List available models",
"Generate Embedding - Create embeddings only",
"Similarity - Compare two texts"
],
"editor": "select",
"prefill": "full_workflow",
"sectionCaption": "Core Settings",
"sectionDescription": "Choose the operation to perform"
},
"connectionString": {
"title": "Database Connection",
"type": "string",
"description": "PostgreSQL connection URL. Leave empty for embedded database (non-persistent). For persistent storage, use your own PostgreSQL with ruvector/pgvector extension.",
"editor": "textfield",
"isSecret": true,
"nullable": true,
"example": "postgresql://user:password@host:5432/database"
},
"tableName": {
"title": "Table/Collection Name",
"type": "string",
"description": "Name of the vector table (collection)",
"default": "documents",
"editor": "textfield"
},
"query": {
"title": "Search Query",
"type": "string",
"description": "Natural language query for semantic search. The AI understands meaning, not just keywords.",
"editor": "textarea",
"example": "How does machine learning work?",
"sectionCaption": "Search Settings"
},
"queryVector": {
"title": "Query Vector",
"type": "array",
"description": "Pre-computed embedding vector (alternative to query text). Use with external embedding APIs.",
"editor": "json",
"nullable": true
},
"documents": {
"title": "Documents",
"type": "array",
"description": "Documents to insert. Each document should have a 'content' field; 'metadata' and 'embedding' are optional.",
"editor": "json",
"prefill": [
{"content": "Machine learning is a type of AI that learns patterns from data to make predictions.", "metadata": {"category": "AI"}},
{"content": "PostgreSQL is a powerful open-source relational database.", "metadata": {"category": "Database"}},
{"content": "Neural networks are inspired by the human brain and consist of layers of nodes.", "metadata": {"category": "AI"}},
{"content": "Vector databases store data as mathematical embeddings for similarity search.", "metadata": {"category": "Database"}}
],
"sectionCaption": "Document Input"
},
"topK": {
"title": "Number of Results",
"type": "integer",
"description": "Maximum number of results to return",
"default": 10,
"minimum": 1,
"maximum": 1000,
"editor": "number"
},
"distanceMetric": {
"title": "Distance Metric",
"type": "string",
"description": "How to measure vector similarity",
"default": "cosine",
"enum": ["cosine", "l2", "inner_product", "manhattan"],
"enumTitles": [
"Cosine - Angular similarity (recommended)",
"L2 (Euclidean) - Straight-line distance",
"Inner Product - Dot product",
"Manhattan (L1) - City-block distance"
],
"editor": "select"
},
"filter": {
"title": "Filter",
"type": "string",
"description": "SQL WHERE clause for filtering. Example: metadata->>'category' = 'AI'",
"editor": "textfield",
"nullable": true,
"example": "metadata->>'category' = 'AI'"
},
"minScore": {
"title": "Minimum Score",
"type": "number",
"description": "Minimum similarity score threshold (0-1)",
"default": 0,
"minimum": 0,
"maximum": 1,
"editor": "number"
},
"maxDistance": {
"title": "Maximum Distance",
"type": "number",
"description": "Maximum distance threshold for range search",
"nullable": true,
"editor": "number"
},
"includeEmbeddings": {
"title": "Include Embeddings",
"type": "boolean",
"description": "Include embedding vectors in results (increases response size)",
"default": false
},
"includeMetadata": {
"title": "Include Metadata",
"type": "boolean",
"description": "Include metadata in results",
"default": true
},
"embeddingModel": {
"title": "Embedding Model",
"type": "string",
"description": "AI model for generating text embeddings. No API key needed - runs locally!",
"default": "all-MiniLM-L6-v2",
"enum": [
"all-MiniLM-L6-v2",
"bge-small-en-v1.5",
"bge-base-en-v1.5",
"nomic-embed-text-v1",
"gte-small",
"e5-small-v2"
],
"enumTitles": [
"all-MiniLM-L6-v2 (384d) - Fast, general purpose",
"bge-small-en-v1.5 (384d) - Strong MTEB scores for its size",
"bge-base-en-v1.5 (768d) - Higher accuracy",
"nomic-embed-text-v1 (768d) - Long documents (8K-token context)",
"gte-small (384d) - Good quality, fast",
"e5-small-v2 (384d) - English, general purpose"
],
"editor": "select",
"sectionCaption": "Embedding Settings"
},
"generateEmbeddings": {
"title": "Generate Embeddings",
"type": "boolean",
"description": "Auto-generate embeddings for documents without them",
"default": true
},
"dimensions": {
"title": "Vector Dimensions",
"type": "integer",
"description": "Embedding dimensions (384 for all-MiniLM-L6-v2, bge-small, gte-small, and e5-small; 768 for bge-base and nomic-embed-text)",
"default": 384,
"minimum": 64,
"maximum": 4096,
"editor": "number"
},
"indexType": {
"title": "Index Type",
"type": "string",
"description": "Vector index algorithm for faster search",
"default": "hnsw",
"enum": ["hnsw", "ivfflat", "none"],
"enumTitles": [
"HNSW - Fastest search, uses more memory",
"IVFFlat - Balanced speed and memory",
"None - No index, exact search"
],
"editor": "select",
"sectionCaption": "Index Settings"
},
"hnswM": {
"title": "HNSW M Parameter",
"type": "integer",
"description": "Max connections per node. Higher = better recall, more memory",
"default": 16,
"minimum": 4,
"maximum": 64,
"editor": "number"
},
"hnswEfConstruction": {
"title": "HNSW ef_construction",
"type": "integer",
"description": "Index build quality. Higher = better index, slower build",
"default": 64,
"minimum": 16,
"maximum": 512,
"editor": "number"
},
"hnswEfSearch": {
"title": "HNSW ef_search",
"type": "integer",
"description": "Search quality. Higher = better recall, slower search",
"default": 100,
"minimum": 16,
"maximum": 512,
"editor": "number"
},
"ivfLists": {
"title": "IVF Lists",
"type": "integer",
"description": "Number of IVF partitions for IVFFlat index",
"default": 100,
"minimum": 10,
"maximum": 1000,
"editor": "number"
},
"hybridWeight": {
"title": "Hybrid Weight",
"type": "number",
"description": "Balance between vector (1.0) and keyword (0.0) search",
"default": 0.7,
"minimum": 0,
"maximum": 1,
"editor": "number",
"sectionCaption": "Hybrid Search"
},
"batchSize": {
"title": "Batch Size",
"type": "integer",
"description": "Documents per batch for batch operations",
"default": 100,
"minimum": 1,
"maximum": 1000,
"editor": "number",
"sectionCaption": "Batch Settings"
},
"documentId": {
"title": "Document ID",
"type": "integer",
"description": "Single document ID for get/update/delete operations",
"nullable": true,
"editor": "number",
"sectionCaption": "Document Operations"
},
"documentIds": {
"title": "Document IDs",
"type": "array",
"description": "Multiple document IDs for batch delete",
"editor": "json",
"nullable": true
},
"updates": {
"title": "Updates",
"type": "object",
"description": "Fields to update: {content, metadata, embedding}",
"editor": "json",
"nullable": true
},
"enableLearning": {
"title": "Enable Learning",
"type": "boolean",
"description": "Enable self-learning index optimization",
"default": false,
"sectionCaption": "Self-Learning / GNN"
},
"learningRate": {
"title": "Learning Rate",
"type": "number",
"description": "GNN training learning rate",
"default": 0.01,
"minimum": 0.0001,
"maximum": 1,
"editor": "number"
},
"gnnLayers": {
"title": "GNN Layers",
"type": "integer",
"description": "Number of Graph Neural Network layers",
"default": 2,
"minimum": 1,
"maximum": 10,
"editor": "number"
},
"trainEpochs": {
"title": "Training Epochs",
"type": "integer",
"description": "Number of GNN training epochs",
"default": 10,
"minimum": 1,
"maximum": 100,
"editor": "number"
},
"numClusters": {
"title": "Number of Clusters",
"type": "integer",
"description": "Number of clusters to create (k for k-means)",
"default": 10,
"minimum": 2,
"maximum": 100,
"editor": "number",
"sectionCaption": "Clustering & Deduplication"
},
"clusteringAlgorithm": {
"title": "Clustering Algorithm",
"type": "string",
"description": "Clustering method",
"default": "kmeans",
"enum": ["kmeans", "hierarchical"],
"editor": "select"
},
"similarityThreshold": {
"title": "Similarity Threshold",
"type": "number",
"description": "Threshold for duplicate detection (0.5-1, higher = stricter)",
"default": 0.95,
"minimum": 0.5,
"maximum": 1,
"editor": "number"
},
"exportFormat": {
"title": "Export Format",
"type": "string",
"description": "Data export format",
"default": "json",
"enum": ["json", "csv"],
"editor": "select",
"sectionCaption": "Export/Import"
},
"importData": {
"title": "Import Data",
"type": "array",
"description": "Data to import (array of documents with content and optional metadata)",
"editor": "json",
"nullable": true
},
"ragMaxTokens": {
"title": "RAG Max Tokens",
"type": "integer",
"description": "Maximum context tokens for RAG query",
"default": 2000,
"minimum": 100,
"maximum": 10000,
"editor": "number",
"sectionCaption": "RAG Settings"
},
"ragContext": {
"title": "RAG Context",
"type": "string",
"description": "Additional context to prepend to RAG results",
"editor": "textarea",
"nullable": true
},
"sonaEnabled": {
"title": "Enable SONA Learning",
"type": "boolean",
"description": "Enable TRM/SONA self-learning with trajectory tracking and pattern recognition",
"default": true,
"sectionCaption": "SONA / TRM Self-Learning",
"sectionDescription": "Self-Optimizing Neural Architecture with Tiny Recursive Models"
},
"ewcLambda": {
"title": "EWC Lambda",
"type": "number",
"description": "Elastic Weight Consolidation strength for anti-forgetting protection. Higher values preserve more learned knowledge.",
"default": 2000,
"minimum": 100,
"maximum": 10000,
"editor": "number"
},
"patternThreshold": {
"title": "Pattern Threshold",
"type": "number",
"description": "Minimum confidence threshold for pattern recognition (0.1-1)",
"default": 0.7,
"minimum": 0.1,
"maximum": 1,
"editor": "number"
},
"maxTrajectories": {
"title": "Max Trajectories",
"type": "integer",
"description": "Maximum number of trajectory steps to track for learning",
"default": 100,
"minimum": 10,
"maximum": 1000,
"editor": "number"
},
"sonaLearningTiers": {
"title": "Learning Tiers",
"type": "array",
"description": "SONA learning tiers to enable (instant=real-time, background=async, deep=comprehensive)",
"default": ["instant", "background"],
"editor": "json"
}
},
"required": ["action"]
}