fix(neural-trader): Add missing technical indicators to live mode

🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-05-24 22:15:18 +00:00 · 2025-12-13 19:27:13 +00:00 · 2025-12-13 19:27:13 +00:00 · ee8c199fe5
commit ee8c199fe5
parent e7b36d1ac2
12 changed files with 705 additions and 60 deletions
--- a/examples/apify/agentic-synth/.actor/actor.json
+++ b/examples/apify/agentic-synth/.actor/actor.json
@ -1,11 +1,11 @@
 {
  "actorSpecification": 1,
  "name": "ai-synthetic-data-generator",
-  "title": "AI Synthetic Data Generator - Enterprise Mock Data with MCP & Actor Integration",
-  "description": "Generate unlimited, high-quality synthetic data with TRM/SONA self-learning. NEW: MCP server for AI agent integration + one-click data from Google Maps, Instagram, TikTok, Amazon scrapers. 6 use-case templates (Lead Intelligence, Competitor Monitor, Support Knowledge). Simulate Bloomberg terminals, medical records, supply chains, e-commerce, and more.",
-  "seoTitle": "AI Synthetic Data Generator - MCP Server & Actor Integration for RAG",
-  "seoDescription": "Generate synthetic data with MCP server support. One-click integration with Google Maps, Instagram, TikTok, Amazon scrapers. 6 pre-built templates for Lead Intelligence, Competitor Monitor, Support RAG. TRM/SONA self-learning.",
-  "version": "2.2",
+  "title": "AI Synthetic Data Generator - ONNX Embeddings, MCP & 21 Actor Integrations",
+  "description": "Generate unlimited synthetic data with ONNX-powered semantic embeddings (all-MiniLM-L6-v2, bge-small). 21 actor integrations (Reddit, Yelp, TripAdvisor, Zillow, Booking.com + more). 12 use-case templates. TRM/SONA self-learning. MCP server support.",
+  "seoTitle": "AI Synthetic Data Generator - ONNX Embeddings & 21 Actor Integrations",
+  "seoDescription": "Generate synthetic data with ONNX semantic embeddings. 21 actor integrations (Google Maps, Reddit, Yelp, Zillow). 12 templates. TRM/SONA self-learning.",
+  "version": "2.5",
  "buildTag": "latest",
  "minMemoryMbytes": 256,
  "maxMemoryMbytes": 4096,
--- a/examples/apify/agentic-synth/.actor/input_schema.json
+++ b/examples/apify/agentic-synth/.actor/input_schema.json
@ -1,6 +1,6 @@
 {
-  "title": "AI Synthetic Data Generator",
-  "description": "Generate unlimited, high-quality synthetic data for testing scrapers, training AI models, and building agentic applications. 18 data types including e-commerce, Bloomberg terminal, medical records, supply chain, and more. Now with one-click Apify actor integration and use case templates. Powered by TRM/SONA self-learning.",
+  "title": "AI Synthetic Data Generator v2.5",
+  "description": "Generate unlimited synthetic data with ONNX-powered semantic embeddings (all-MiniLM-L6-v2, bge-small). 21 actor integrations (Reddit, Yelp, TripAdvisor, Zillow, Booking.com + more). 12 use-case templates. TRM/SONA self-learning. MCP server support.",
  "type": "object",
  "schemaVersion": 1,
  "properties": {
@ -58,7 +58,7 @@
    "integrateActorId": {
      "title": "Apify Actor to Integrate",
      "type": "string",
-      "description": "Actor ID to pull data from (e.g., apify/google-maps-scraper). Used in 'integrate' mode.",
+      "description": "Actor ID to pull data from. 21 actors supported. Used in 'integrate' mode.",
      "editor": "select",
      "enum": [
        "apify/google-maps-scraper",
@ -73,7 +73,15 @@
        "apify/website-content-crawler",
        "apify/cheerio-scraper",
        "apify/news-scraper",
-        "apify/linkedin-scraper"
+        "apify/linkedin-scraper",
+        "trudax/tripadvisor-scraper",
+        "maxcopell/yelp-scraper",
+        "trudax/booking-scraper",
+        "petr_cermak/zillow-scraper",
+        "epctex/craigslist-scraper",
+        "apify/reddit-scraper",
+        "apify/facebook-posts-scraper",
+        "compass/google-places-api"
      ],
      "enumTitles": [
        "Google Maps Scraper - Local business data",
@ -88,10 +96,18 @@
        "Website Content Crawler - Full site content",
        "Cheerio Scraper - Structured extraction",
        "News Scraper - News articles",
-        "LinkedIn Scraper - Jobs, profiles"
+        "LinkedIn Scraper - Jobs, profiles",
+        "TripAdvisor Scraper - Reviews, restaurants, hotels",
+        "Yelp Scraper - Business reviews, ratings",
+        "Booking.com Scraper - Hotels, accommodations",
+        "Zillow Scraper - Real estate listings",
+        "Craigslist Scraper - Classifieds, listings",
+        "Reddit Scraper - Posts, comments, subreddits",
+        "Facebook Posts Scraper - Posts, engagement",
+        "Google Places API - Places, reviews"
      ],
-      "sectionCaption": "Actor Integration",
-      "sectionDescription": "One-click integration with top Apify actors for RAG/memory use cases"
+      "sectionCaption": "Actor Integration (21 Actors)",
+      "sectionDescription": "One-click integration with 21 top Apify actors for RAG/memory use cases"
    },
    "integrateRunId": {
      "title": "Run ID",
@ -116,18 +132,24 @@
    "useTemplate": {
      "title": "Use Case Template",
      "type": "string",
-      "description": "Pre-built template for common use cases. Used in 'template' mode.",
+      "description": "Pre-built template for common use cases. 12 templates available. Used in 'template' mode.",
      "editor": "select",
-      "enum": ["lead-intelligence", "competitor-monitor", "support-knowledge", "research-assistant", "content-library", "product-catalog"],
+      "enum": ["lead-intelligence", "competitor-monitor", "support-knowledge", "research-assistant", "content-library", "product-catalog", "review-aggregator", "price-tracker", "social-listening", "talent-sourcing", "real-estate-intel", "travel-planner"],
      "enumTitles": [
        "Lead Intelligence - Sales teams memorizing prospect data",
        "Competitor Monitor - Track competitor mentions/changes",
        "Support Knowledge - Customer support RAG system",
        "Research Assistant - Academic/market research",
        "Content Library - Content creators' reference",
-        "Product Catalog - E-commerce product memory"
+        "Product Catalog - E-commerce product memory",
+        "Review Aggregator - Aggregate reviews from multiple platforms",
+        "Price Tracker - Monitor prices for competitive intelligence",
+        "Social Listening - Monitor social conversations and trends",
+        "Talent Sourcing - Recruit candidates from job platforms",
+        "Real Estate Intel - Property market analysis",
+        "Travel Planner - Hotels, restaurants, activities data"
      ],
-      "sectionCaption": "Use Case Templates",
+      "sectionCaption": "Use Case Templates (12 Templates)",
      "sectionDescription": "Pre-configured templates for common data workflows"
    },
    "schema": {
@ -183,7 +205,30 @@
      "title": "Generate Embeddings",
      "type": "boolean",
      "description": "Generate vector embeddings for all output records (useful for RAG systems)",
-      "default": false
+      "default": false,
+      "sectionCaption": "ONNX Embeddings",
+      "sectionDescription": "Semantic embeddings powered by ONNX Runtime (Hugging Face Transformers.js)"
+    },
+    "useOnnxEmbeddings": {
+      "title": "Use ONNX Semantic Embeddings",
+      "type": "boolean",
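+      "description": "When enabled, embeddings are real semantic vectors computed with an ONNX model (slower but more accurate). When disabled, embeddings are random vectors (fast, intended for testing). Only applies when 'Generate Embeddings' is enabled.",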
+      "default": true
+    },
+    "embeddingModel": {
+      "title": "ONNX Embedding Model",
+      "type": "string",
+      "description": "Choose embedding model. Smaller models are faster, larger models are more accurate.",
+      "editor": "select",
+      "default": "all-MiniLM-L6-v2",
+      "enum": ["all-MiniLM-L6-v2", "bge-small-en-v1.5", "all-mpnet-base-v2", "e5-small-v2", "gte-small"],
+      "enumTitles": [
+        "all-MiniLM-L6-v2 (384d, Fast, Good) - Recommended",
+        "bge-small-en-v1.5 (384d, Fast, Excellent)",
+        "all-mpnet-base-v2 (768d, Medium, Excellent)",
+        "e5-small-v2 (384d, Fast, Very Good)",
+        "gte-small (384d, Fast, Very Good)"
+      ]
    },
    "simulationMode": {
      "title": "Long-Running Simulation",
--- a/examples/apify/agentic-synth/README.md
+++ b/examples/apify/agentic-synth/README.md
@ -1,24 +1,24 @@
-# AI Synthetic Data Generator - MCP Server & Actor Integration
+# AI Synthetic Data Generator - ONNX Embeddings, MCP & 21 Actor Integrations

-**Generate unlimited synthetic data** grounded in real-world patterns. **One-click integration** with 13 popular Apify web scrapers (Google Maps, Instagram, TikTok, Amazon, LinkedIn) lets you transform real scraped data into AI-ready formats for RAG systems, agent memory, and model training.
+**Generate unlimited synthetic data** with **ONNX-powered semantic embeddings** (all-MiniLM-L6-v2, bge-small). **One-click integration** with 21 popular Apify web scrapers (Google Maps, Instagram, TikTok, Amazon, LinkedIn, Reddit, Yelp, TripAdvisor, Zillow, Booking.com, and more). Also includes 12 use-case templates, TRM/SONA self-learning, and MCP server support.

 **Why grounding matters:** Pure synthetic data can drift from reality. By integrating with live Apify scrapers, your synthetic data inherits real naming conventions, price distributions, engagement patterns, and business characteristics - making your AI models and tests far more realistic.

 [![Apify Actor](https://img.shields.io/badge/Apify-Actor-blue)](https://apify.com/ruv/ai-synthetic-data-generator)
 [![MCP Server](https://img.shields.io/badge/MCP-Server-purple)](https://modelcontextprotocol.io)
 [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
-[![Version](https://img.shields.io/badge/version-2.2-green)](https://github.com/ruvnet/ruvector)
+[![Version](https://img.shields.io/badge/version-2.5-green)](https://github.com/ruvnet/ruvector)

-## What's New in v2.2
+## What's New in v2.5

 | Feature | Description |
 |---------|-------------|
-| **One-Click Scraper Integration** | Ground synthetic data with real patterns from Google Maps, Instagram, TikTok, YouTube, Amazon, LinkedIn, and 7 more scrapers |
+| **ONNX Semantic Embeddings** | Real semantic embeddings via Hugging Face Transformers.js (all-MiniLM-L6-v2, bge-small, all-mpnet-base-v2, e5-small, gte-small) |
+| **21 Actor Integrations** | Added Reddit, Yelp, TripAdvisor, Zillow, Booking.com, Craigslist, Facebook, Google Places |
+| **12 Use Case Templates** | Added Review Aggregator, Price Tracker, Social Listening, Talent Sourcing, Real Estate Intel, Travel Planner |
 | **MCP Server** | Use as AI agent tool (Claude, GPT) via Model Context Protocol |
-| **6 Use Case Templates** | Lead Intelligence, Competitor Monitor, Support RAG, and more |
-| **Enhanced Grounding** | Transform scraped data into AI-ready formats that preserve real-world distributions |
-| **Webhook Support** | POST results to your endpoint for async workflows |
-| **Output Formats** | JSON, JSONL, CSV export options |
+| **TRM/SONA Self-Learning** | 7M parameter recursive reasoning with 3-tier learning |
+| **Output Formats** | JSON, JSONL, CSV export with webhook support |

 ---

@ -26,9 +26,10 @@

 | Feature | Description |
 |---------|-------------|
-| **Scraper Integration** | One-click grounding with 13 popular Apify scrapers for realistic data |
+| **ONNX Embeddings** | 5 semantic embedding models via Transformers.js for RAG-ready vectors |
+| **21 Actor Integrations** | One-click grounding with 21 popular Apify scrapers |
+| **12 Templates** | Pre-built workflows for common use cases |
 | **MCP Server** | Integrate with Claude Code, GPT, and AI agents |
-| **6 Templates** | Pre-built workflows for common use cases |
 | **TRM** | 7M parameter recursive reasoning (83% on GSM8K) |
 | **SONA** | 3-tier self-learning (Instant/Background/Deep) |
 | **EWC++** | Pattern preservation across generations (lambda=2000) |
@ -92,7 +93,35 @@ Once connected, you get these tools in your AI agent:

 ---

-## One-Click Actor Integration
+## ONNX Semantic Embeddings
+
+Generate real semantic embeddings for RAG systems using ONNX Runtime and Hugging Face Transformers.js.
+
+### Available Models
+
+| Model | Dimensions | Speed | Quality | Use Case |
+|-------|------------|-------|---------|----------|
+| **all-MiniLM-L6-v2** | 384 | Fast | Good | General purpose (recommended) |
+| **bge-small-en-v1.5** | 384 | Fast | Excellent | High-quality retrieval |
+| **all-mpnet-base-v2** | 768 | Medium | Excellent | Maximum accuracy |
+| **e5-small-v2** | 384 | Fast | Very Good | Balanced performance |
+| **gte-small** | 384 | Fast | Very Good | Efficient retrieval |
+
+### Example: Generate with ONNX Embeddings
+
+```json
+{
+  "dataType": "ecommerce",
+  "count": 100,
+  "generateEmbeddings": true,
+  "useOnnxEmbeddings": true,
+  "embeddingModel": "all-MiniLM-L6-v2"
+}
+```
+
+---
+
+## One-Click Actor Integration (21 Actors)

 Transform data from popular Apify scrapers into AI-ready format.

@ -113,6 +142,14 @@ Transform data from popular Apify scrapers into AI-ready format.
 | **apify/web-scraper** | General | Any web page data |
 | **apify/cheerio-scraper** | General | Structured extraction |
 | **apify/news-scraper** | News | Articles, authors, sources |
+| **trudax/tripadvisor-scraper** | Reviews | Hotels, restaurants, reviews |
+| **maxcopell/yelp-scraper** | Reviews | Business reviews, ratings |
+| **trudax/booking-scraper** | Travel | Hotels, accommodations, prices |
+| **petr_cermak/zillow-scraper** | Real Estate | Property listings, prices |
+| **epctex/craigslist-scraper** | Classifieds | Listings, local posts |
+| **apify/reddit-scraper** | Social Media | Posts, comments, subreddits |
+| **apify/facebook-posts-scraper** | Social Media | Posts, engagement |
+| **compass/google-places-api** | Local Business | Places, reviews, details |

 ### Example: Integrate Google Maps Data

@ -128,7 +165,7 @@ Transform data from popular Apify scrapers into AI-ready format.

 ---

-## Use Case Templates
+## Use Case Templates (12 Templates)

 One-click deployment for common AI/RAG scenarios.

@ -140,6 +177,12 @@ One-click deployment for common AI/RAG scenarios.
 | **research-assistant** | Academic/market research | Researchers | Google Search, News, Content |
 | **content-library** | Content creators' reference | Creators | Instagram, TikTok, YouTube |
 | **product-catalog** | E-commerce product memory | E-commerce | Amazon, Shopify, Google Maps |
+| **review-aggregator** | Aggregate reviews from platforms | Product Managers | TripAdvisor, Yelp, Google Maps |
+| **price-tracker** | Monitor prices across sites | Pricing Teams | Amazon, Zillow, Booking.com |
+| **social-listening** | Monitor social conversations | Social Media, PR | Reddit, Twitter, Facebook |
+| **talent-sourcing** | Recruit from job platforms | Recruiters, HR | LinkedIn, Craigslist |
+| **real-estate-intel** | Property market analysis | Real Estate, Investors | Zillow, Google Maps, Craigslist |
+| **travel-planner** | Hotels, restaurants, activities | Travel Agents | TripAdvisor, Booking.com, Google Maps |

 ### Example: Lead Intelligence Template

--- a/examples/apify/agentic-synth/package.json
+++ b/examples/apify/agentic-synth/package.json
@ -1,7 +1,7 @@
 {
  "name": "agentic-synth-apify-actor",
-  "version": "2.2.0",
-  "description": "AI Synthetic Data Generator with TRM/SONA self-learning, MCP server, and Apify actor integrations",
+  "version": "2.5.0",
+  "description": "AI Synthetic Data Generator with ONNX embeddings, TRM/SONA self-learning, MCP server, and 21 Apify actor integrations",
  "main": "src/main.js",
  "type": "module",
  "bin": {
@ -19,6 +19,7 @@
    "@google/generative-ai": "^0.24.1",
    "@modelcontextprotocol/sdk": "^1.0.0",
    "@ruvector/ruvllm": "^0.2.3",
+    "@xenova/transformers": "^2.17.2",
    "apify": "^3.5.2"
  },
  "optionalDependencies": {
--- a/examples/apify/agentic-synth/src/embeddings.js
+++ b/examples/apify/agentic-synth/src/embeddings.js
@ -0,0 +1,96 @@
+/**
+ * ONNX-based Embeddings Module
+ * Uses @xenova/transformers (Hugging Face Transformers.js) for ONNX Runtime embeddings
+ */
+
+import { log } from 'apify';
+
+// Most recently loaded feature-extraction pipeline; stays null until
+// initEmbeddingPipeline() has loaded a model successfully.
+let embeddingPipeline = null;
+// EMBEDDING_MODELS key that the cached pipeline was created for.
+let currentModel = null;
+
+/**
+ * Embedding models selectable by short name.
+ * - id: model identifier passed to the Transformers.js pipeline() call
+ * - dimensions: vector length recorded on each record as embeddingDimensions
+ * - speed / quality: descriptive labels; nothing in this module reads them
+ */
+export const EMBEDDING_MODELS = {
+  'all-MiniLM-L6-v2': { id: 'Xenova/all-MiniLM-L6-v2', dimensions: 384, speed: 'fast', quality: 'good' },
+  'bge-small-en-v1.5': { id: 'Xenova/bge-small-en-v1.5', dimensions: 384, speed: 'fast', quality: 'excellent' },
+  'all-mpnet-base-v2': { id: 'Xenova/all-mpnet-base-v2', dimensions: 768, speed: 'medium', quality: 'excellent' },
+  'e5-small-v2': { id: 'Xenova/e5-small-v2', dimensions: 384, speed: 'fast', quality: 'very-good' },
+  'gte-small': { id: 'Xenova/gte-small', dimensions: 384, speed: 'fast', quality: 'very-good' }
+};
+
+/**
+ * Load the feature-extraction pipeline for a model, reusing the cached one
+ * when the same model was loaded by a previous call.
+ *
+ * @param {string} [modelName='all-MiniLM-L6-v2'] - Key of EMBEDDING_MODELS.
+ * @returns {Promise<Function>} The pipeline returned by Transformers.js.
+ * @throws {Error} If modelName is not one of the EMBEDDING_MODELS keys.
+ */
+export async function initEmbeddingPipeline(modelName = 'all-MiniLM-L6-v2') {
+  // Own-property lookup: a bare EMBEDDING_MODELS[modelName] also matches inherited
+  // keys such as 'constructor' or 'toString', which would slip past the check below
+  // and hand an undefined model id to pipeline().
+  const modelConfig = Object.prototype.hasOwnProperty.call(EMBEDDING_MODELS, modelName)
+    ? EMBEDDING_MODELS[modelName]
+    : undefined;
+  if (!modelConfig) throw new Error(`Unknown model: ${modelName}`);
+  if (embeddingPipeline && currentModel === modelName) return embeddingPipeline;
+
+  // Dynamic import: the library is loaded on first use, not when this module is imported.
+  const { pipeline } = await import('@xenova/transformers');
+  log.info(`Loading ONNX embedding model: ${modelConfig.id}...`);
+  embeddingPipeline = await pipeline('feature-extraction', modelConfig.id, { quantized: true });
+  currentModel = modelName;
+  return embeddingPipeline;
+}
+
+/**
+ * Embed a single text with mean pooling.
+ *
+ * @param {string} text - Text to embed; only the first 8000 characters are used.
+ *   null/undefined is treated as an empty string, other non-strings are stringified.
+ * @param {object} [options]
+ * @param {string} [options.modelName='all-MiniLM-L6-v2'] - Key of EMBEDDING_MODELS.
+ * @param {boolean} [options.normalize=true] - Forwarded to the pipeline call.
+ * @returns {Promise<number[]>} The embedding as a plain array.
+ */
+export async function generateEmbedding(text, options = {}) {
+  const { modelName = 'all-MiniLM-L6-v2', normalize = true } = options;
+  const pipe = await initEmbeddingPipeline(modelName);
+  // Missing or non-string input used to throw on .substring(); coerce it instead.
+  const input = typeof text === 'string' ? text : String(text ?? '');
+  const output = await pipe(input.substring(0, 8000), { pooling: 'mean', normalize });
+  return Array.from(output.data);
+}
+
+/**
+ * Embed many texts, processing them in slices of batchSize.
+ *
+ * @param {Array<string|null|undefined>} texts - Texts to embed; falsy entries are
+ *   embedded as '' and each text is cut to its first 8000 characters.
+ * @param {object} [options]
+ * @param {string} [options.modelName='all-MiniLM-L6-v2'] - Key of EMBEDDING_MODELS.
+ * @param {number} [options.batchSize=32] - Texts awaited together per slice; must be >= 1.
+ * @param {boolean} [options.normalize=true] - Forwarded to every pipeline call.
+ * @param {Function|null} [options.onProgress] - Called after each slice with { processed, total }.
+ * @returns {Promise<number[][]>} One embedding per input text, in input order.
+ * @throws {RangeError} If batchSize is not a number >= 1.
+ */
+export async function generateEmbeddingsBatch(texts, options = {}) {
+  const { modelName = 'all-MiniLM-L6-v2', batchSize = 32, normalize = true, onProgress = null } = options;
+  // A zero, negative, NaN or null batchSize would never advance the loop below.
+  if (typeof batchSize !== 'number' || !(batchSize >= 1)) {
+    throw new RangeError(`batchSize must be a number >= 1, got ${batchSize}`);
+  }
+  const pipe = await initEmbeddingPipeline(modelName);
+  const embeddings = [];
+
+  for (let i = 0; i < texts.length; i += batchSize) {
+    const batch = texts.slice(i, i + batchSize);
+    const batchEmbeddings = await Promise.all(
+      batch.map(async (text) => {
+        const output = await pipe((text || '').substring(0, 8000), { pooling: 'mean', normalize });
+        return Array.from(output.data);
+      })
+    );
+    embeddings.push(...batchEmbeddings);
+    if (onProgress) onProgress({ processed: Math.min(i + batchSize, texts.length), total: texts.length });
+  }
+  return embeddings;
+}
+
+/**
+ * Attach an embedding to every record.
+ *
+ * The text for a record is built by joining, with single spaces, the values of
+ * textFields read from the record itself or, failing that, from record.data.
+ * String values are used as-is, arrays are space-joined, anything else is ignored;
+ * a record with no usable text is embedded as the literal 'empty'.
+ *
+ * @param {object[]} records - Records to enrich; a nullish or empty value is returned unchanged.
+ * @param {object} [options]
+ * @param {string} [options.modelName='all-MiniLM-L6-v2'] - Key of EMBEDDING_MODELS.
+ * @param {string[]} [options.textFields] - Field names to read text from.
+ * @returns {Promise<object[]>} Copies of the records with embedding, embeddingModel
+ *   and embeddingDimensions added.
+ */
+export async function addEmbeddingsToRecords(records, options = {}) {
+  const {
+    modelName = 'all-MiniLM-L6-v2',
+    textFields = ['title', 'description', 'text', 'content', 'caption', 'body', 'name']
+  } = options;
+  if (!records?.length) return records;
+
+  const modelConfig = EMBEDDING_MODELS[modelName];
+  log.info(`Generating ONNX embeddings for ${records.length} records with ${modelName}`);
+
+  // Turn one field of one record into text ('' when there is nothing usable).
+  const fieldText = (record, field) => {
+    const value = record[field] || record.data?.[field];
+    if (typeof value === 'string') return value;
+    return Array.isArray(value) ? value.join(' ') : '';
+  };
+
+  const texts = records.map((record) => {
+    const joined = textFields
+      .map((field) => fieldText(record, field))
+      .filter(Boolean)
+      .join(' ');
+    return joined || 'empty';
+  });
+
+  const embeddings = await generateEmbeddingsBatch(texts, { modelName });
+  return records.map((record, idx) => ({
+    ...record,
+    embedding: embeddings[idx],
+    embeddingModel: modelName,
+    embeddingDimensions: modelConfig.dimensions
+  }));
+}
+
+/**
+ * Build a unit-length vector of random components, rounded to 6 decimals.
+ *
+ * @param {number} dimensions - Number of components to generate.
+ * @param {() => number} [random=Math.random] - Source of values; each draw is
+ *   mapped with `x * 2 - 1` before normalization.
+ * @returns {number[]} The normalized vector; all zeros if every draw mapped to 0.
+ */
+export function generateRandomEmbedding(dimensions, random = Math.random) {
+  const embedding = [];
+  let sumOfSquares = 0;
+  for (let i = 0; i < dimensions; i++) {
+    const val = random() * 2 - 1;
+    embedding.push(val);
+    sumOfSquares += val * val;
+  }
+  const norm = Math.sqrt(sumOfSquares);
+  // A zero norm (every component exactly 0) would turn the division into NaN.
+  if (norm === 0) return embedding.map(() => 0);
+  return embedding.map(v => Math.round((v / norm) * 1000000) / 1000000);
+}
+
+/**
+ * Cosine similarity of two numeric vectors.
+ *
+ * Iterates over the indices of `a` and reads `b` at the same positions, so both
+ * vectors are expected to have the same length.
+ *
+ * @param {number[]} a
+ * @param {number[]} b
+ * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has zero magnitude.
+ */
+export function cosineSimilarity(a, b) {
+  let dot = 0;
+  let normA = 0;
+  let normB = 0;
+  for (let i = 0; i < a.length; i++) {
+    dot += a[i] * b[i];
+    normA += a[i] * a[i];
+    normB += b[i] * b[i];
+  }
+  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
+  // Zero-magnitude input would otherwise produce 0 / 0 = NaN.
+  if (denominator === 0) return 0;
+  return dot / denominator;
+}
--- a/examples/apify/agentic-synth/src/integrations.js
+++ b/examples/apify/agentic-synth/src/integrations.js
@ -98,6 +98,62 @@ export const SUPPORTED_ACTORS = {
    category: 'professional',
    defaultFields: ['title', 'company', 'location', 'description', 'salary'],
    transform: transformLinkedIn
+  },
+
+  // Reviews & Local
+  'trudax/tripadvisor-scraper': {
+    name: 'TripAdvisor Scraper',
+    category: 'reviews',
+    defaultFields: ['name', 'rating', 'reviewCount', 'address', 'priceLevel', 'cuisine'],
+    transform: transformTripAdvisor
+  },
+  'maxcopell/yelp-scraper': {
+    name: 'Yelp Scraper',
+    category: 'reviews',
+    defaultFields: ['name', 'rating', 'reviewCount', 'address', 'categories', 'phone'],
+    transform: transformYelp
+  },
+  'trudax/booking-scraper': {
+    name: 'Booking.com Scraper',
+    category: 'travel',
+    defaultFields: ['name', 'rating', 'price', 'location', 'amenities', 'reviewScore'],
+    transform: transformBooking
+  },
+
+  // Real Estate
+  'petr_cermak/zillow-scraper': {
+    name: 'Zillow Scraper',
+    category: 'real-estate',
+    defaultFields: ['address', 'price', 'bedrooms', 'bathrooms', 'sqft', 'propertyType'],
+    transform: transformZillow
+  },
+  'epctex/craigslist-scraper': {
+    name: 'Craigslist Scraper',
+    category: 'classifieds',
+    defaultFields: ['title', 'price', 'location', 'description', 'category', 'postedAt'],
+    transform: transformCraigslist
+  },
+
+  // Social Platforms
+  'apify/reddit-scraper': {
+    name: 'Reddit Scraper',
+    category: 'social-media',
+    defaultFields: ['title', 'text', 'subreddit', 'score', 'comments', 'author'],
+    transform: transformReddit
+  },
+  'apify/facebook-posts-scraper': {
+    name: 'Facebook Posts Scraper',
+    category: 'social-media',
+    defaultFields: ['text', 'likes', 'comments', 'shares', 'author', 'timestamp'],
+    transform: transformFacebook
+  },
+
+  // Places & Maps
+  'compass/google-places-api': {
+    name: 'Google Places API',
+    category: 'local-business',
+    defaultFields: ['name', 'rating', 'address', 'phone', 'website', 'types', 'priceLevel'],
+    transform: transformGooglePlaces
  }
 };

@ -217,6 +273,128 @@ export const USE_CASE_TEMPLATES = {
      recommendedAlternatives: 'array<string>',
      embedding: 'array<number>'
    }
+  },
+
+  'review-aggregator': {
+    name: 'Review Aggregator',
+    description: 'Aggregate and analyze reviews from multiple platforms',
+    targetUsers: ['Product Managers', 'Brand Managers', 'Customer Experience'],
+    suggestedActors: ['trudax/tripadvisor-scraper', 'maxcopell/yelp-scraper', 'apify/google-maps-scraper'],
+    memorizeFields: ['name', 'rating', 'reviewCount', 'text', 'sentiment', 'categories'],
+    enrichWith: ['sentiment_score', 'common_themes', 'rating_trend'],
+    outputFormat: {
+      entityId: 'string',
+      name: 'string',
+      averageRating: 'number',
+      totalReviews: 'number',
+      platforms: 'array<object>',
+      sentimentAnalysis: 'object',
+      topPraises: 'array<string>',
+      topComplaints: 'array<string>',
+      embedding: 'array<number>'
+    }
+  },
+
+  'price-tracker': {
+    name: 'Price Tracker',
+    description: 'Monitor prices across platforms for competitive intelligence',
+    targetUsers: ['Pricing Teams', 'Buyers', 'Resellers'],
+    suggestedActors: ['apify/amazon-scraper', 'petr_cermak/zillow-scraper', 'trudax/booking-scraper'],
+    memorizeFields: ['title', 'price', 'currency', 'availability', 'seller', 'timestamp'],
+    enrichWith: ['price_history', 'price_alerts', 'competitor_comparison'],
+    outputFormat: {
+      productId: 'string',
+      title: 'string',
+      currentPrice: 'number',
+      priceHistory: 'array<object>',
+      lowestPrice: 'number',
+      highestPrice: 'number',
+      priceChange: 'number',
+      competitors: 'array<object>',
+      embedding: 'array<number>'
+    }
+  },
+
+  'social-listening': {
+    name: 'Social Listening',
+    description: 'Monitor social conversations about brands, topics, and trends',
+    targetUsers: ['Social Media Managers', 'PR Teams', 'Brand Managers'],
+    suggestedActors: ['apify/reddit-scraper', 'apify/twitter-scraper', 'apify/facebook-posts-scraper'],
+    memorizeFields: ['text', 'author', 'engagement', 'sentiment', 'platform', 'timestamp'],
+    enrichWith: ['sentiment_analysis', 'influencer_score', 'viral_potential'],
+    outputFormat: {
+      postId: 'string',
+      platform: 'string',
+      content: 'string',
+      author: 'object',
+      engagement: 'object',
+      sentiment: 'number (-1 to 1)',
+      mentions: 'array<string>',
+      hashtags: 'array<string>',
+      viralScore: 'number (1-100)',
+      embedding: 'array<number>'
+    }
+  },
+
+  'talent-sourcing': {
+    name: 'Talent Sourcing',
+    description: 'Recruit and source candidates from job platforms',
+    targetUsers: ['Recruiters', 'HR Teams', 'Talent Acquisition'],
+    suggestedActors: ['apify/linkedin-scraper', 'epctex/craigslist-scraper'],
+    memorizeFields: ['title', 'company', 'location', 'skills', 'experience', 'salary'],
+    enrichWith: ['skill_match', 'culture_fit', 'availability'],
+    outputFormat: {
+      candidateId: 'string',
+      name: 'string',
+      currentRole: 'string',
+      company: 'string',
+      skills: 'array<string>',
+      experience: 'number',
+      location: 'string',
+      matchScore: 'number (1-100)',
+      embedding: 'array<number>'
+    }
+  },
+
+  'real-estate-intel': {
+    name: 'Real Estate Intelligence',
+    description: 'Market analysis and property intelligence for real estate',
+    targetUsers: ['Real Estate Agents', 'Investors', 'Property Managers'],
+    suggestedActors: ['petr_cermak/zillow-scraper', 'apify/google-maps-scraper', 'epctex/craigslist-scraper'],
+    memorizeFields: ['address', 'price', 'sqft', 'bedrooms', 'bathrooms', 'propertyType'],
+    enrichWith: ['market_trends', 'comparable_sales', 'neighborhood_score'],
+    outputFormat: {
+      propertyId: 'string',
+      address: 'string',
+      price: 'number',
+      pricePerSqft: 'number',
+      propertyType: 'string',
+      specs: 'object',
+      marketAnalysis: 'object',
+      comparables: 'array<object>',
+      investmentScore: 'number (1-100)',
+      embedding: 'array<number>'
+    }
+  },
+
+  'travel-planner': {
+    name: 'Travel Planner',
+    description: 'Plan trips with aggregated hotel, restaurant, and activity data',
+    targetUsers: ['Travel Agents', 'Travelers', 'Tourism Boards'],
+    suggestedActors: ['trudax/tripadvisor-scraper', 'trudax/booking-scraper', 'apify/google-maps-scraper'],
+    memorizeFields: ['name', 'rating', 'price', 'location', 'amenities', 'reviews'],
+    enrichWith: ['booking_availability', 'best_time_to_visit', 'local_tips'],
+    outputFormat: {
+      placeId: 'string',
+      name: 'string',
+      type: 'string (hotel, restaurant, attraction)',
+      rating: 'number',
+      priceRange: 'string',
+      location: 'object',
+      highlights: 'array<string>',
+      reviews: 'array<object>',
+      embedding: 'array<number>'
+    }
  }
 };

@ -515,6 +693,225 @@ function transformLinkedIn(item) {
  };
 }

+/**
+ * Normalize a raw TripAdvisor scraper item.
+ * Each output field takes the first truthy value among its alternative input
+ * keys; at most five photos are kept, reduced to their url when one is present.
+ */
+function transformTripAdvisor(item) {
+  const addressObj = item.addressObj;
+  const address = {
+    full: item.address || addressObj?.street1,
+    city: item.city || addressObj?.city,
+    country: item.country || addressObj?.country
+  };
+  const photos = (item.photos || [])
+    .slice(0, 5)
+    .map((photo) => photo.url || photo);
+
+  return {
+    id: item.id || item.locationId || generateId(),
+    source: 'tripadvisor',
+    name: item.name || item.title,
+    type: item.type || item.category,
+    rating: item.rating || item.overallRating,
+    reviewCount: item.reviewCount || item.numberOfReviews,
+    priceLevel: item.priceLevel || item.priceRange,
+    address,
+    cuisine: item.cuisine || item.cuisines || [],
+    features: item.features || item.amenities || [],
+    awards: item.awards || [],
+    photos,
+    url: item.url || item.webUrl,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Yelp scraper item.
+ * Address parts are read from item.location; categories are reduced to their
+ * title when they are objects; at most five photos are kept.
+ */
+function transformYelp(item) {
+  const loc = item.location;
+  const categories = (item.categories || []).map((category) => category.title || category);
+  const photos = (item.photos || []).slice(0, 5);
+
+  return {
+    id: item.id || item.businessId || generateId(),
+    source: 'yelp',
+    name: item.name || item.businessName,
+    rating: item.rating,
+    reviewCount: item.reviewCount || item.review_count,
+    priceLevel: item.price || item.priceRange,
+    address: {
+      full: item.address || loc?.display_address?.join(', '),
+      street: loc?.address1,
+      city: loc?.city,
+      state: loc?.state,
+      zip: loc?.zip_code
+    },
+    phone: item.phone || item.display_phone,
+    categories,
+    hours: item.hours || item.businessHours,
+    photos,
+    isClaimed: item.is_claimed,
+    url: item.url,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Booking.com scraper item.
+ * Price and location are grouped into nested objects; type falls back to 'hotel'
+ * and currency to 'USD'; at most five photos are kept, reduced to their url
+ * when one is present.
+ */
+function transformBooking(item) {
+  const price = {
+    amount: item.price || item.priceAmount,
+    currency: item.currency || 'USD',
+    perNight: item.pricePerNight || item.price
+  };
+  const location = {
+    address: item.address,
+    city: item.city,
+    country: item.country,
+    lat: item.latitude || item.location?.lat,
+    lng: item.longitude || item.location?.lng
+  };
+  const photos = (item.photos || [])
+    .slice(0, 5)
+    .map((photo) => photo.url || photo);
+
+  return {
+    id: item.id || item.hotelId || generateId(),
+    source: 'booking',
+    name: item.name || item.hotelName,
+    type: item.type || item.accommodationType || 'hotel',
+    rating: item.rating || item.reviewScore,
+    reviewScore: item.reviewScore || item.score,
+    reviewCount: item.reviewCount || item.numberOfReviews,
+    stars: item.stars || item.starRating,
+    price,
+    location,
+    amenities: item.amenities || item.facilities || [],
+    photos,
+    url: item.url,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Zillow scraper item.
+ *
+ * Price falls back to the zestimate; bedroom/bathroom/area specs are grouped
+ * under `specs`; at most five photos are kept.
+ */
+function transformZillow(item) {
+  // NOTE(review): hiResImageLink looks like a single URL string rather than a list —
+  // confirm against the scraper output. Calling .slice(0, 5) directly on a string
+  // would return its first five characters, so a string is wrapped in an array first.
+  const rawPhotos = item.photos || item.hiResImageLink || [];
+  const photoList = typeof rawPhotos === 'string' ? [rawPhotos] : rawPhotos;
+
+  return {
+    id: item.zpid || item.id || generateId(),
+    source: 'zillow',
+    address: {
+      full: item.address || item.streetAddress,
+      street: item.streetAddress,
+      city: item.city,
+      state: item.state,
+      zip: item.zipcode
+    },
+    price: item.price || item.zestimate,
+    zestimate: item.zestimate,
+    rentZestimate: item.rentZestimate,
+    propertyType: item.homeType || item.propertyType,
+    status: item.homeStatus || item.status,
+    specs: {
+      // `??` keeps a legitimate 0 (e.g. a studio with 0 bedrooms) instead of dropping it.
+      bedrooms: item.bedrooms ?? item.beds,
+      bathrooms: item.bathrooms ?? item.baths,
+      sqft: item.livingArea || item.sqft,
+      lotSize: item.lotSize || item.lotAreaValue,
+      yearBuilt: item.yearBuilt
+    },
+    features: item.resoFacts?.atAGlanceFacts || [],
+    priceHistory: item.priceHistory || [],
+    taxHistory: item.taxHistory || [],
+    photos: photoList.slice(0, 5),
+    url: item.url || item.hdpUrl,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Craigslist scraper item.
+ * Location parts are grouped into a nested object and at most five images are
+ * kept (from item.images, else item.pics).
+ */
+function transformCraigslist(item) {
+  const location = {
+    area: item.location || item.hood,
+    city: item.city,
+    region: item.region
+  };
+  const images = (item.images || item.pics || []).slice(0, 5);
+
+  return {
+    id: item.id || item.postId || generateId(),
+    source: 'craigslist',
+    title: item.title || item.postTitle,
+    price: item.price,
+    category: item.category || item.section,
+    subcategory: item.subcategory,
+    location,
+    description: item.description || item.body,
+    attributes: item.attributes || {},
+    images,
+    postedAt: item.datetime || item.postedAt,
+    updatedAt: item.updated,
+    url: item.url || item.postUrl,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Reddit scraper item.
+ *
+ * The score is taken from item.score when present (0 included); otherwise it is
+ * derived as ups - downs when ups is available. The url falls back to a
+ * reddit.com link built from item.permalink, and is left undefined when
+ * neither is present.
+ */
+function transformReddit(item) {
+  // Without the ups check, a missing `ups` made the derived score NaN.
+  const derivedScore = item.ups != null ? item.ups - (item.downs || 0) : undefined;
+  // Only build the permalink URL when a permalink exists, so the result never
+  // ends in the literal text "undefined".
+  const permalinkUrl = item.permalink ? `https://reddit.com${item.permalink}` : undefined;
+
+  return {
+    id: item.id || item.postId || generateId(),
+    source: 'reddit',
+    type: item.type || (item.isSelf ? 'text' : 'link'),
+    title: item.title,
+    text: item.selftext || item.body || item.text,
+    subreddit: item.subreddit || item.subredditName,
+    author: {
+      username: item.author || item.authorName,
+      id: item.authorId
+    },
+    engagement: {
+      // `??` keeps legitimate zero counts instead of falling through to the alternative key.
+      score: item.score ?? derivedScore,
+      upvotes: item.ups,
+      downvotes: item.downs,
+      comments: item.numComments ?? item.num_comments,
+      awards: item.totalAwards ?? item.total_awards_received
+    },
+    flair: item.linkFlair || item.link_flair_text,
+    nsfw: item.over18 || item.over_18 || false,
+    spoiler: item.spoiler || false,
+    url: item.url || permalinkUrl,
+    mediaUrl: item.mediaUrl || item.url_overridden_by_dest,
+    createdAt: item.created || item.createdUtc,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Facebook posts scraper item.
+ *
+ * The post text is the first truthy value of item.text, item.message and
+ * item.content; hashtags and mentions are extracted from that same text.
+ */
+function transformFacebook(item) {
+  // Resolve the text once so hashtags/mentions are extracted from exactly what is
+  // stored in `text` (item.content used to be ignored for extraction).
+  const text = item.text || item.message || item.content;
+
+  return {
+    id: item.id || item.postId || generateId(),
+    source: 'facebook',
+    type: item.type || 'post',
+    text,
+    author: {
+      name: item.authorName || item.user?.name,
+      id: item.authorId || item.user?.id,
+      url: item.authorUrl || item.user?.url
+    },
+    engagement: {
+      likes: item.likes || item.likesCount,
+      comments: item.comments || item.commentsCount,
+      shares: item.shares || item.sharesCount,
+      reactions: item.reactions || {}
+    },
+    media: {
+      images: item.images || [],
+      videos: item.videos || [],
+      links: item.links || []
+    },
+    hashtags: extractHashtags(text),
+    mentions: extractMentions(text),
+    timestamp: item.time || item.timestamp || item.createdAt,
+    url: item.url || item.postUrl,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
+/**
+ * Normalize a raw Google Places item.
+ *
+ * Accepts both snake_case keys (place_id, user_ratings_total, price_level, ...)
+ * and their camelCase alternatives. At most five photos and five reviews are kept;
+ * reviews are reduced to { text, rating, author }.
+ */
+function transformGooglePlaces(item) {
+  return {
+    id: item.place_id || item.placeId || generateId(),
+    source: 'google-places',
+    name: item.name,
+    rating: item.rating,
+    // `??` instead of `||` for numeric fields: 0 is a legitimate value (no ratings yet,
+    // price level 0, a coordinate on the equator/prime meridian) and must not be
+    // replaced by the alternative key.
+    reviewCount: item.user_ratings_total ?? item.reviewCount,
+    priceLevel: item.price_level ?? item.priceLevel,
+    address: item.formatted_address || item.address,
+    phone: item.formatted_phone_number || item.phone,
+    website: item.website,
+    types: item.types || [],
+    location: {
+      lat: item.geometry?.location?.lat ?? item.lat,
+      lng: item.geometry?.location?.lng ?? item.lng
+    },
+    hours: item.opening_hours || item.hours,
+    photos: (item.photos || []).slice(0, 5).map(p => p.photo_reference || p),
+    reviews: (item.reviews || []).slice(0, 5).map(r => ({
+      text: r.text,
+      rating: r.rating,
+      author: r.author_name
+    })),
+    url: item.url,
+    scrapedAt: item.scrapedAt || new Date().toISOString()
+  };
+}
+
 // ============================================
 // UTILITY FUNCTIONS
 // ============================================
--- a/examples/apify/agentic-synth/src/main.js
+++ b/examples/apify/agentic-synth/src/main.js
@ -2,6 +2,7 @@ import { Actor, log } from 'apify';
 import { GoogleGenerativeAI } from '@google/generative-ai';
 import { createRequire } from 'module';
 import { integrateActorData, SUPPORTED_ACTORS, USE_CASE_TEMPLATES, getTemplate, listSupportedActors, listTemplates } from './integrations.js';
+import { addEmbeddingsToRecords, generateRandomEmbedding, EMBEDDING_MODELS } from './embeddings.js';

 // CJS import workaround for RuvLLM native extension
 const require = createRequire(import.meta.url);
@ -58,10 +59,13 @@ try {
    sonaEnabled = true,
    ewcLambda = 2000,
    patternThreshold = 0.7,
-    sonaLearningTiers = ['instant', 'background']
+    sonaLearningTiers = ['instant', 'background'],
+    // ONNX Embedding parameters
+    useOnnxEmbeddings = true,
+    embeddingModel = 'all-MiniLM-L6-v2'
  } = input;

-  log.info('AI Synthetic Data Generator v2.2 with TRM/SONA', { mode, dataType, count, provider, model, sonaEnabled });
+  log.info('AI Synthetic Data Generator v2.5 with ONNX Embeddings & TRM/SONA', { mode, dataType, count, provider, model, sonaEnabled, useOnnxEmbeddings, embeddingModel });

  // Initialize SONA if available and enabled
  if (ruvllm && sonaEnabled) {
@ -274,33 +278,42 @@ try {
  // EMBEDDING GENERATION (optional)
  // ============================================
  if (generateEmbeddings && generatedData.length > 0) {
-    log.info(`Generating embeddings with ${embeddingDimensions} dimensions...`);
+    const modelConfig = EMBEDDING_MODELS[embeddingModel] || EMBEDDING_MODELS['all-MiniLM-L6-v2'];
+    const effectiveDimensions = useOnnxEmbeddings ? modelConfig.dimensions : embeddingDimensions;

-    const random = createSeededRandom(seed);
-
-    generatedData = generatedData.map((item, idx) => {
-      // Generate normalized random embedding
-      const embedding = [];
-      let norm = 0;
-
-      for (let j = 0; j < embeddingDimensions; j++) {
-        const val = random() * 2 - 1;
-        embedding.push(val);
-        norm += val * val;
-      }
-
-      norm = Math.sqrt(norm);
-      for (let j = 0; j < embeddingDimensions; j++) {
-        embedding[j] = Math.round((embedding[j] / norm) * 1000000) / 1000000;
-      }
-
-      return {
-        ...item,
-        embedding,
-        embeddingDimensions
-      };
+    log.info(`Generating embeddings with ${effectiveDimensions} dimensions...`, {
+      useOnnx: useOnnxEmbeddings,
+      model: useOnnxEmbeddings ? embeddingModel : 'random'
    });

+    if (useOnnxEmbeddings) {
+      // Use ONNX-powered semantic embeddings
+      try {
+        generatedData = await addEmbeddingsToRecords(generatedData, { modelName: embeddingModel });
+        log.info(`Added ONNX embeddings using ${embeddingModel} model`);
+        await Actor.charge({ eventName: 'onnx-embedding-generation', count: generatedData.length });
+      } catch (e) {
+        log.warning(`ONNX embedding failed: ${e.message}. Falling back to random embeddings.`);
+        // Fall back to random embeddings
+        const random = createSeededRandom(seed);
+        generatedData = generatedData.map((item) => ({
+          ...item,
+          embedding: generateRandomEmbedding(effectiveDimensions, random),
+          embeddingModel: 'random',
+          embeddingDimensions: effectiveDimensions
+        }));
+      }
+    } else {
+      // Use random embeddings (faster, for testing)
+      const random = createSeededRandom(seed);
+      generatedData = generatedData.map((item) => ({
+        ...item,
+        embedding: generateRandomEmbedding(effectiveDimensions, random),
+        embeddingModel: 'random',
+        embeddingDimensions: effectiveDimensions
+      }));
+    }
+
    // Charge for embedding generation
    await Actor.charge({ eventName: 'embedding-generation', count: generatedData.length });
    log.info(`Added embeddings to ${generatedData.length} records`);
@ -476,7 +489,7 @@ try {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
-          'User-Agent': 'Apify-AI-Synthetic-Data-Generator/2.2'
+          'User-Agent': 'Apify-AI-Synthetic-Data-Generator/2.5'
        },
        body: JSON.stringify(webhookPayload)
      });
--- a/examples/apify/neural-trader-system/apify_storage/key_value_stores/default/INPUT.json
+++ b/examples/apify/neural-trader-system/apify_storage/key_value_stores/default/INPUT.json
@ -0,0 +1 @@
+{"mode":"backtest","symbols":["AAPL"]}
--- a/examples/apify/neural-trader-system/src/main.js
+++ b/examples/apify/neural-trader-system/src/main.js
@ -1371,7 +1371,12 @@ await Actor.main(async () => {
        const signals = [];
        for (const symbol of symbols) {
            const marketData = generateMarketData(symbol, lookbackPeriod, { stopLoss, takeProfit, timeframe });
-            const technicalData = { rsi: TechnicalIndicators.calculateRSI(marketData.prices), macd: TechnicalIndicators.calculateMACD(marketData.prices) };
+            const technicalData = {
+                rsi: TechnicalIndicators.calculateRSI(marketData.prices),
+                macd: TechnicalIndicators.calculateMACD(marketData.prices),
+                bollinger: TechnicalIndicators.calculateBollinger(marketData.prices),
+                atr: TechnicalIndicators.calculateATR(marketData.highs, marketData.lows, marketData.prices)
+            };
            const features = prepareFeatures(marketData, technicalData);
            const output = neuralEngine.forward(features);
            const signal = signalGenerator.generateSignal([output[0]], marketData);
--- a/examples/apify/neural-trader-system/storage/datasets/default/000000001.json
+++ b/examples/apify/neural-trader-system/storage/datasets/default/000000001.json
@ -0,0 +1,32 @@
+{
+	"timestamp": "2025-12-13T19:20:02.211Z",
+	"symbol": "BTC/USD",
+	"price": 245.881612493563,
+	"signal": "SELL",
+	"confidence": 100,
+	"reasons": [
+		"Neural prediction: 1.93%",
+		"Patterns: double_top"
+	],
+	"target": 233.58753186888484,
+	"stopLoss": 252.02865280590206,
+	"patterns": [
+		"double_top"
+	],
+	"technical": {
+		"rsi": null,
+		"macd": null,
+		"bollinger": null,
+		"atr": null
+	},
+	"prediction": 0.01929405266773886,
+	"swarmPredictions": [
+		0.0732855217097578,
+		0,
+		0,
+		0,
+		0.02318474162893649
+	],
+	"timeframe": "1h",
+	"strategy": "ensemble"
+}
--- a/examples/apify/neural-trader-system/test-analyze.json
+++ b/examples/apify/neural-trader-system/test-analyze.json
@ -0,0 +1,5 @@
+{
+  "mode": "analyze",
+  "symbols": ["AAPL"],
+  "dataSource": "yahoo"
+}
--- a/examples/apify/neural-trader-system/test-backtest.json
+++ b/examples/apify/neural-trader-system/test-backtest.json
@ -0,0 +1,7 @@
+{
+  "mode": "backtest",
+  "symbols": ["AAPL"],
+  "initialCapital": 100000,
+  "monteCarloRuns": 100,
+  "strategy": "ensemble"
+}