#!/usr/bin/env bash # ============================================================================= # RuVector Training Orchestrator # Interactive CLI for managing the pi.ruv.io brain API # # Provides 6 modes: # 1. Discovery Scanner - Scan local discovery JSON files # 2. Brain Gap Analysis - Query brain for high-novelty domains # 3. Batch Upload - Upload discoveries with nonce auth + PII strip # 4. Training & Optimization - Trigger training, view SONA stats # 5. Cross-Domain Discovery - Find connections via drift & partition # 6. Interactive Explorer - Search brain memories # # Usage: # PI= ./scripts/training_orchestrator.sh [--help] [--dry-run] # # Environment: # PI Bearer token for brain API authentication # DISCOVERIES_DIR Override default discoveries directory # ============================================================================= set -euo pipefail # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- BRAIN_API="https://pi.ruv.io" DISCOVERIES_DIR="${DISCOVERIES_DIR:-$(cd "$(dirname "$0")/.." && pwd)/examples/data/discoveries}" DRY_RUN=false # --------------------------------------------------------------------------- # ANSI color codes # --------------------------------------------------------------------------- RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' CYAN='\033[0;36m' BLUE='\033[0;34m' MAGENTA='\033[0;35m' BOLD='\033[1m' DIM='\033[2m' NC='\033[0m' # --------------------------------------------------------------------------- # Logging helpers # --------------------------------------------------------------------------- log_info() { echo -e " ${CYAN}[INFO]${NC} $(date '+%H:%M:%S') $*"; } log_ok() { echo -e " ${GREEN}[ OK ]${NC} $(date '+%H:%M:%S') $*"; } log_fail() { echo -e " ${RED}[FAIL]${NC} $(date '+%H:%M:%S') $*"; } log_warn() { echo -e " ${YELLOW}[WARN]${NC} $(date '+%H:%M:%S') $*"; } log_head() { echo -e "\n ${BOLD}${MAGENTA}=== $* ===${NC}\n"; } # --------------------------------------------------------------------------- # CLI flags # --------------------------------------------------------------------------- show_help() { cat <<'HELPTEXT' RuVector Training Orchestrator - Interactive brain training CLI USAGE: PI= ./scripts/training_orchestrator.sh [OPTIONS] OPTIONS: --help, -h Show this help message and exit --dry-run Simulate uploads without sending data to the API ENVIRONMENT: PI Bearer token for API authentication (required for modes 2-6) DISCOVERIES_DIR Override the default discoveries directory MODES (interactive menu): 1 Discovery Scanner Scan local JSON files, count entries, show domain coverage 2 Brain Gap Analysis Query /v1/explore for curiosity/novelty, find gaps 3 Batch Upload Upload discovery entries via /v1/memories with nonce auth 4 Training & Optimization Trigger /v1/train, display SONA stats from /v1/sona/stats 5 Cross-Domain Discovery Query /v1/drift and /v1/partition for cross-domain links 6 Interactive Explorer Search brain memories via /v1/memories/search?q=QUERY FEATURES: - PII stripping: emails, phone numbers, SSNs removed before upload - Progress bar for batch uploads - Colored terminal output - Graceful error handling on all API calls EXAMPLES: PI=my-secret-token ./scripts/training_orchestrator.sh PI=my-secret-token ./scripts/training_orchestrator.sh --dry-run HELPTEXT exit 0 } for arg in "$@"; do case "$arg" in --help|-h) show_help ;; --dry-run) DRY_RUN=true ;; *) echo "Unknown option: $arg" >&2; show_help ;; esac done # --------------------------------------------------------------------------- # Dependency checks # --------------------------------------------------------------------------- check_deps() { local missing=() for cmd in curl jq; do command -v "$cmd" &>/dev/null || missing+=("$cmd") done if [[ ${#missing[@]} -gt 0 ]]; then log_fail "Missing required tools: ${missing[*]}" echo " Install with: sudo apt-get install -y ${missing[*]}" exit 1 fi } # --------------------------------------------------------------------------- # API helper - wraps curl with Bearer auth and error handling # Returns the response body on success, prints error and returns 1 on failure # --------------------------------------------------------------------------- api_call() { local method="$1" endpoint="$2" shift 2 local url="${BRAIN_API}${endpoint}" local -a headers=(-H "Content-Type: application/json") if [[ -n "${PI:-}" ]]; then headers+=(-H "Authorization: Bearer ${PI}") fi local response http_code body response=$(curl -s -w "\n%{http_code}" --max-time 15 \ "${headers[@]}" -X "$method" "$@" "$url" 2>/dev/null) || { log_fail "Network error calling $method $url" return 1 } http_code=$(echo "$response" | tail -1) body=$(echo "$response" | sed '$d') if [[ "$http_code" -ge 200 && "$http_code" -lt 300 ]]; then echo "$body" return 0 else log_fail "HTTP $http_code on $method $endpoint" echo "$body" | jq . 2>/dev/null || echo "$body" return 1 fi } # Checks that PI token is set; prints instructions if not require_token() { if [[ -z "${PI:-}" ]]; then log_fail "PI environment variable not set." echo -e " Export your bearer token first: ${BOLD}export PI=your-token${NC}" return 1 fi } # --------------------------------------------------------------------------- # PII stripping - remove emails, phone numbers, SSNs from text # --------------------------------------------------------------------------- strip_pii() { sed -E \ -e 's/[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}/[REDACTED_EMAIL]/g' \ -e 's/(\+?1?[-. ]?\(?[0-9]{3}\)?[-. ]?[0-9]{3}[-. ]?[0-9]{4})/[REDACTED_PHONE]/g' \ -e 's/[0-9]{3}-[0-9]{2}-[0-9]{4}/[REDACTED_SSN]/g' } # --------------------------------------------------------------------------- # Progress bar: progress_bar # --------------------------------------------------------------------------- progress_bar() { local current="$1" total="$2" width=40 local pct=0 filled=0 empty=0 if (( total > 0 )); then pct=$(( current * 100 / total )) filled=$(( current * width / total )) fi empty=$(( width - filled )) local bar_full="" bar_empty="" for (( i=0; i/dev/null || echo 0) # Extract unique domains domains_str=$(jq -r ' if type == "array" then [.[].domain // "unknown"] | unique | join(", ") else .domain // "unknown" end ' "$f" 2>/dev/null || echo "parse-error") # Track domain counts per file while IFS=',' read -ra doms; do for d in "${doms[@]}"; do d=$(echo "$d" | xargs) # trim whitespace [[ -n "$d" ]] && domain_counts["$d"]=$(( ${domain_counts["$d"]:-0} + 1 )) done done <<< "$domains_str" printf " ${CYAN}%-45s${NC} ${GREEN}%8d${NC} %s\n" "$fname" "$count" "$domains_str" total_files=$((total_files + 1)) total_entries=$((total_entries + count)) done printf " %s\n" "$(printf '%.0s-' {1..85})" echo -e "\n ${BOLD}Summary:${NC}" echo -e " Files scanned: ${GREEN}${total_files}${NC}" echo -e " Total entries: ${GREEN}${total_entries}${NC}" echo -e " Unique domains: ${GREEN}${#domain_counts[@]}${NC}" if [[ ${#domain_counts[@]} -gt 0 ]]; then echo -e "\n ${BOLD}Domain Coverage:${NC}" # Sort domains by count descending for domain in $( for k in "${!domain_counts[@]}"; do echo "${domain_counts[$k]} $k" done | sort -rn | awk '{print $2}' ); do local cnt=${domain_counts[$domain]} printf " ${BLUE}%-30s${NC} %d file(s)\n" "$domain" "$cnt" done fi } # =========================================================================== # Mode 2: Brain Gap Analysis # =========================================================================== mode_gap_analysis() { log_head "Brain Gap Analysis" require_token || return 1 log_info "Querying brain exploration data from /v1/explore..." local explore_data explore_data=$(api_call GET "/v1/explore") || { log_fail "Could not reach /v1/explore" return 1 } echo -e "\n ${BOLD}Curiosity & Novelty Landscape:${NC}\n" # Parse the explore response - handles various response shapes # Extract domain/novelty/curiosity tuples and display as bar chart echo "$explore_data" | jq -r ' # Normalize different response shapes into lines of "domain\tnovelty\tcuriosity" if type == "object" then if .domains then .domains | to_entries[] | "\(.key)\t\(.value.novelty // .value.score // 0)\t\(.value.curiosity // 0)" elif .explorations then .explorations[] | "\(.domain // .topic // "unknown")\t\(.novelty // 0)\t\(.curiosity // 0)" elif .clusters then .clusters[] | "\(.label // .category // "unknown")\t\(.novelty // .coherence // 0)\t\(.curiosity // 0)" else to_entries[] | "\(.key)\t\(if (.value | type) == "number" then .value else 0 end)\t0" end elif type == "array" then .[] | "\(.domain // .topic // .label // "unknown")\t\(.novelty // 0)\t\(.curiosity // 0)" else empty end ' 2>/dev/null | sort -t$'\t' -k2 -rn | head -20 | while IFS=$'\t' read -r domain novelty curiosity; do # Build a visual bar proportional to novelty score local bar_len color bar_len=$(printf '%.0f' "$(echo "$novelty * 30" | bc -l 2>/dev/null || echo 5)") [[ "$bar_len" -gt 30 ]] && bar_len=30 [[ "$bar_len" -lt 1 ]] && bar_len=1 local bar="" for (( i=0; i 0.7" | bc -l 2>/dev/null || echo 0) )); then color="$RED" elif (( $(echo "$novelty > 0.4" | bc -l 2>/dev/null || echo 0) )); then color="$YELLOW" else color="$GREEN" fi printf " ${BOLD}%-25s${NC} ${color}%-30s${NC} novelty=%-6s curiosity=%s\n" \ "$domain" "$bar" "$novelty" "$curiosity" done echo "" echo -e " ${BOLD}Legend:${NC}" echo -e " ${RED}█${NC} High novelty (>0.7) = domain needs more content" echo -e " ${YELLOW}█${NC} Medium novelty (0.4-0.7) = partially covered" echo -e " ${GREEN}█${NC} Low novelty (<0.4) = well covered" } # =========================================================================== # Mode 3: Batch Upload # =========================================================================== mode_batch_upload() { log_head "Batch Upload to Brain" require_token || return 1 if $DRY_RUN; then echo -e " ${YELLOW}*** DRY-RUN MODE -- no data will be sent ***${NC}\n" fi if [[ ! -d "$DISCOVERIES_DIR" ]]; then log_fail "Discoveries directory not found: $DISCOVERIES_DIR" return 1 fi # Collect all entries into a temp file (one JSON object per line) local entries_file entries_file=$(mktemp /tmp/ruv_upload.XXXXXX) trap "rm -f '$entries_file'" RETURN for f in "$DISCOVERIES_DIR"/*.json; do [[ -f "$f" ]] || continue jq -c 'if type == "array" then .[] else . end' "$f" 2>/dev/null >> "$entries_file" done local total total=$(wc -l < "$entries_file") if [[ "$total" -eq 0 ]]; then log_warn "No discovery entries found in $DISCOVERIES_DIR" return 0 fi log_info "Found $total entries to upload" echo "" local success=0 fail=0 skipped=0 current=0 while IFS= read -r entry; do current=$((current + 1)) progress_bar "$current" "$total" # Extract fields local title content tags domain title=$(echo "$entry" | jq -r '.title // "untitled"') content=$(echo "$entry" | jq -r '.content // ""') tags=$(echo "$entry" | jq -c '.tags // []') domain=$(echo "$entry" | jq -r '.domain // "general"') # Skip entries missing content if [[ -z "$content" || "$content" == "null" ]]; then skipped=$((skipped + 1)) continue fi # Strip PII from title and content title=$(echo "$title" | strip_pii) content=$(echo "$content" | strip_pii) # In dry-run mode, skip actual upload if $DRY_RUN; then skipped=$((skipped + 1)) continue fi # Step 1: Get challenge nonce local nonce_resp nonce nonce_resp=$(api_call GET "/v1/challenge" 2>/dev/null) || { fail=$((fail + 1)); continue; } nonce=$(echo "$nonce_resp" | jq -r '.nonce // .challenge // empty' 2>/dev/null) if [[ -z "$nonce" ]]; then fail=$((fail + 1)) continue fi # Step 2: Build payload with nonce local payload payload=$(jq -n \ --arg t "$title" \ --arg c "$content" \ --arg d "$domain" \ --arg n "$nonce" \ --argjson tags "$tags" \ '{title: $t, content: ($c | .[:2000]), domain: $d, tags: $tags, nonce: $n}') # Step 3: POST to /v1/memories if api_call POST "/v1/memories" -d "$payload" &>/dev/null; then success=$((success + 1)) else fail=$((fail + 1)) fi # Brief rate-limit pause sleep 0.3 done < "$entries_file" echo "" # clear progress bar line echo "" echo -e " ${BOLD}Upload Summary:${NC}" echo -e " Total processed: $total" echo -e " ${GREEN}Uploaded: $success${NC}" [[ $fail -gt 0 ]] && echo -e " ${RED}Failed: $fail${NC}" [[ $skipped -gt 0 ]] && echo -e " ${YELLOW}Skipped: $skipped${NC}" $DRY_RUN && echo -e " ${DIM}(dry-run -- nothing was sent)${NC}" } # =========================================================================== # Mode 4: Training & Optimization # =========================================================================== mode_training() { log_head "Training & Optimization" require_token || return 1 # Trigger training log_info "Triggering training via POST /v1/train..." local train_result train_result=$(api_call POST "/v1/train" -d '{}') || { log_warn "Training endpoint returned an error (may still have triggered)" } if [[ -n "${train_result:-}" ]]; then echo -e "\n ${BOLD}Training Response:${NC}" echo "$train_result" | jq -r ' to_entries[] | " \(.key): \(.value)" ' 2>/dev/null || echo " $train_result" fi # Fetch SONA stats echo "" log_info "Fetching SONA stats from GET /v1/sona/stats..." local sona_stats sona_stats=$(api_call GET "/v1/sona/stats") || { log_warn "Could not retrieve SONA stats" return 0 } echo -e "\n ${BOLD}SONA Statistics:${NC}" echo "$sona_stats" | jq -r ' if type == "object" then to_entries[] | if (.value | type) == "object" then "\n \(.key):", (.value | to_entries[] | " \(.key): \(.value)") else " \(.key): \(.value)" end else " \(.)" end ' 2>/dev/null || echo " $sona_stats" } # =========================================================================== # Mode 5: Cross-Domain Discovery # =========================================================================== mode_cross_domain() { log_head "Cross-Domain Discovery" require_token || return 1 # Query semantic drift log_info "Querying semantic drift via GET /v1/drift..." local drift_data drift_data=$(api_call GET "/v1/drift" 2>/dev/null) || true if [[ -n "${drift_data:-}" ]]; then echo -e "\n ${BOLD}Semantic Drift:${NC}" echo "$drift_data" | jq -r ' if type == "array" then .[] | " [\(.from // .source // "?")] --> [\(.to // .target // "?")] drift=\(.score // .magnitude // "n/a")" elif type == "object" then if .drifts then .drifts[] | " [\(.from // .source)] --> [\(.to // .target)] drift=\(.score // .magnitude // "n/a")" else to_entries[] | " \(.key): \(.value)" end else " \(.)" end ' 2>/dev/null || echo "$drift_data" | jq . 2>/dev/null || echo " $drift_data" else log_warn "Drift endpoint unavailable" fi echo "" # Query domain partitions log_info "Querying domain partitions via GET /v1/partition..." local partition_data partition_data=$(api_call GET "/v1/partition" 2>/dev/null) || true if [[ -n "${partition_data:-}" ]]; then echo -e "\n ${BOLD}Domain Partitions:${NC}" echo "$partition_data" | jq -r ' if type == "array" then .[] | " Cluster: \(.id // .name // "?") | Members: \(.members // .domains // [] | join(", ")) | Size: \(.size // (.members // [] | length))" elif type == "object" then if .partitions then .partitions[] | " Cluster: \(.id // .name) | Size: \(.size // "?") | Members: \(.members // .domains // [] | join(", "))" else to_entries[] | " \(.key): \(.value)" end else " \(.)" end ' 2>/dev/null || echo "$partition_data" | jq . 2>/dev/null || echo " $partition_data" else log_warn "Partition endpoint unavailable" fi # Show cross-domain insight if [[ -n "${drift_data:-}" && -n "${partition_data:-}" ]]; then echo "" echo -e " ${BOLD}Insight:${NC} Domains with high drift and small partition size" echo -e " are the best candidates for cross-domain knowledge transfer." fi } # =========================================================================== # Mode 6: Interactive Explorer # =========================================================================== mode_explorer() { log_head "Interactive Explorer" require_token || return 1 echo -e " Search the brain for memories. Type ${BOLD}q${NC} to return to menu.\n" while true; do echo -ne " ${CYAN}search>${NC} " read -r query || break [[ -z "$query" ]] && continue [[ "$query" == "q" || "$query" == "quit" || "$query" == "exit" ]] && break # URL-encode the query local encoded_query encoded_query=$(printf '%s' "$query" | jq -sRr @uri 2>/dev/null || echo "$query") local results results=$(api_call GET "/v1/memories/search?q=${encoded_query}") || { log_fail "Search failed" continue } # Count results (handle array or wrapper object) local count count=$(echo "$results" | jq ' if type == "array" then length elif .results then .results | length elif .memories then .memories | length else 0 end ' 2>/dev/null || echo 0) echo -e " ${GREEN}Found $count result(s)${NC}\n" # Display results echo "$results" | jq -r ' (if type == "array" then . elif .results then .results elif .memories then .memories else [.] end)[:10][] | " \u001b[1m\(.title // .key // "untitled")\u001b[0m", " Domain: \(.domain // "unknown") | Score: \(.score // .similarity // "n/a")", " \(.content // .value // "" | if length > 120 then .[:120] + "..." else . end)", "" ' 2>/dev/null || echo "$results" | jq . 2>/dev/null || echo " $results" done } # =========================================================================== # Banner and main menu # =========================================================================== show_banner() { echo -e "${BOLD}${MAGENTA}" echo " ╔═══════════════════════════════════════════════════════════╗" echo " ║ RuVector Training Orchestrator v1.0 ║" echo " ║ Brain API: pi.ruv.io ║" echo " ╚═══════════════════════════════════════════════════════════╝" echo -ne "${NC}" if $DRY_RUN; then echo -e " ${YELLOW}[DRY-RUN MODE ACTIVE]${NC}" fi if [[ -n "${PI:-}" ]]; then echo -e " ${GREEN}API token: configured${NC}" else echo -e " ${YELLOW}API token: not set (export PI=your-token)${NC}" fi echo "" } main_menu() { echo -e " ${BOLD}Select a mode:${NC}\n" echo -e " ${CYAN}1${NC} Discovery Scanner ${DIM}Scan local JSON files for entries and domains${NC}" echo -e " ${CYAN}2${NC} Brain Gap Analysis ${DIM}Query /v1/explore for novelty gaps${NC}" echo -e " ${CYAN}3${NC} Batch Upload ${DIM}Upload entries via /v1/memories with nonce auth${NC}" echo -e " ${CYAN}4${NC} Training & Optimization ${DIM}POST /v1/train + GET /v1/sona/stats${NC}" echo -e " ${CYAN}5${NC} Cross-Domain Discovery ${DIM}GET /v1/drift + /v1/partition${NC}" echo -e " ${CYAN}6${NC} Interactive Explorer ${DIM}Search brain with /v1/memories/search${NC}" echo -e " ${CYAN}q${NC} Quit" echo "" echo -ne " ${BOLD}Choice [1-6/q]:${NC} " } # =========================================================================== # Entry point # =========================================================================== check_deps show_banner while true; do main_menu read -r choice || break case "$choice" in 1) mode_discovery_scanner ;; 2) mode_gap_analysis ;; 3) mode_batch_upload ;; 4) mode_training ;; 5) mode_cross_domain ;; 6) mode_explorer ;; q|Q|quit|exit) echo -e "\n ${GREEN}Goodbye.${NC}\n"; exit 0 ;; *) log_warn "Invalid choice: '$choice'. Enter 1-6 or q." ;; esac echo "" done