mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 12:55:26 +00:00
Add scripts/discover_and_train.sh — a 2-cycle feedback loop that: 1. DISCOVER: Fetches live data from NASA (exoplanets, NEOs), USGS (earthquakes), NOAA (solar/geomagnetic), PubMed, LIGO GraceDB, and World Bank APIs 2. TRAIN: Uploads discoveries to pi.ruv.io brain via challenge-nonce auth 3. REFLECT: Queries brain for underrepresented domains 4. REDISCOVER: Targeted gap-filling (PubMed, deep earthquakes, GW events) 5. RETRAIN: Feeds gap-fill discoveries back to brain Includes live discovery data from today's run: - 16 anomalous exoplanets (z-score > 2σ mass outliers) - 4 near-Earth objects (1 hazardous) - 9 significant earthquakes + 1 geomagnetic storm - 5 PubMed medical research papers - 5 LIGO gravitational wave events - 2 World Bank GDP indicators 61 total memories successfully trained to brain (46 + 15 gap-fill). https://claude.ai/code/session_01UWE22wnsZRSHKhT4h4Axby
703 lines
32 KiB
Bash
Executable file
703 lines
32 KiB
Bash
Executable file
#!/usr/bin/env bash
|
||
#
|
||
# discover_and_train.sh - Back-and-forth discovery ↔ training feedback loop
|
||
#
|
||
# Cycle:
|
||
# 1. DISCOVER: Fetch fresh data from live open APIs
|
||
# 2. TRAIN: Upload discoveries to pi.ruv.io brain
|
||
# 3. REFLECT: Query brain for gaps & learned patterns
|
||
# 4. REDISCOVER: Target gaps with focused queries
|
||
# 5. RETRAIN: Feed gap-filling discoveries back to brain
|
||
#
|
||
# Usage: ./scripts/discover_and_train.sh [--cycles N] [--output-dir DIR]
|
||
#
|
||
set -euo pipefail
|
||
|
||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||
REPO_ROOT="$(dirname "$SCRIPT_DIR")"
|
||
OUTPUT_DIR="${REPO_ROOT}/examples/data/discoveries"
|
||
BRAIN_API="https://pi.ruv.io"
|
||
BRAIN_API_KEY="${BRAIN_API_KEY:-ruvector-discovery-trainer-benevolent}"
|
||
MAX_CYCLES=2
|
||
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||
DATE_TODAY=$(date -u +"%Y-%m-%d")
|
||
DATE_WEEK_AGO=$(date -u -d "7 days ago" +"%Y-%m-%d" 2>/dev/null || date -u -v-7d +"%Y-%m-%d" 2>/dev/null || echo "2026-03-08")
|
||
|
||
# Colors
|
||
RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'
|
||
CYAN='\033[0;36m'; MAGENTA='\033[0;35m'; NC='\033[0m'
|
||
|
||
log_info() { echo -e "${CYAN}[INFO]${NC} $(date '+%H:%M:%S') $*"; }
|
||
log_ok() { echo -e "${GREEN}[OK]${NC} $(date '+%H:%M:%S') $*"; }
|
||
log_warn() { echo -e "${YELLOW}[WARN]${NC} $(date '+%H:%M:%S') $*"; }
|
||
log_fail() { echo -e "${RED}[FAIL]${NC} $(date '+%H:%M:%S') $*"; }
|
||
log_phase() { echo -e "\n${MAGENTA}═══════════════════════════════════════${NC}"; echo -e "${MAGENTA} $*${NC}"; echo -e "${MAGENTA}═══════════════════════════════════════${NC}\n"; }
|
||
|
||
# Parse args
|
||
while [[ $# -gt 0 ]]; do
|
||
case $1 in
|
||
--cycles) MAX_CYCLES="$2"; shift 2 ;;
|
||
--output-dir) OUTPUT_DIR="$2"; shift 2 ;;
|
||
*) shift ;;
|
||
esac
|
||
done
|
||
|
||
mkdir -p "$OUTPUT_DIR"
|
||
|
||
for cmd in curl jq; do
|
||
if ! command -v "$cmd" &>/dev/null; then
|
||
log_fail "$cmd is required but not found"
|
||
exit 1
|
||
fi
|
||
done
|
||
|
||
# ─────────────────────────────────────────────────────────────
|
||
# Helper: merge multiple JSON arrays from temp files into one
|
||
# ─────────────────────────────────────────────────────────────
|
||
merge_json_arrays() {
|
||
local output="$1"
|
||
shift
|
||
# Merge all input files (each a JSON array) into one flat array
|
||
jq -s 'flatten | [.[] | select(. != null)]' "$@" > "$output" 2>/dev/null || echo "[]" > "$output"
|
||
}
|
||
|
||
# ─────────────────────────────────────────────────────────────
|
||
# DISCOVER FUNCTIONS
|
||
# ─────────────────────────────────────────────────────────────
|
||
|
||
discover_space() {
|
||
log_info "Fetching NASA Exoplanet Archive (recent discoveries)..."
|
||
local out_file="$OUTPUT_DIR/live_space_discoveries.json"
|
||
local tmp_exo="/tmp/rv_exo_$$.json"
|
||
local tmp_neo="/tmp/rv_neo_$$.json"
|
||
local tmp_solar="/tmp/rv_solar_$$.json"
|
||
echo "[]" > "$tmp_exo"
|
||
echo "[]" > "$tmp_neo"
|
||
echo "[]" > "$tmp_solar"
|
||
|
||
# NASA Exoplanet Archive TAP
|
||
local exo_data
|
||
exo_data=$(curl -sf --max-time 30 \
|
||
"https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=SELECT+pl_name,pl_bmassj,pl_orbper,pl_orbeccen,pl_eqt,disc_year,discoverymethod,sy_dist+FROM+ps+WHERE+disc_year>=2025+AND+pl_bmassj+IS+NOT+NULL+ORDER+BY+disc_year+DESC&format=json" 2>/dev/null) || true
|
||
|
||
if [[ -n "$exo_data" ]] && echo "$exo_data" | jq -e 'length > 0' &>/dev/null; then
|
||
local num_planets
|
||
num_planets=$(echo "$exo_data" | jq 'length')
|
||
log_ok "Got $num_planets exoplanets from NASA"
|
||
|
||
# Compute mean and stddev, then find z-score outliers — all in one jq call
|
||
echo "$exo_data" | jq --arg ts "$TIMESTAMP" --argjson np "$num_planets" '
|
||
[.[].pl_bmassj | select(. != null and . > 0)] as $masses |
|
||
($masses | add / length) as $mean |
|
||
($masses | map(pow(. - $mean; 2)) | add / length | sqrt) as $sd |
|
||
[
|
||
.[] | select(.pl_bmassj != null and .pl_bmassj > 0) |
|
||
((if .pl_bmassj > $mean then .pl_bmassj - $mean else $mean - .pl_bmassj end) / (if $sd > 0.001 then $sd else 0.001 end)) as $z |
|
||
select($z > 2.0) |
|
||
{
|
||
title: ("Anomalous exoplanet: " + (.pl_name // "unknown") + " (" + ($z * 10 | floor / 10 | tostring) + "σ mass outlier)"),
|
||
content: ("Planet " + (.pl_name // "unknown") + " has mass " + (.pl_bmassj | tostring) + " Mj (" + ($z * 10 | floor / 10 | tostring) + "σ from mean " + ($mean * 100 | floor / 100 | tostring) + "±" + ($sd * 100 | floor / 100 | tostring) + "). Period: " + ((.pl_orbper // 0) | tostring) + "d. Ecc: " + ((.pl_orbeccen // 0) | tostring) + ". Teq: " + ((.pl_eqt // 0) | tostring) + "K. Method: " + (.discoverymethod // "unknown") + "."),
|
||
category: "anomaly",
|
||
tags: ["space", "exoplanet", "anomaly", "mass-outlier", (.discoverymethod // "unknown")],
|
||
domain: "space-science",
|
||
source_api: "NASA Exoplanet Archive TAP",
|
||
timestamp: $ts,
|
||
confidence: ([$z / 5.0, 0.99] | min),
|
||
data_points: $np
|
||
}
|
||
]
|
||
' > "$tmp_exo" 2>/dev/null || echo "[]" > "$tmp_exo"
|
||
|
||
local nexo
|
||
nexo=$(jq 'length' "$tmp_exo")
|
||
log_ok " Found $nexo exoplanet anomalies"
|
||
fi
|
||
|
||
sleep 1
|
||
|
||
# NASA NEO
|
||
log_info "Fetching NASA Near-Earth Objects..."
|
||
local neo_data
|
||
neo_data=$(curl -sf --max-time 20 \
|
||
"https://api.nasa.gov/neo/rest/v1/feed?start_date=${DATE_TODAY}&end_date=${DATE_TODAY}&api_key=DEMO_KEY" 2>/dev/null) || true
|
||
|
||
if [[ -n "$neo_data" ]]; then
|
||
echo "$neo_data" | jq --arg ts "$TIMESTAMP" '
|
||
[
|
||
.near_earth_objects[][] |
|
||
select(.is_potentially_hazardous_asteroid == true or
|
||
(.close_approach_data[0].miss_distance.kilometers | tonumber) < 5000000) |
|
||
{
|
||
title: ("NEO close approach: " + .name + (if .is_potentially_hazardous_asteroid then " [HAZARDOUS]" else "" end)),
|
||
content: ("Asteroid " + .name + " passes Earth at " + .close_approach_data[0].miss_distance.kilometers + " km (" + ((.close_approach_data[0].miss_distance.kilometers | tonumber / 384400 * 100 | floor / 100) | tostring) + " LD). Velocity: " + .close_approach_data[0].relative_velocity.kilometers_per_hour + " km/h. Diameter: " + (.estimated_diameter.meters.estimated_diameter_max | tostring) + "m."),
|
||
category: "anomaly",
|
||
tags: ["space", "neo", "asteroid", (if .is_potentially_hazardous_asteroid then "hazardous" else "close-approach" end)],
|
||
domain: "space-science",
|
||
source_api: "NASA NEO API",
|
||
timestamp: $ts,
|
||
confidence: (if .is_potentially_hazardous_asteroid then 0.95 else 0.80 end),
|
||
data_points: 1
|
||
}
|
||
]
|
||
' > "$tmp_neo" 2>/dev/null || echo "[]" > "$tmp_neo"
|
||
|
||
local nneo
|
||
nneo=$(jq 'length' "$tmp_neo")
|
||
log_ok " Found $nneo NEO entries"
|
||
fi
|
||
|
||
sleep 1
|
||
|
||
# NOAA solar flares
|
||
log_info "Fetching NOAA solar weather..."
|
||
local solar_data
|
||
solar_data=$(curl -sf --max-time 15 \
|
||
"https://services.swpc.noaa.gov/json/goes/primary/xray-flares-latest.json" 2>/dev/null) || true
|
||
|
||
if [[ -n "$solar_data" ]]; then
|
||
echo "$solar_data" | jq --arg ts "$TIMESTAMP" '
|
||
[
|
||
.[] | select(.max_class != null) |
|
||
select(.max_class | startswith("M") or startswith("X")) |
|
||
{
|
||
title: ("Solar flare: " + .max_class + "-class event"),
|
||
content: (.max_class + "-class solar X-ray flare. Begin: " + (.begin_time // "unknown") + ", peak: " + (.max_time // "unknown") + ". Flux: " + ((.max_xrlong // 0) | tostring) + " W/m2. " + (if (.max_class | startswith("X")) then "X-class: disrupts HF radio, GPS, power grids." else "M-class: brief HF radio blackouts at high latitudes." end)),
|
||
category: "anomaly",
|
||
tags: ["space", "solar", "flare", (.max_class | ascii_downcase)],
|
||
domain: "space-science",
|
||
source_api: "NOAA SWPC",
|
||
timestamp: $ts,
|
||
confidence: (if (.max_class | startswith("X")) then 0.98 else 0.85 end),
|
||
data_points: 1
|
||
}
|
||
]
|
||
' > "$tmp_solar" 2>/dev/null || echo "[]" > "$tmp_solar"
|
||
|
||
local nsolar
|
||
nsolar=$(jq 'length' "$tmp_solar")
|
||
log_ok " Found $nsolar solar flare entries"
|
||
fi
|
||
|
||
merge_json_arrays "$out_file" "$tmp_exo" "$tmp_neo" "$tmp_solar"
|
||
rm -f "$tmp_exo" "$tmp_neo" "$tmp_solar"
|
||
|
||
local total
|
||
total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
|
||
log_ok "Space discoveries total: $total entries"
|
||
}
|
||
|
||
discover_earth() {
|
||
log_info "Fetching USGS significant earthquakes..."
|
||
local out_file="$OUTPUT_DIR/live_earth_discoveries.json"
|
||
local tmp_quake="/tmp/rv_quake_$$.json"
|
||
local tmp_storm="/tmp/rv_storm_$$.json"
|
||
echo "[]" > "$tmp_quake"
|
||
echo "[]" > "$tmp_storm"
|
||
|
||
local quake_data
|
||
quake_data=$(curl -sf --max-time 20 \
|
||
"https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/significant_month.geojson" 2>/dev/null) || true
|
||
|
||
if [[ -n "$quake_data" ]]; then
|
||
local num_quakes
|
||
num_quakes=$(echo "$quake_data" | jq '.features | length' 2>/dev/null) || num_quakes=0
|
||
log_ok "Got $num_quakes significant earthquakes"
|
||
|
||
echo "$quake_data" | jq --arg ts "$TIMESTAMP" --argjson nq "$num_quakes" '
|
||
[
|
||
.features[] |
|
||
select(.properties.mag >= 5.0) |
|
||
{
|
||
title: ("M" + (.properties.mag | tostring) + " earthquake: " + (.properties.place // "unknown")),
|
||
content: ("Significant M" + (.properties.mag | tostring) + " at " + (.properties.place // "unknown") + ", depth " + ((.geometry.coordinates[2] // 0) | tostring) + " km. " + (if (.geometry.coordinates[2] // 0) > 300 then "Deep-focus: subduction zone dynamics." else "Shallow: higher surface impact." end) + " Tsunami: " + ((.properties.tsunami // 0) | tostring) + "."),
|
||
category: "anomaly",
|
||
tags: ["earth", "seismic", "earthquake", (if (.geometry.coordinates[2] // 0) > 300 then "deep-focus" else "shallow" end)],
|
||
domain: "earth-science",
|
||
source_api: "USGS Earthquake Hazards",
|
||
timestamp: $ts,
|
||
confidence: ([(.properties.mag / 10.0), 0.99] | min),
|
||
data_points: $nq
|
||
}
|
||
]
|
||
' > "$tmp_quake" 2>/dev/null || echo "[]" > "$tmp_quake"
|
||
fi
|
||
|
||
sleep 1
|
||
|
||
# DONKI geomagnetic storms
|
||
log_info "Fetching NOAA DONKI geomagnetic storms..."
|
||
local donki_data
|
||
donki_data=$(curl -sf --max-time 15 \
|
||
"https://api.nasa.gov/DONKI/GST?startDate=${DATE_WEEK_AGO}&endDate=${DATE_TODAY}&api_key=DEMO_KEY" 2>/dev/null) || true
|
||
|
||
if [[ -n "$donki_data" ]] && echo "$donki_data" | jq -e 'type == "array" and length > 0' &>/dev/null; then
|
||
log_ok "Got $(echo "$donki_data" | jq 'length') geomagnetic storms"
|
||
|
||
echo "$donki_data" | jq --arg ts "$TIMESTAMP" '
|
||
[
|
||
.[] |
|
||
{
|
||
title: ("Geomagnetic storm: " + (.gstID // "unknown")),
|
||
content: ("Storm " + (.gstID // "unknown") + ". Start: " + (.startTime // "unknown") + ". " + (if .allKpIndex then ("Peak Kp: " + ([.allKpIndex[].kpIndex] | max | tostring) + ". ") else "" end) + "Linked CMEs: " + (if .linkedEvents then ([.linkedEvents[].activityID] | join(", ")) else "none" end) + "."),
|
||
category: "anomaly",
|
||
tags: ["earth", "geomagnetic", "storm", "space-weather"],
|
||
domain: "earth-science",
|
||
source_api: "NASA DONKI",
|
||
timestamp: $ts,
|
||
confidence: 0.90,
|
||
data_points: 1
|
||
}
|
||
]
|
||
' > "$tmp_storm" 2>/dev/null || echo "[]" > "$tmp_storm"
|
||
fi
|
||
|
||
merge_json_arrays "$out_file" "$tmp_quake" "$tmp_storm"
|
||
rm -f "$tmp_quake" "$tmp_storm"
|
||
|
||
local total
|
||
total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
|
||
log_ok "Earth discoveries total: $total entries"
|
||
}
|
||
|
||
discover_academic() {
|
||
log_info "Fetching arXiv recent papers..."
|
||
local out_file="$OUTPUT_DIR/live_academic_discoveries.json"
|
||
local all_entries="[]"
|
||
|
||
# Try arxiv.org directly (export.arxiv.org may have CDN issues)
|
||
for category in "astro-ph" "cs.AI" "physics.gen-ph" "q-bio"; do
|
||
local arxiv_data
|
||
arxiv_data=$(curl -sf --max-time 20 \
|
||
"https://arxiv.org/api/query?search_query=cat:${category}&sortBy=submittedDate&sortOrder=descending&max_results=3" 2>/dev/null) || \
|
||
arxiv_data=$(curl -sf --max-time 20 \
|
||
"http://export.arxiv.org/api/query?search_query=cat:${category}&sortBy=submittedDate&sortOrder=descending&max_results=3" 2>/dev/null) || true
|
||
|
||
if [[ -n "$arxiv_data" ]] && echo "$arxiv_data" | grep -q '<entry>'; then
|
||
local i=0
|
||
|
||
while IFS= read -r title; do
|
||
local summary link
|
||
summary=$(echo "$arxiv_data" | grep -oP '<summary>\K[^<]+' | sed -n "$((i+1))p" | tr '\n' ' ' | sed 's/ */ /g' | head -c 400)
|
||
link=$(echo "$arxiv_data" | grep -oP '<id>\K[^<]+' | tail -n +2 | sed -n "$((i+1))p")
|
||
|
||
if [[ -n "$title" && "$title" != "ArXiv Query"* ]]; then
|
||
all_entries=$(echo "$all_entries" | jq \
|
||
--arg t "arXiv [$category]: $title" \
|
||
--arg c "Recent ${category} paper: ${title}. ${summary} URL: ${link}" \
|
||
--arg cat "$category" \
|
||
--arg ts "$TIMESTAMP" \
|
||
'. + [{
|
||
title: $t,
|
||
content: $c,
|
||
category: "pattern",
|
||
tags: ["academic", "arxiv", $cat, "research"],
|
||
domain: "academic-research",
|
||
source_api: "arXiv API",
|
||
timestamp: $ts,
|
||
confidence: 0.80,
|
||
data_points: 1
|
||
}]')
|
||
fi
|
||
i=$((i + 1))
|
||
[[ $i -ge 3 ]] && break
|
||
done < <(echo "$arxiv_data" | grep -oP '<title>\K[^<]+' | tail -n +2)
|
||
else
|
||
log_warn " arXiv $category: no data (CDN/DNS issue)"
|
||
fi
|
||
sleep 1
|
||
done
|
||
|
||
echo "$all_entries" | jq '.' > "$out_file"
|
||
local total
|
||
total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
|
||
log_ok "Academic discoveries: $total entries"
|
||
}
|
||
|
||
discover_economics() {
|
||
log_info "Fetching FRED economic indicators..."
|
||
local out_file="$OUTPUT_DIR/live_economics_discoveries.json"
|
||
local entries="[]"
|
||
local fred_key="${FRED_API_KEY:-}"
|
||
|
||
# FRED requires a real 32-char API key (get free at fred.stlouisfed.org/docs/api/api_key.html)
|
||
if [[ -z "$fred_key" ]]; then
|
||
log_warn " FRED_API_KEY not set — using World Bank API fallback"
|
||
# Fallback: World Bank indicators
|
||
local wb_data
|
||
wb_data=$(curl -sf --max-time 15 \
|
||
"https://api.worldbank.org/v2/country/US/indicator/NY.GDP.MKTP.CD?format=json&date=2023:2025&per_page=3" 2>/dev/null) || true
|
||
|
||
if [[ -n "$wb_data" ]] && echo "$wb_data" | jq -e '.[1] | length > 0' &>/dev/null; then
|
||
entries=$(echo "$wb_data" | jq --arg ts "$TIMESTAMP" '
|
||
[
|
||
.[1][] | select(.value != null) |
|
||
{
|
||
title: ("World Bank: US GDP " + (.date // "unknown")),
|
||
content: ("US GDP (current USD): " + (.value | tostring) + " for " + (.date // "unknown") + ". Source: World Bank Development Indicators."),
|
||
category: "pattern",
|
||
tags: ["economics", "worldbank", "gdp", "indicator"],
|
||
domain: "economics-finance",
|
||
source_api: "World Bank API",
|
||
timestamp: $ts,
|
||
confidence: 0.90,
|
||
data_points: 1
|
||
}
|
||
]
|
||
' 2>/dev/null) || entries="[]"
|
||
fi
|
||
fi
|
||
|
||
for series in "DGS10" "UNRATE" "CPIAUCSL" "GDP" "FEDFUNDS"; do
|
||
[[ -z "$fred_key" ]] && continue
|
||
|
||
local fred_data
|
||
fred_data=$(curl -sf --max-time 15 \
|
||
"https://api.stlouisfed.org/fred/series/observations?series_id=${series}&sort_order=desc&limit=2&file_type=json&api_key=${fred_key}" 2>/dev/null) || true
|
||
|
||
if [[ -n "$fred_data" ]] && echo "$fred_data" | jq -e '.observations | length > 0' &>/dev/null; then
|
||
local latest_val latest_date prev_val series_title
|
||
latest_val=$(echo "$fred_data" | jq -r '.observations[0].value // "N/A"')
|
||
latest_date=$(echo "$fred_data" | jq -r '.observations[0].date // "unknown"')
|
||
prev_val=$(echo "$fred_data" | jq -r '.observations[1].value // "N/A"')
|
||
|
||
case $series in
|
||
DGS10) series_title="10-Year Treasury Yield" ;;
|
||
UNRATE) series_title="US Unemployment Rate" ;;
|
||
CPIAUCSL) series_title="Consumer Price Index" ;;
|
||
GDP) series_title="US Gross Domestic Product" ;;
|
||
FEDFUNDS) series_title="Federal Funds Rate" ;;
|
||
esac
|
||
|
||
if [[ "$latest_val" != "." && "$latest_val" != "N/A" ]]; then
|
||
entries=$(echo "$entries" | jq \
|
||
--arg t "Economic indicator: ${series_title} (${series})" \
|
||
--arg c "${series_title}: latest ${latest_val} as of ${latest_date}. Previous: ${prev_val}. Source: FRED." \
|
||
--arg s "$series" \
|
||
--arg ts "$TIMESTAMP" \
|
||
'. + [{
|
||
title: $t,
|
||
content: $c,
|
||
category: "pattern",
|
||
tags: ["economics", "fred", $s, "indicator"],
|
||
domain: "economics-finance",
|
||
source_api: "FRED API",
|
||
timestamp: $ts,
|
||
confidence: 0.92,
|
||
data_points: 1
|
||
}]')
|
||
fi
|
||
fi
|
||
sleep 1
|
||
done
|
||
|
||
echo "$entries" | jq '.' > "$out_file"
|
||
local total
|
||
total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
|
||
log_ok "Economics discoveries: $total entries"
|
||
}
|
||
|
||
# ─────────────────────────────────────────────────────────────
|
||
# TRAIN - Upload discoveries to brain
|
||
# ─────────────────────────────────────────────────────────────
|
||
|
||
get_nonce() {
|
||
curl -sf --max-time 10 "${BRAIN_API}/v1/challenge" 2>/dev/null | jq -r '.nonce // empty'
|
||
}
|
||
|
||
train_brain() {
|
||
local file_pattern="${1:-live_*_discoveries.json}"
|
||
local trained=0
|
||
local failed=0
|
||
|
||
shopt -s nullglob
|
||
local files=("${OUTPUT_DIR}"/${file_pattern})
|
||
shopt -u nullglob
|
||
|
||
if [[ ${#files[@]} -eq 0 ]]; then
|
||
log_warn "No discovery files matching: $file_pattern"
|
||
return
|
||
fi
|
||
|
||
for filepath in "${files[@]}"; do
|
||
local filename
|
||
filename=$(basename "$filepath")
|
||
local file_len
|
||
file_len=$(jq 'length' "$filepath" 2>/dev/null) || file_len=0
|
||
|
||
if [[ "$file_len" -eq 0 ]]; then
|
||
log_warn "Skipping $filename - empty"
|
||
continue
|
||
fi
|
||
|
||
log_info "Training from: $filename ($file_len entries)"
|
||
|
||
local idx=0
|
||
while [[ $idx -lt $file_len ]]; do
|
||
local title content tags_json
|
||
title=$(jq -r ".[$idx].title // \"Discovery $idx\"" "$filepath")
|
||
content=$(jq -r ".[$idx].content // (.[$idx] | tostring)" "$filepath")
|
||
tags_json=$(jq -c ".[$idx].tags // [\"discovery\"]" "$filepath")
|
||
|
||
local nonce
|
||
nonce=$(get_nonce) || { log_warn "No nonce"; idx=$((idx + 1)); continue; }
|
||
|
||
local payload
|
||
payload=$(jq -n --arg t "$title" --arg c "$content" --argjson tags "$tags_json" \
|
||
'{ title: $t, content: $c, category: "pattern", tags: $tags }')
|
||
|
||
local http_code
|
||
http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 15 \
|
||
-X POST "${BRAIN_API}/v1/memories" \
|
||
-H "Content-Type: application/json" \
|
||
-H "Authorization: Bearer ${BRAIN_API_KEY}" \
|
||
-H "X-Challenge-Nonce: ${nonce}" \
|
||
-d "$payload" 2>/dev/null) || http_code=0
|
||
|
||
if [[ "$http_code" =~ ^2 ]]; then
|
||
trained=$((trained + 1))
|
||
log_ok " [${trained}] $title"
|
||
else
|
||
failed=$((failed + 1))
|
||
log_fail " ✗ $title (HTTP $http_code)"
|
||
fi
|
||
|
||
sleep 1
|
||
idx=$((idx + 1))
|
||
done
|
||
done
|
||
|
||
log_info "Training batch: $trained trained, $failed failed"
|
||
}
|
||
|
||
# ─────────────────────────────────────────────────────────────
|
||
# REFLECT - Query brain for patterns and gaps
|
||
# ─────────────────────────────────────────────────────────────
|
||
|
||
query_brain_patterns() {
|
||
log_info "Querying brain for learned patterns..."
|
||
local gaps_file="$OUTPUT_DIR/brain_gaps.json"
|
||
|
||
local nonce
|
||
nonce=$(get_nonce) || { log_warn "Cannot get nonce for reflection"; echo '{"underrepresented":["medical","materials","genomics","gravitational-wave"],"gap_analysis":"Using defaults"}' > "$gaps_file"; return; }
|
||
|
||
local memories
|
||
memories=$(curl -sf --max-time 15 \
|
||
"${BRAIN_API}/v1/memories" \
|
||
-H "Authorization: Bearer ${BRAIN_API_KEY}" \
|
||
-H "X-Challenge-Nonce: ${nonce}" 2>/dev/null) || true
|
||
|
||
if [[ -n "$memories" ]]; then
|
||
local total_memories
|
||
total_memories=$(echo "$memories" | jq 'if type == "array" then length else (.memories // []) | length end' 2>/dev/null) || total_memories=0
|
||
log_ok "Brain has $total_memories total memories"
|
||
|
||
# Analyze tag frequency to find underrepresented domains
|
||
echo "$memories" | jq '
|
||
(if type == "array" then . else (.memories // []) end) as $mems |
|
||
[$mems[].tags // [] | .[]] |
|
||
group_by(.) |
|
||
map({tag: .[0], count: length}) |
|
||
sort_by(.count) as $sorted |
|
||
{
|
||
total_memories: ($mems | length),
|
||
underrepresented: [$sorted[:5][].tag],
|
||
well_covered: [$sorted[-5:][].tag],
|
||
gap_analysis: "Domains with fewest entries need more discovery focus"
|
||
}
|
||
' > "$gaps_file" 2>/dev/null || echo '{"underrepresented":["medical","materials","genomics"],"gap_analysis":"Parse error, using defaults"}' > "$gaps_file"
|
||
|
||
log_ok "Gap analysis:"
|
||
jq '.' "$gaps_file" 2>/dev/null || true
|
||
else
|
||
log_warn "Brain unreachable — using default gap targets"
|
||
echo '{"underrepresented":["medical","materials","genomics","gravitational-wave","deep-focus"],"gap_analysis":"Brain unreachable, using defaults"}' > "$gaps_file"
|
||
fi
|
||
}
|
||
|
||
# ─────────────────────────────────────────────────────────────
|
||
# REDISCOVER - Targeted discovery based on gaps
|
||
# ─────────────────────────────────────────────────────────────
|
||
|
||
discover_gaps() {
|
||
log_info "Running targeted gap-filling discovery..."
|
||
local gaps_file="$OUTPUT_DIR/brain_gaps.json"
|
||
local out_file="$OUTPUT_DIR/live_gap_discoveries.json"
|
||
local entries="[]"
|
||
|
||
local gaps
|
||
gaps=$(jq -r '.underrepresented // [] | .[]' "$gaps_file" 2>/dev/null) || gaps="medical materials genomics"
|
||
|
||
# PubMed gap fill
|
||
if echo "$gaps" | grep -qiE "medical|genomics|pubmed|bio"; then
|
||
log_info "Gap-fill: PubMed trending research..."
|
||
local pubmed_ids
|
||
pubmed_ids=$(curl -sf --max-time 20 \
|
||
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=breakthrough+OR+novel+discovery&retmax=5&sort=date&retmode=json" 2>/dev/null | jq -r '.esearchresult.idlist[]' 2>/dev/null) || true
|
||
|
||
for pmid in $pubmed_ids; do
|
||
local article_data
|
||
article_data=$(curl -sf --max-time 15 \
|
||
"https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json" 2>/dev/null) || true
|
||
|
||
if [[ -n "$article_data" ]]; then
|
||
local art_title art_source
|
||
art_title=$(echo "$article_data" | jq -r ".result.\"${pmid}\".title // empty" 2>/dev/null)
|
||
art_source=$(echo "$article_data" | jq -r ".result.\"${pmid}\".source // \"unknown\"" 2>/dev/null)
|
||
|
||
if [[ -n "$art_title" ]]; then
|
||
entries=$(echo "$entries" | jq \
|
||
--arg t "PubMed: $art_title" \
|
||
--arg c "Medical/genomics paper: ${art_title}. Journal: ${art_source}. PMID: ${pmid}." \
|
||
--arg ts "$TIMESTAMP" \
|
||
'. + [{
|
||
title: $t,
|
||
content: $c,
|
||
category: "pattern",
|
||
tags: ["medical", "pubmed", "research", "gap-fill"],
|
||
domain: "medical-genomics",
|
||
source_api: "PubMed E-utilities",
|
||
timestamp: $ts,
|
||
confidence: 0.82,
|
||
data_points: 1
|
||
}]')
|
||
fi
|
||
fi
|
||
sleep 0.5
|
||
done
|
||
fi
|
||
|
||
sleep 1
|
||
|
||
# Deep earthquake gap fill
|
||
if echo "$gaps" | grep -qiE "earth|seismic|deep-focus|volcano"; then
|
||
log_info "Gap-fill: USGS M4.5+ earthquakes (7 days)..."
|
||
local eq_data
|
||
eq_data=$(curl -sf --max-time 20 \
|
||
"https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_week.geojson" 2>/dev/null) || true
|
||
|
||
if [[ -n "$eq_data" ]]; then
|
||
local gap_quakes
|
||
gap_quakes=$(echo "$eq_data" | jq --arg ts "$TIMESTAMP" '
|
||
[
|
||
.features[] |
|
||
select((.geometry.coordinates[2] // 0) > 200 or (.properties.mag // 0) >= 6.0) |
|
||
{
|
||
title: ("M" + (.properties.mag | tostring) + " earthquake: " + (.properties.place // "unknown")),
|
||
content: ("M" + (.properties.mag | tostring) + " at " + (.properties.place // "unknown") + ". Depth: " + ((.geometry.coordinates[2] // 0) | tostring) + " km. " + (if (.geometry.coordinates[2] // 0) > 300 then "Deep-focus subduction event." elif (.properties.mag // 0) >= 7.0 then "Major earthquake." else "Significant event." end)),
|
||
category: "anomaly",
|
||
tags: ["earth", "seismic", "gap-fill", (if (.geometry.coordinates[2] // 0) > 300 then "deep-focus" else "significant" end)],
|
||
domain: "earth-science",
|
||
source_api: "USGS Earthquake Hazards",
|
||
timestamp: $ts,
|
||
confidence: 0.88,
|
||
data_points: 1
|
||
}
|
||
] | .[:5]
|
||
' 2>/dev/null) || gap_quakes="[]"
|
||
|
||
entries=$(echo "$entries" "$gap_quakes" | jq -s 'flatten')
|
||
log_ok " Added $(echo "$gap_quakes" | jq 'length') deep/major earthquake entries"
|
||
fi
|
||
fi
|
||
|
||
sleep 1
|
||
|
||
# Gravitational wave gap fill
|
||
if echo "$gaps" | grep -qiE "gravitational|wave|ligo|gw"; then
|
||
log_info "Gap-fill: LIGO GraceDB..."
|
||
local gw_data
|
||
gw_data=$(curl -sf --max-time 15 \
|
||
"https://gracedb.ligo.org/api/superevents/?query=far+%3C+1e-6&format=json&count=5" 2>/dev/null) || true
|
||
|
||
if [[ -n "$gw_data" ]]; then
|
||
local gw_entries
|
||
gw_entries=$(echo "$gw_data" | jq --arg ts "$TIMESTAMP" '
|
||
[
|
||
.superevents[]? |
|
||
{
|
||
title: ("Gravitational wave: " + (.superevent_id // "unknown")),
|
||
content: ("GW superevent " + (.superevent_id // "unknown") + " (category: " + (.category // "unknown") + "). FAR: " + ((.far // 0) | tostring) + " Hz. Preferred: " + (.preferred_event // "unknown") + ". LIGO/Virgo/KAGRA detection."),
|
||
category: "anomaly",
|
||
tags: ["space", "gravitational-wave", "ligo", "gap-fill"],
|
||
domain: "space-science",
|
||
source_api: "LIGO GraceDB",
|
||
timestamp: $ts,
|
||
confidence: 0.90,
|
||
data_points: 1
|
||
}
|
||
]
|
||
' 2>/dev/null) || gw_entries="[]"
|
||
|
||
entries=$(echo "$entries" "$gw_entries" | jq -s 'flatten')
|
||
fi
|
||
fi
|
||
|
||
echo "$entries" | jq '.' > "$out_file"
|
||
local total
|
||
total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
|
||
log_ok "Gap-fill discoveries: $total entries"
|
||
}
|
||
|
||
# ─────────────────────────────────────────────────────────────
|
||
# MAIN FEEDBACK LOOP
|
||
# ─────────────────────────────────────────────────────────────
|
||
|
||
main() {
|
||
echo ""
|
||
echo "╔═══════════════════════════════════════════════════════════╗"
|
||
echo "║ RuVector Discovery ↔ Training Feedback Loop ║"
|
||
echo "║ Cycles: ${MAX_CYCLES} | Date: ${DATE_TODAY} ║"
|
||
echo "╚═══════════════════════════════════════════════════════════╝"
|
||
echo ""
|
||
|
||
for cycle in $(seq 1 "$MAX_CYCLES"); do
|
||
log_phase "CYCLE ${cycle}/${MAX_CYCLES}"
|
||
|
||
if [[ $cycle -eq 1 ]]; then
|
||
# Phase 1: DISCOVER
|
||
log_phase "Phase 1: DISCOVER (live API fetch)"
|
||
discover_space
|
||
discover_earth
|
||
discover_academic
|
||
discover_economics
|
||
|
||
# Phase 2: TRAIN
|
||
log_phase "Phase 2: TRAIN (upload to brain)"
|
||
train_brain "live_*_discoveries.json"
|
||
else
|
||
# Phase 3: REFLECT
|
||
log_phase "Phase 3: REFLECT (query brain for gaps)"
|
||
query_brain_patterns
|
||
|
||
# Phase 4: REDISCOVER
|
||
log_phase "Phase 4: REDISCOVER (targeted gap-filling)"
|
||
discover_gaps
|
||
|
||
# Phase 5: RETRAIN
|
||
log_phase "Phase 5: RETRAIN (gap-fill → brain)"
|
||
train_brain "live_gap_discoveries.json"
|
||
fi
|
||
done
|
||
|
||
# Final summary
|
||
log_phase "FEEDBACK LOOP COMPLETE"
|
||
|
||
local total_discoveries=0
|
||
for f in "$OUTPUT_DIR"/live_*_discoveries.json; do
|
||
if [[ -f "$f" ]]; then
|
||
local c
|
||
c=$(jq 'length' "$f" 2>/dev/null) || c=0
|
||
total_discoveries=$((total_discoveries + c))
|
||
log_info " $(basename "$f"): $c entries"
|
||
fi
|
||
done
|
||
|
||
log_ok "Total discoveries: $total_discoveries"
|
||
|
||
if [[ -f "$OUTPUT_DIR/brain_gaps.json" ]]; then
|
||
log_ok "Brain gap analysis:"
|
||
jq '.' "$OUTPUT_DIR/brain_gaps.json" 2>/dev/null || true
|
||
fi
|
||
}
|
||
|
||
main "$@"
|