ruvector/scripts/discover_and_train.sh
Claude 67444abf9c feat: discover ↔ train feedback loop with live API discovery
Add scripts/discover_and_train.sh — a 2-cycle feedback loop that:
1. DISCOVER: Fetches live data from NASA (exoplanets, NEOs), USGS
   (earthquakes), NOAA (solar/geomagnetic), PubMed, LIGO GraceDB,
   and World Bank APIs
2. TRAIN: Uploads discoveries to pi.ruv.io brain via challenge-nonce auth
3. REFLECT: Queries brain for underrepresented domains
4. REDISCOVER: Targeted gap-filling (PubMed, deep earthquakes, GW events)
5. RETRAIN: Feeds gap-fill discoveries back to brain

Includes live discovery data from today's run:
- 16 anomalous exoplanets (z-score > 2σ mass outliers)
- 4 near-Earth objects (1 hazardous)
- 9 significant earthquakes + 1 geomagnetic storm
- 5 PubMed medical research papers
- 5 LIGO gravitational wave events
- 2 World Bank GDP indicators

61 total memories successfully trained to brain (46 + 15 gap-fill).

https://claude.ai/code/session_01UWE22wnsZRSHKhT4h4Axby
2026-03-16 23:16:24 -04:00

703 lines
32 KiB
Bash
Executable file
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env bash
#
# discover_and_train.sh - Back-and-forth discovery ↔ training feedback loop
#
# Cycle:
# 1. DISCOVER: Fetch fresh data from live open APIs
# 2. TRAIN: Upload discoveries to pi.ruv.io brain
# 3. REFLECT: Query brain for gaps & learned patterns
# 4. REDISCOVER: Target gaps with focused queries
# 5. RETRAIN: Feed gap-filling discoveries back to brain
#
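# Usage: ./scripts/discover_and_train.sh [--cycles N] [--output-dir DIR]
#
# Cycle 1 runs steps 1-2; every later cycle runs steps 3-5 (default: 2 cycles).
# Environment: BRAIN_API_KEY (optional, bearer token for the brain API) and
# FRED_API_KEY (optional; without it the economics step uses the World Bank API).
#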
# Strict mode: abort on errors, unset variables and failed pipeline stages.
set -euo pipefail

# Locations, resolved relative to this script so it can be run from anywhere.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"
OUTPUT_DIR="${REPO_ROOT}/examples/data/discoveries"

# Brain endpoint and credentials (the key may be overridden from the environment).
BRAIN_API="https://pi.ruv.io"
BRAIN_API_KEY="${BRAIN_API_KEY:-ruvector-discovery-trainer-benevolent}"

# Number of feedback cycles unless --cycles overrides it.
MAX_CYCLES=2

# UTC timestamps used to stamp entries and to bound date-range queries.
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
DATE_TODAY=$(date -u +"%Y-%m-%d")

# "7 days ago": try the GNU date syntax, then the BSD syntax, then a fixed date.
if ! DATE_WEEK_AGO=$(date -u -d "7 days ago" +"%Y-%m-%d" 2>/dev/null); then
  if ! DATE_WEEK_AGO=$(date -u -v-7d +"%Y-%m-%d" 2>/dev/null); then
    DATE_WEEK_AGO="2026-03-08"
  fi
fi
# Colors (ANSI escape sequences, expanded at print time)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m'

# Shared formatter: prints "<color>[LABEL]<reset> HH:MM:SS message" on stdout,
# expanding backslash escapes the same way `echo -e` does.
_log_line() {
  local color=$1 label=$2
  shift 2
  printf '%b\n' "${color}[${label}]${NC} $(date '+%H:%M:%S') $*"
}

log_info() { _log_line "$CYAN" "INFO" "$@"; }
log_ok() { _log_line "$GREEN" "OK" "$@"; }
log_warn() { _log_line "$YELLOW" "WARN" "$@"; }
log_fail() { _log_line "$RED" "FAIL" "$@"; }

# Phase banner: blank line, rule, indented title, rule, blank line.
log_phase() {
  local rule="${MAGENTA}═══════════════════════════════════════${NC}"
  printf '%b\n' "\n${rule}" "${MAGENTA} $*${NC}" "${rule}\n"
}
# Parse args
#
# parse_args [--cycles N] [--output-dir DIR] [ignored...]
#   --cycles N        sets MAX_CYCLES; N must be a non-negative integer
#   --output-dir DIR  sets OUTPUT_DIR; DIR must be non-empty
# Unrecognised arguments are skipped, as before. A missing or malformed value
# ends the script with status 2 and a message, instead of an "unbound
# variable" abort (missing value) or a run that silently performs zero cycles
# (non-numeric --cycles).
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --cycles)
        # [[ ]] short-circuits, so $2 is only expanded when it exists.
        if [[ $# -lt 2 || ! "$2" =~ ^[0-9]+$ ]]; then
          log_fail "--cycles requires a non-negative integer"
          exit 2
        fi
        MAX_CYCLES="$2"
        shift 2
        ;;
      --output-dir)
        if [[ $# -lt 2 || -z "$2" ]]; then
          log_fail "--output-dir requires a directory path"
          exit 2
        fi
        OUTPUT_DIR="$2"
        shift 2
        ;;
      *)
        shift
        ;;
    esac
  done
}
parse_args "$@"
# Make sure the output directory exists before anything writes into it.
mkdir -p "$OUTPUT_DIR"

# Both external tools are mandatory; stop at the first one that is missing.
for cmd in curl jq; do
  command -v "$cmd" &>/dev/null && continue
  log_fail "$cmd is required but not found"
  exit 1
done
# ─────────────────────────────────────────────────────────────
# Helper: merge multiple JSON arrays from temp files into one
# ─────────────────────────────────────────────────────────────
# merge_json_arrays OUTPUT INPUT...
# Concatenates the JSON arrays stored in the INPUT files into one flat array,
# drops null elements and writes the result to OUTPUT. If jq fails for any
# reason, OUTPUT is written as an empty array instead.
merge_json_arrays() {
  local dest=$1
  shift
  if ! jq -s 'flatten | [.[] | select(. != null)]' "$@" > "$dest" 2>/dev/null; then
    echo "[]" > "$dest"
  fi
}
# ─────────────────────────────────────────────────────────────
# DISCOVER FUNCTIONS
# ─────────────────────────────────────────────────────────────
# discover_space
# Fetches three live feeds and writes the combined entries to
# $OUTPUT_DIR/live_space_discoveries.json:
#   1. NASA Exoplanet Archive (TAP): planets whose mass is > 2 sigma from the
#      mean of the returned set.
#   2. NASA NEO feed for $DATE_TODAY: hazardous objects or approaches < 5e6 km.
#   3. NOAA SWPC X-ray flares: M- and X-class events.
# Each feed is best-effort: a failed download or jq error leaves that feed's
# contribution as an empty array.
# Globals read: OUTPUT_DIR, TIMESTAMP, DATE_TODAY.
# Returns: 1 if the scratch directory cannot be created, otherwise the status
#          of the final log call.
discover_space() {
  log_info "Fetching NASA Exoplanet Archive (recent discoveries)..."
  local out_file="$OUTPUT_DIR/live_space_discoveries.json"
  local tmp_dir tmp_exo tmp_neo tmp_solar
  local exo_data neo_data solar_data
  local num_planets nexo nneo nsolar total

  # Scratch files live in a private mktemp directory rather than under fixed
  # /tmp/rv_*_$$ names, which are guessable and would follow a planted symlink.
  tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/rv_space.XXXXXX") || {
    log_fail "Cannot create temp directory for space discoveries"
    return 1
  }
  tmp_exo="$tmp_dir/exo.json"
  tmp_neo="$tmp_dir/neo.json"
  tmp_solar="$tmp_dir/solar.json"
  echo "[]" > "$tmp_exo"
  echo "[]" > "$tmp_neo"
  echo "[]" > "$tmp_solar"

  # NASA Exoplanet Archive TAP
  exo_data=$(curl -sf --max-time 30 \
    "https://exoplanetarchive.ipac.caltech.edu/TAP/sync?query=SELECT+pl_name,pl_bmassj,pl_orbper,pl_orbeccen,pl_eqt,disc_year,discoverymethod,sy_dist+FROM+ps+WHERE+disc_year>=2025+AND+pl_bmassj+IS+NOT+NULL+ORDER+BY+disc_year+DESC&format=json" 2>/dev/null) || true
  if [[ -n "$exo_data" ]] && echo "$exo_data" | jq -e 'length > 0' &>/dev/null; then
    num_planets=$(echo "$exo_data" | jq 'length')
    log_ok "Got $num_planets exoplanets from NASA"
    # Compute mean and stddev, then find z-score outliers — all in one jq call
    echo "$exo_data" | jq --arg ts "$TIMESTAMP" --argjson np "$num_planets" '
      [.[].pl_bmassj | select(. != null and . > 0)] as $masses |
      ($masses | add / length) as $mean |
      ($masses | map(pow(. - $mean; 2)) | add / length | sqrt) as $sd |
      [
      .[] | select(.pl_bmassj != null and .pl_bmassj > 0) |
      ((if .pl_bmassj > $mean then .pl_bmassj - $mean else $mean - .pl_bmassj end) / (if $sd > 0.001 then $sd else 0.001 end)) as $z |
      select($z > 2.0) |
      {
      title: ("Anomalous exoplanet: " + (.pl_name // "unknown") + " (" + ($z * 10 | floor / 10 | tostring) + "σ mass outlier)"),
      content: ("Planet " + (.pl_name // "unknown") + " has mass " + (.pl_bmassj | tostring) + " Mj (" + ($z * 10 | floor / 10 | tostring) + "σ from mean " + ($mean * 100 | floor / 100 | tostring) + "±" + ($sd * 100 | floor / 100 | tostring) + "). Period: " + ((.pl_orbper // 0) | tostring) + "d. Ecc: " + ((.pl_orbeccen // 0) | tostring) + ". Teq: " + ((.pl_eqt // 0) | tostring) + "K. Method: " + (.discoverymethod // "unknown") + "."),
      category: "anomaly",
      tags: ["space", "exoplanet", "anomaly", "mass-outlier", (.discoverymethod // "unknown")],
      domain: "space-science",
      source_api: "NASA Exoplanet Archive TAP",
      timestamp: $ts,
      confidence: ([$z / 5.0, 0.99] | min),
      data_points: $np
      }
      ]
    ' > "$tmp_exo" 2>/dev/null || echo "[]" > "$tmp_exo"
    nexo=$(jq 'length' "$tmp_exo")
    log_ok " Found $nexo exoplanet anomalies"
  fi
  sleep 1

  # NASA NEO
  log_info "Fetching NASA Near-Earth Objects..."
  neo_data=$(curl -sf --max-time 20 \
    "https://api.nasa.gov/neo/rest/v1/feed?start_date=${DATE_TODAY}&end_date=${DATE_TODAY}&api_key=DEMO_KEY" 2>/dev/null) || true
  if [[ -n "$neo_data" ]]; then
    echo "$neo_data" | jq --arg ts "$TIMESTAMP" '
      [
      .near_earth_objects[][] |
      select(.is_potentially_hazardous_asteroid == true or
      (.close_approach_data[0].miss_distance.kilometers | tonumber) < 5000000) |
      {
      title: ("NEO close approach: " + .name + (if .is_potentially_hazardous_asteroid then " [HAZARDOUS]" else "" end)),
      content: ("Asteroid " + .name + " passes Earth at " + .close_approach_data[0].miss_distance.kilometers + " km (" + ((.close_approach_data[0].miss_distance.kilometers | tonumber / 384400 * 100 | floor / 100) | tostring) + " LD). Velocity: " + .close_approach_data[0].relative_velocity.kilometers_per_hour + " km/h. Diameter: " + (.estimated_diameter.meters.estimated_diameter_max | tostring) + "m."),
      category: "anomaly",
      tags: ["space", "neo", "asteroid", (if .is_potentially_hazardous_asteroid then "hazardous" else "close-approach" end)],
      domain: "space-science",
      source_api: "NASA NEO API",
      timestamp: $ts,
      confidence: (if .is_potentially_hazardous_asteroid then 0.95 else 0.80 end),
      data_points: 1
      }
      ]
    ' > "$tmp_neo" 2>/dev/null || echo "[]" > "$tmp_neo"
    nneo=$(jq 'length' "$tmp_neo")
    log_ok " Found $nneo NEO entries"
  fi
  sleep 1

  # NOAA solar flares
  log_info "Fetching NOAA solar weather..."
  solar_data=$(curl -sf --max-time 15 \
    "https://services.swpc.noaa.gov/json/goes/primary/xray-flares-latest.json" 2>/dev/null) || true
  if [[ -n "$solar_data" ]]; then
    echo "$solar_data" | jq --arg ts "$TIMESTAMP" '
      [
      .[] | select(.max_class != null) |
      select(.max_class | startswith("M") or startswith("X")) |
      {
      title: ("Solar flare: " + .max_class + "-class event"),
      content: (.max_class + "-class solar X-ray flare. Begin: " + (.begin_time // "unknown") + ", peak: " + (.max_time // "unknown") + ". Flux: " + ((.max_xrlong // 0) | tostring) + " W/m2. " + (if (.max_class | startswith("X")) then "X-class: disrupts HF radio, GPS, power grids." else "M-class: brief HF radio blackouts at high latitudes." end)),
      category: "anomaly",
      tags: ["space", "solar", "flare", (.max_class | ascii_downcase)],
      domain: "space-science",
      source_api: "NOAA SWPC",
      timestamp: $ts,
      confidence: (if (.max_class | startswith("X")) then 0.98 else 0.85 end),
      data_points: 1
      }
      ]
    ' > "$tmp_solar" 2>/dev/null || echo "[]" > "$tmp_solar"
    nsolar=$(jq 'length' "$tmp_solar")
    log_ok " Found $nsolar solar flare entries"
  fi

  merge_json_arrays "$out_file" "$tmp_exo" "$tmp_neo" "$tmp_solar"
  rm -rf -- "$tmp_dir"
  total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
  log_ok "Space discoveries total: $total entries"
}
# discover_earth
# Fetches two live feeds and writes the combined entries to
# $OUTPUT_DIR/live_earth_discoveries.json:
#   1. USGS "significant earthquakes, past month" GeoJSON: events with M >= 5.0.
#   2. NASA DONKI geomagnetic storms between $DATE_WEEK_AGO and $DATE_TODAY.
# Each feed is best-effort: a failed download or jq error leaves that feed's
# contribution as an empty array.
# Globals read: OUTPUT_DIR, TIMESTAMP, DATE_TODAY, DATE_WEEK_AGO.
# Returns: 1 if the scratch directory cannot be created, otherwise the status
#          of the final log call.
discover_earth() {
  log_info "Fetching USGS significant earthquakes..."
  local out_file="$OUTPUT_DIR/live_earth_discoveries.json"
  local tmp_dir tmp_quake tmp_storm
  local quake_data donki_data num_quakes total

  # Private scratch directory instead of guessable /tmp/rv_*_$$ file names.
  tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/rv_earth.XXXXXX") || {
    log_fail "Cannot create temp directory for earth discoveries"
    return 1
  }
  tmp_quake="$tmp_dir/quake.json"
  tmp_storm="$tmp_dir/storm.json"
  echo "[]" > "$tmp_quake"
  echo "[]" > "$tmp_storm"

  quake_data=$(curl -sf --max-time 20 \
    "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/significant_month.geojson" 2>/dev/null) || true
  if [[ -n "$quake_data" ]]; then
    num_quakes=$(echo "$quake_data" | jq '.features | length' 2>/dev/null) || num_quakes=0
    log_ok "Got $num_quakes significant earthquakes"
    echo "$quake_data" | jq --arg ts "$TIMESTAMP" --argjson nq "$num_quakes" '
      [
      .features[] |
      select(.properties.mag >= 5.0) |
      {
      title: ("M" + (.properties.mag | tostring) + " earthquake: " + (.properties.place // "unknown")),
      content: ("Significant M" + (.properties.mag | tostring) + " at " + (.properties.place // "unknown") + ", depth " + ((.geometry.coordinates[2] // 0) | tostring) + " km. " + (if (.geometry.coordinates[2] // 0) > 300 then "Deep-focus: subduction zone dynamics." else "Shallow: higher surface impact." end) + " Tsunami: " + ((.properties.tsunami // 0) | tostring) + "."),
      category: "anomaly",
      tags: ["earth", "seismic", "earthquake", (if (.geometry.coordinates[2] // 0) > 300 then "deep-focus" else "shallow" end)],
      domain: "earth-science",
      source_api: "USGS Earthquake Hazards",
      timestamp: $ts,
      confidence: ([(.properties.mag / 10.0), 0.99] | min),
      data_points: $nq
      }
      ]
    ' > "$tmp_quake" 2>/dev/null || echo "[]" > "$tmp_quake"
  fi
  sleep 1

  # DONKI geomagnetic storms (served from api.nasa.gov, tagged "NASA DONKI" below)
  log_info "Fetching NASA DONKI geomagnetic storms..."
  donki_data=$(curl -sf --max-time 15 \
    "https://api.nasa.gov/DONKI/GST?startDate=${DATE_WEEK_AGO}&endDate=${DATE_TODAY}&api_key=DEMO_KEY" 2>/dev/null) || true
  if [[ -n "$donki_data" ]] && echo "$donki_data" | jq -e 'type == "array" and length > 0' &>/dev/null; then
    log_ok "Got $(echo "$donki_data" | jq 'length') geomagnetic storms"
    echo "$donki_data" | jq --arg ts "$TIMESTAMP" '
      [
      .[] |
      {
      title: ("Geomagnetic storm: " + (.gstID // "unknown")),
      content: ("Storm " + (.gstID // "unknown") + ". Start: " + (.startTime // "unknown") + ". " + (if .allKpIndex then ("Peak Kp: " + ([.allKpIndex[].kpIndex] | max | tostring) + ". ") else "" end) + "Linked CMEs: " + (if .linkedEvents then ([.linkedEvents[].activityID] | join(", ")) else "none" end) + "."),
      category: "anomaly",
      tags: ["earth", "geomagnetic", "storm", "space-weather"],
      domain: "earth-science",
      source_api: "NASA DONKI",
      timestamp: $ts,
      confidence: 0.90,
      data_points: 1
      }
      ]
    ' > "$tmp_storm" 2>/dev/null || echo "[]" > "$tmp_storm"
  fi

  merge_json_arrays "$out_file" "$tmp_quake" "$tmp_storm"
  rm -rf -- "$tmp_dir"
  total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
  log_ok "Earth discoveries total: $total entries"
}
# discover_academic
# Pulls up to three recent arXiv papers for each of four categories and writes
# them as entries to $OUTPUT_DIR/live_academic_discoveries.json.
#
# Parsing is done per <entry> element: the feed is first collapsed onto one
# line (whitespace runs become single spaces), then every <entry>...</entry>
# is examined on its own. That keeps a paper's title, summary and link
# together regardless of how the feed-level <title>/<id> are written, and
# keeps titles and summaries that span several lines intact.
# NOTE(review): XML entities such as &amp; are passed through undecoded.
# Requires GNU grep (-P), like the rest of this function always did.
# Globals read: OUTPUT_DIR, TIMESTAMP.
discover_academic() {
  log_info "Fetching arXiv recent papers..."
  local out_file="$OUTPUT_DIR/live_academic_discoveries.json"
  local all_entries="[]"
  local category arxiv_data flat entry
  local i title summary link total

  # Try arxiv.org directly (export.arxiv.org may have CDN issues)
  for category in "astro-ph" "cs.AI" "physics.gen-ph" "q-bio"; do
    arxiv_data=$(curl -sf --max-time 20 \
      "https://arxiv.org/api/query?search_query=cat:${category}&sortBy=submittedDate&sortOrder=descending&max_results=3" 2>/dev/null) \
      || arxiv_data=$(curl -sf --max-time 20 \
        "http://export.arxiv.org/api/query?search_query=cat:${category}&sortBy=submittedDate&sortOrder=descending&max_results=3" 2>/dev/null) \
      || true

    # Here-strings instead of `echo | grep -q`: under pipefail the latter can
    # report failure when grep exits before echo has finished writing.
    if [[ -n "$arxiv_data" ]] && grep -q '<entry>' <<<"$arxiv_data"; then
      flat=$(tr -s '[:space:]' ' ' <<<"$arxiv_data")
      i=0
      while IFS= read -r entry; do
        [[ -n "$entry" ]] || continue
        # grep -o may print several matches; keep the first one only.
        title=$(grep -oP '<title[^>]*>\K[^<]+' <<<"$entry") || title=""
        title=${title%%$'\n'*}
        summary=$(grep -oP '<summary[^>]*>\K[^<]+' <<<"$entry") || summary=""
        summary=${summary%%$'\n'*}
        link=$(grep -oP '<id>\K[^<]+' <<<"$entry") || link=""
        link=${link%%$'\n'*}
        # Whitespace was already squeezed, so at most one space pads each end.
        title=${title# }
        title=${title% }
        summary=${summary# }
        summary=${summary% }
        summary=${summary:0:400}
        link=${link# }
        link=${link% }
        if [[ -n "$title" && "$title" != "ArXiv Query"* ]]; then
          all_entries=$(jq \
            --arg t "arXiv [$category]: $title" \
            --arg c "Recent ${category} paper: ${title}. ${summary} URL: ${link}" \
            --arg cat "$category" \
            --arg ts "$TIMESTAMP" \
            '. + [{
              title: $t,
              content: $c,
              category: "pattern",
              tags: ["academic", "arxiv", $cat, "research"],
              domain: "academic-research",
              source_api: "arXiv API",
              timestamp: $ts,
              confidence: 0.80,
              data_points: 1
            }]' <<<"$all_entries")
        fi
        i=$((i + 1))
        if [[ $i -ge 3 ]]; then
          break
        fi
      done < <(grep -oP '<entry>.*?</entry>' <<<"$flat" || true)
    else
      log_warn " arXiv $category: no data (CDN/DNS issue)"
    fi
    sleep 1
  done

  echo "$all_entries" | jq '.' > "$out_file"
  total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
  log_ok "Academic discoveries: $total entries"
}
# discover_economics
# Writes economic indicator entries to $OUTPUT_DIR/live_economics_discoveries.json.
#   - With FRED_API_KEY set: latest observation of five FRED series.
#   - Without it: US GDP for 2023-2025 from the World Bank API.
# Either source is best-effort; failures simply contribute no entries.
# Globals read: OUTPUT_DIR, TIMESTAMP, FRED_API_KEY (optional).
discover_economics() {
  log_info "Fetching FRED economic indicators..."
  local out_file="$OUTPUT_DIR/live_economics_discoveries.json"
  local entries="[]"
  local fred_key="${FRED_API_KEY:-}"
  # `series` is declared local so the loop variable no longer leaks to callers.
  local wb_data series fred_data
  local latest_val latest_date prev_val series_title total

  # FRED requires a real 32-char API key (get free at fred.stlouisfed.org/docs/api/api_key.html)
  if [[ -z "$fred_key" ]]; then
    log_warn " FRED_API_KEY not set — using World Bank API fallback"
    # Fallback: World Bank indicators
    wb_data=$(curl -sf --max-time 15 \
      "https://api.worldbank.org/v2/country/US/indicator/NY.GDP.MKTP.CD?format=json&date=2023:2025&per_page=3" 2>/dev/null) || true
    if [[ -n "$wb_data" ]] && echo "$wb_data" | jq -e '.[1] | length > 0' &>/dev/null; then
      entries=$(echo "$wb_data" | jq --arg ts "$TIMESTAMP" '
        [
        .[1][] | select(.value != null) |
        {
        title: ("World Bank: US GDP " + (.date // "unknown")),
        content: ("US GDP (current USD): " + (.value | tostring) + " for " + (.date // "unknown") + ". Source: World Bank Development Indicators."),
        category: "pattern",
        tags: ["economics", "worldbank", "gdp", "indicator"],
        domain: "economics-finance",
        source_api: "World Bank API",
        timestamp: $ts,
        confidence: 0.90,
        data_points: 1
        }
        ]
      ' 2>/dev/null) || entries="[]"
    fi
  else
    # The FRED loop only runs when a key is available.
    for series in "DGS10" "UNRATE" "CPIAUCSL" "GDP" "FEDFUNDS"; do
      fred_data=$(curl -sf --max-time 15 \
        "https://api.stlouisfed.org/fred/series/observations?series_id=${series}&sort_order=desc&limit=2&file_type=json&api_key=${fred_key}" 2>/dev/null) || true
      if [[ -n "$fred_data" ]] && echo "$fred_data" | jq -e '.observations | length > 0' &>/dev/null; then
        latest_val=$(echo "$fred_data" | jq -r '.observations[0].value // "N/A"')
        latest_date=$(echo "$fred_data" | jq -r '.observations[0].date // "unknown"')
        prev_val=$(echo "$fred_data" | jq -r '.observations[1].value // "N/A"')
        case "$series" in
          DGS10) series_title="10-Year Treasury Yield" ;;
          UNRATE) series_title="US Unemployment Rate" ;;
          CPIAUCSL) series_title="Consumer Price Index" ;;
          GDP) series_title="US Gross Domestic Product" ;;
          FEDFUNDS) series_title="Federal Funds Rate" ;;
          *) series_title="$series" ;;
        esac
        # FRED reports a missing observation as "."
        if [[ "$latest_val" != "." && "$latest_val" != "N/A" ]]; then
          entries=$(echo "$entries" | jq \
            --arg t "Economic indicator: ${series_title} (${series})" \
            --arg c "${series_title}: latest ${latest_val} as of ${latest_date}. Previous: ${prev_val}. Source: FRED." \
            --arg s "$series" \
            --arg ts "$TIMESTAMP" \
            '. + [{
              title: $t,
              content: $c,
              category: "pattern",
              tags: ["economics", "fred", $s, "indicator"],
              domain: "economics-finance",
              source_api: "FRED API",
              timestamp: $ts,
              confidence: 0.92,
              data_points: 1
            }]')
        fi
      fi
      sleep 1
    done
  fi

  echo "$entries" | jq '.' > "$out_file"
  total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
  log_ok "Economics discoveries: $total entries"
}
# ─────────────────────────────────────────────────────────────
# TRAIN - Upload discoveries to brain
# ─────────────────────────────────────────────────────────────
# get_nonce
# Requests a challenge from ${BRAIN_API}/v1/challenge and prints its .nonce.
# Returns non-zero (printing nothing) when the request fails, the body is not
# valid JSON, or the response carries no nonce — so callers using
# `nonce=$(get_nonce) || ...` never proceed with an empty nonce.
get_nonce() {
  local response nonce
  response=$(curl -sf --max-time 10 "${BRAIN_API}/v1/challenge" 2>/dev/null) || return 1
  nonce=$(jq -r '.nonce // empty' <<<"$response") || return 1
  [[ -n "$nonce" ]] || return 1
  printf '%s\n' "$nonce"
}
# train_brain [FILE_PATTERN]
# Uploads every entry of every $OUTPUT_DIR file matching FILE_PATTERN
# (a glob; default "live_*_discoveries.json") to ${BRAIN_API}/v1/memories,
# one POST per entry, each with a fresh challenge nonce.
# The payload category is always "pattern"; the entry's own category is not sent.
# An entry counts as trained on any 2xx response; everything else — including
# not being able to obtain a nonce — counts as failed, so the closing
# "Training batch" line accounts for every entry that was attempted.
# Globals read: OUTPUT_DIR, BRAIN_API, BRAIN_API_KEY.
train_brain() {
  local file_pattern="${1:-live_*_discoveries.json}"
  local trained=0
  local failed=0
  local filepath filename file_len idx
  local title content tags_json nonce payload http_code
  local -a files

  # nullglob makes an unmatched pattern expand to nothing instead of itself.
  # The pattern is intentionally unquoted so that it globs.
  shopt -s nullglob
  files=("${OUTPUT_DIR}"/${file_pattern})
  shopt -u nullglob
  if [[ ${#files[@]} -eq 0 ]]; then
    log_warn "No discovery files matching: $file_pattern"
    return
  fi

  for filepath in "${files[@]}"; do
    filename=$(basename "$filepath")
    file_len=$(jq 'length' "$filepath" 2>/dev/null) || file_len=0
    if [[ "$file_len" -eq 0 ]]; then
      log_warn "Skipping $filename - empty"
      continue
    fi
    log_info "Training from: $filename ($file_len entries)"

    idx=0
    while [[ $idx -lt $file_len ]]; do
      title=$(jq -r ".[$idx].title // \"Discovery $idx\"" "$filepath")
      content=$(jq -r ".[$idx].content // (.[$idx] | tostring)" "$filepath")
      tags_json=$(jq -c ".[$idx].tags // [\"discovery\"]" "$filepath")

      # A missing or empty nonce means the POST cannot be authenticated:
      # record the entry as failed and move on.
      if ! nonce=$(get_nonce) || [[ -z "$nonce" ]]; then
        log_warn "No nonce"
        failed=$((failed + 1))
        idx=$((idx + 1))
        continue
      fi

      payload=$(jq -n --arg t "$title" --arg c "$content" --argjson tags "$tags_json" \
        '{ title: $t, content: $c, category: "pattern", tags: $tags }')
      http_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 15 \
        -X POST "${BRAIN_API}/v1/memories" \
        -H "Content-Type: application/json" \
        -H "Authorization: Bearer ${BRAIN_API_KEY}" \
        -H "X-Challenge-Nonce: ${nonce}" \
        -d "$payload" 2>/dev/null) || http_code=0

      if [[ "$http_code" =~ ^2 ]]; then
        trained=$((trained + 1))
        log_ok " [${trained}] $title"
      else
        failed=$((failed + 1))
        log_fail "$title (HTTP $http_code)"
      fi
      sleep 1
      idx=$((idx + 1))
    done
  done

  log_info "Training batch: $trained trained, $failed failed"
}
# ─────────────────────────────────────────────────────────────
# REFLECT - Query brain for patterns and gaps
# ─────────────────────────────────────────────────────────────
# query_brain_patterns
# Downloads the brain's memories, counts how often each tag occurs and writes
# $OUTPUT_DIR/brain_gaps.json with:
#   underrepresented - the (up to) five least frequent tags
#   well_covered     - the (up to) five most frequent tags
# A built-in default target list is written instead when no nonce can be
# obtained, the brain cannot be reached, the response cannot be parsed, or —
# new — the response contains no tags at all. Without that last case an empty
# brain produced "underrepresented": [], which made gap filling a no-op.
# Globals read: OUTPUT_DIR, BRAIN_API, BRAIN_API_KEY.
query_brain_patterns() {
  log_info "Querying brain for learned patterns..."
  local gaps_file="$OUTPUT_DIR/brain_gaps.json"
  local nonce memories total_memories

  if ! nonce=$(get_nonce) || [[ -z "$nonce" ]]; then
    log_warn "Cannot get nonce for reflection"
    echo '{"underrepresented":["medical","materials","genomics","gravitational-wave"],"gap_analysis":"Using defaults"}' > "$gaps_file"
    return
  fi

  memories=$(curl -sf --max-time 15 \
    "${BRAIN_API}/v1/memories" \
    -H "Authorization: Bearer ${BRAIN_API_KEY}" \
    -H "X-Challenge-Nonce: ${nonce}" 2>/dev/null) || true

  if [[ -n "$memories" ]]; then
    # The response may be a bare array or an object with a .memories array.
    total_memories=$(echo "$memories" | jq 'if type == "array" then length else (.memories // []) | length end' 2>/dev/null) || total_memories=0
    log_ok "Brain has $total_memories total memories"
    # Analyze tag frequency to find underrepresented domains
    echo "$memories" | jq '
      (if type == "array" then . else (.memories // []) end) as $mems |
      [$mems[].tags // [] | .[]] |
      group_by(.) |
      map({tag: .[0], count: length}) |
      sort_by(.count) as $sorted |
      {
      total_memories: ($mems | length),
      underrepresented: [$sorted[:5][].tag],
      well_covered: [$sorted[-5:][].tag],
      gap_analysis: "Domains with fewest entries need more discovery focus"
      }
    ' > "$gaps_file" 2>/dev/null || echo '{"underrepresented":["medical","materials","genomics"],"gap_analysis":"Parse error, using defaults"}' > "$gaps_file"

    # No tags at all (e.g. an empty brain) gives nothing to target: use defaults.
    if ! jq -e '(.underrepresented // []) | length > 0' "$gaps_file" &>/dev/null; then
      log_warn "Brain returned no tags — using default gap targets"
      echo '{"underrepresented":["medical","materials","genomics","gravitational-wave","deep-focus"],"gap_analysis":"No tags in brain, using defaults"}' > "$gaps_file"
    fi

    log_ok "Gap analysis:"
    jq '.' "$gaps_file" 2>/dev/null || true
  else
    log_warn "Brain unreachable — using default gap targets"
    echo '{"underrepresented":["medical","materials","genomics","gravitational-wave","deep-focus"],"gap_analysis":"Brain unreachable, using defaults"}' > "$gaps_file"
  fi
}
# ─────────────────────────────────────────────────────────────
# REDISCOVER - Targeted discovery based on gaps
# ─────────────────────────────────────────────────────────────
# discover_gaps
# Reads the "underrepresented" list from $OUTPUT_DIR/brain_gaps.json and runs
# whichever targeted fetches its entries match (case-insensitive):
#   medical|genomics|pubmed|bio      -> 5 most recent PubMed hits
#   earth|seismic|deep-focus|volcano -> USGS M4.5+ (7 days), depth > 200 km
#                                       or M >= 6.0, at most 5
#   gravitational|wave|ligo|gw       -> LIGO GraceDB superevents, FAR < 1e-6
# Results are written to $OUTPUT_DIR/live_gap_discoveries.json. Every fetch is
# best-effort; a failed request or unparsable response adds no entries.
# Globals read: OUTPUT_DIR, TIMESTAMP.
discover_gaps() {
  log_info "Running targeted gap-filling discovery..."
  local gaps_file="$OUTPUT_DIR/brain_gaps.json"
  local out_file="$OUTPUT_DIR/live_gap_discoveries.json"
  local entries="[]"
  local gaps pubmed_ids pmid article_data art_title art_source
  local eq_data gap_quakes gw_data gw_entries total

  gaps=$(jq -r '.underrepresented // [] | .[]' "$gaps_file" 2>/dev/null) || gaps="medical materials genomics"

  # PubMed gap fill
  # (here-strings rather than `echo | grep -q`, which can misreport under pipefail)
  if grep -qiE "medical|genomics|pubmed|bio" <<<"$gaps"; then
    log_info "Gap-fill: PubMed trending research..."
    pubmed_ids=$(curl -sf --max-time 20 \
      "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?db=pubmed&term=breakthrough+OR+novel+discovery&retmax=5&sort=date&retmode=json" 2>/dev/null | jq -r '.esearchresult.idlist[]' 2>/dev/null) || true
    # Word-splitting is intended: one id per line from jq.
    for pmid in $pubmed_ids; do
      article_data=$(curl -sf --max-time 15 \
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=${pmid}&retmode=json" 2>/dev/null) || true
      if [[ -n "$article_data" ]]; then
        # The id is passed as data (--arg) instead of being spliced into the
        # jq program, and a jq failure degrades to "no title" rather than
        # aborting the whole script under set -e.
        art_title=$(jq -r --arg id "$pmid" '.result[$id].title // empty' <<<"$article_data" 2>/dev/null) || art_title=""
        art_source=$(jq -r --arg id "$pmid" '.result[$id].source // "unknown"' <<<"$article_data" 2>/dev/null) || art_source="unknown"
        if [[ -n "$art_title" ]]; then
          entries=$(echo "$entries" | jq \
            --arg t "PubMed: $art_title" \
            --arg c "Medical/genomics paper: ${art_title}. Journal: ${art_source}. PMID: ${pmid}." \
            --arg ts "$TIMESTAMP" \
            '. + [{
              title: $t,
              content: $c,
              category: "pattern",
              tags: ["medical", "pubmed", "research", "gap-fill"],
              domain: "medical-genomics",
              source_api: "PubMed E-utilities",
              timestamp: $ts,
              confidence: 0.82,
              data_points: 1
            }]')
        fi
      fi
      sleep 0.5
    done
  fi
  sleep 1

  # Deep earthquake gap fill
  if grep -qiE "earth|seismic|deep-focus|volcano" <<<"$gaps"; then
    log_info "Gap-fill: USGS M4.5+ earthquakes (7 days)..."
    eq_data=$(curl -sf --max-time 20 \
      "https://earthquake.usgs.gov/earthquakes/feed/v1.0/summary/4.5_week.geojson" 2>/dev/null) || true
    if [[ -n "$eq_data" ]]; then
      gap_quakes=$(echo "$eq_data" | jq --arg ts "$TIMESTAMP" '
        [
        .features[] |
        select((.geometry.coordinates[2] // 0) > 200 or (.properties.mag // 0) >= 6.0) |
        {
        title: ("M" + (.properties.mag | tostring) + " earthquake: " + (.properties.place // "unknown")),
        content: ("M" + (.properties.mag | tostring) + " at " + (.properties.place // "unknown") + ". Depth: " + ((.geometry.coordinates[2] // 0) | tostring) + " km. " + (if (.geometry.coordinates[2] // 0) > 300 then "Deep-focus subduction event." elif (.properties.mag // 0) >= 7.0 then "Major earthquake." else "Significant event." end)),
        category: "anomaly",
        tags: ["earth", "seismic", "gap-fill", (if (.geometry.coordinates[2] // 0) > 300 then "deep-focus" else "significant" end)],
        domain: "earth-science",
        source_api: "USGS Earthquake Hazards",
        timestamp: $ts,
        confidence: 0.88,
        data_points: 1
        }
        ] | .[:5]
      ' 2>/dev/null) || gap_quakes="[]"
      entries=$(echo "$entries" "$gap_quakes" | jq -s 'flatten')
      log_ok " Added $(echo "$gap_quakes" | jq 'length') deep/major earthquake entries"
    fi
  fi
  sleep 1

  # Gravitational wave gap fill
  if grep -qiE "gravitational|wave|ligo|gw" <<<"$gaps"; then
    log_info "Gap-fill: LIGO GraceDB..."
    gw_data=$(curl -sf --max-time 15 \
      "https://gracedb.ligo.org/api/superevents/?query=far+%3C+1e-6&format=json&count=5" 2>/dev/null) || true
    if [[ -n "$gw_data" ]]; then
      gw_entries=$(echo "$gw_data" | jq --arg ts "$TIMESTAMP" '
        [
        .superevents[]? |
        {
        title: ("Gravitational wave: " + (.superevent_id // "unknown")),
        content: ("GW superevent " + (.superevent_id // "unknown") + " (category: " + (.category // "unknown") + "). FAR: " + ((.far // 0) | tostring) + " Hz. Preferred: " + (.preferred_event // "unknown") + ". LIGO/Virgo/KAGRA detection."),
        category: "anomaly",
        tags: ["space", "gravitational-wave", "ligo", "gap-fill"],
        domain: "space-science",
        source_api: "LIGO GraceDB",
        timestamp: $ts,
        confidence: 0.90,
        data_points: 1
        }
        ]
      ' 2>/dev/null) || gw_entries="[]"
      entries=$(echo "$entries" "$gw_entries" | jq -s 'flatten')
    fi
  fi

  echo "$entries" | jq '.' > "$out_file"
  total=$(jq 'length' "$out_file" 2>/dev/null || echo 0)
  log_ok "Gap-fill discoveries: $total entries"
}
# ─────────────────────────────────────────────────────────────
# MAIN FEEDBACK LOOP
# ─────────────────────────────────────────────────────────────
# main
# Prints the banner, then runs MAX_CYCLES cycles: the first cycle discovers
# from the live APIs and trains the brain; each later cycle reflects on the
# brain's gaps, runs targeted discovery and trains on the gap-fill file.
# Ends with a per-file and total entry count and the last gap analysis.
# Globals read: MAX_CYCLES, DATE_TODAY, OUTPUT_DIR.
main() {
  local total_discoveries=0
  local entry_count

  echo ""
  echo "╔═══════════════════════════════════════════════════════════╗"
  echo "║ RuVector Discovery ↔ Training Feedback Loop ║"
  echo "║ Cycles: ${MAX_CYCLES} | Date: ${DATE_TODAY}"
  echo "╚═══════════════════════════════════════════════════════════╝"
  echo ""

  for cycle in $(seq 1 "$MAX_CYCLES"); do
    log_phase "CYCLE ${cycle}/${MAX_CYCLES}"
    case "$cycle" in
      1)
        # Phase 1: DISCOVER
        log_phase "Phase 1: DISCOVER (live API fetch)"
        discover_space
        discover_earth
        discover_academic
        discover_economics
        # Phase 2: TRAIN
        log_phase "Phase 2: TRAIN (upload to brain)"
        train_brain "live_*_discoveries.json"
        ;;
      *)
        # Phase 3: REFLECT
        log_phase "Phase 3: REFLECT (query brain for gaps)"
        query_brain_patterns
        # Phase 4: REDISCOVER
        log_phase "Phase 4: REDISCOVER (targeted gap-filling)"
        discover_gaps
        # Phase 5: RETRAIN
        log_phase "Phase 5: RETRAIN (gap-fill → brain)"
        train_brain "live_gap_discoveries.json"
        ;;
    esac
  done

  # Final summary
  log_phase "FEEDBACK LOOP COMPLETE"
  for f in "$OUTPUT_DIR"/live_*_discoveries.json; do
    # An unmatched glob stays literal; the -f test filters that case out.
    [[ -f "$f" ]] || continue
    entry_count=$(jq 'length' "$f" 2>/dev/null) || entry_count=0
    total_discoveries=$((total_discoveries + entry_count))
    log_info " $(basename "$f"): $entry_count entries"
  done
  log_ok "Total discoveries: $total_discoveries"

  if [[ -f "$OUTPUT_DIR/brain_gaps.json" ]]; then
    log_ok "Brain gap analysis:"
    jq '.' "$OUTPUT_DIR/brain_gaps.json" 2>/dev/null || true
  fi
}
# Entry point: run the feedback loop. main does not read its positional
# parameters; --cycles and --output-dir are consumed by the top-level
# argument parsing earlier in this file.
main "$@"