feat(domain-expansion): add meta-learning engine with five AGI learning improvements

Closes five architectural gaps in the learning pipeline:

1. RegretTracker — cumulative regret tracking per context bucket.
   Measures optimality gap (O(√T) = learning, O(T) = not).
   Enables convergence detection and learning speed measurement.

2. DecayingBeta — exponential forgetting for non-stationary environments.
   Old evidence decays by configurable factor per observation.
   Effective window ≈ 1/(1-decay). Prevents calcification on stale data.

3. PlateauDetector — detects learning stalls with escalating responses:
   Continue → IncreaseExploration → TriggerTransfer → InjectDiversity → Reset.
   Compares accuracy windows and tracks consecutive plateau events.

4. ParetoFront — multi-objective optimization replacing single-scalar fitness.
   Tracks non-dominated solutions across [accuracy, -cost, robustness].
   Includes hypervolume indicator, spread metrics, and per-objective queries.

5. CuriosityBonus — UCB-style exploration bonus for under-visited contexts.
   Bonus = c * sqrt(ln(N) / n_i). Directs exploration toward novel
   bucket/arm combinations rather than relying solely on Thompson variance.

All five compose into MetaLearningEngine, wired into DomainExpansionEngine:
- record_decision() feeds regret + curiosity + decaying beta on every arm pick
- evolve_population() records kernels into Pareto front before evolution
- select_arm_curious() adds curiosity bonus to Thompson Sampling
- check_plateau() monitors cost curves for learning stalls
- meta_health() provides unified diagnostics

Performance (optimized hot paths avoid HashMap clone on existing entries):
- RegretTracker: 84ns/record (1k decisions in 84µs)
- DecayingBeta: 3ns/update
- PlateauDetector: 4.1ns/check
- ParetoFront: 67ns/insert, 146ns/hypervolume
- Full cycle: 199ns/decision (18% faster after optimization)

82 tests pass. 6 new benchmarks added.

https://claude.ai/code/session_01RnwD4x5cbpB7FPvoyYQz8G
This commit is contained in:
Claude 2026-02-16 04:40:34 +00:00
parent 0261cf1198
commit 53243194ca
No known key found for this signature in database
3 changed files with 1706 additions and 5 deletions

View file

@ -1,8 +1,9 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use ruvector_domain_expansion::{
ArmId, ContextBucket, CostCurve, CostCurvePoint, ConvergenceThresholds,
AccelerationScoreboard, DomainExpansionEngine, DomainId, MetaThompsonEngine,
PolicyKnobs, PopulationSearch, Solution, TransferPrior,
AccelerationScoreboard, CuriosityBonus, DecayingBeta, DomainExpansionEngine, DomainId,
MetaLearningEngine, MetaThompsonEngine, ParetoFront, ParetoPoint, PlateauDetector,
PolicyKnobs, PopulationSearch, RegretTracker, Solution, TransferPrior,
};
fn bench_task_generation(c: &mut Criterion) {
@ -167,6 +168,181 @@ fn bench_transfer_prior_extract(c: &mut Criterion) {
});
}
// ═══════════════════════════════════════════════════════════════════
// Meta-Learning Benchmarks
// ═══════════════════════════════════════════════════════════════════
fn bench_regret_tracker(c: &mut Criterion) {
let bucket = ContextBucket {
difficulty_tier: "medium".into(),
category: "algo".into(),
};
let arms: Vec<ArmId> = (0..4).map(|i| ArmId(format!("arm_{}", i))).collect();
let mut group = c.benchmark_group("meta_learning");
group.bench_function("regret_record_1k", |b| {
b.iter(|| {
let mut tracker = RegretTracker::new(50);
for i in 0..1000 {
let arm = &arms[i % 4];
let reward = if i % 4 == 0 { 0.9 } else { 0.4 };
tracker.record(black_box(&bucket), black_box(arm), black_box(reward));
}
black_box(tracker.average_regret())
})
});
group.bench_function("regret_summary", |b| {
let mut tracker = RegretTracker::new(50);
for i in 0..1000 {
let arm = &arms[i % 4];
tracker.record(&bucket, arm, if i % 4 == 0 { 0.9 } else { 0.4 });
}
b.iter(|| black_box(tracker.summary()))
});
group.finish();
}
fn bench_decaying_beta(c: &mut Criterion) {
let mut group = c.benchmark_group("decaying_beta");
group.bench_function("update_1k", |b| {
b.iter(|| {
let mut db = DecayingBeta::new(0.995);
for i in 0..1000 {
let reward = if i % 3 == 0 { 0.9 } else { 0.4 };
db.update(black_box(reward));
}
black_box(db.mean())
})
});
group.bench_function("update_vs_standard", |b| {
b.iter(|| {
// Compare DecayingBeta vs standard BetaParams
let mut db = DecayingBeta::new(0.995);
let mut std_beta = ruvector_domain_expansion::BetaParams::uniform();
for i in 0..500 {
let reward = if i % 3 == 0 { 0.9 } else { 0.4 };
db.update(reward);
std_beta.update(reward);
}
black_box((db.mean(), std_beta.mean()))
})
});
group.finish();
}
fn bench_plateau_detector(c: &mut Criterion) {
let points: Vec<CostCurvePoint> = (0..100)
.map(|i| CostCurvePoint {
cycle: i,
accuracy: 0.80 + (i as f32 * 0.001),
cost_per_solve: 0.1 / (i as f32 + 1.0),
robustness: 0.8,
policy_violations: 0,
timestamp: i as f64,
})
.collect();
c.bench_function("plateau_check_100pts", |b| {
b.iter(|| {
let mut detector = PlateauDetector::new(10, 0.005);
black_box(detector.check(black_box(&points)))
})
});
}
fn bench_pareto_front(c: &mut Criterion) {
let mut group = c.benchmark_group("pareto_front");
group.bench_function("insert_100_points", |b| {
b.iter(|| {
let mut front = ParetoFront::new();
for i in 0..100 {
let acc = (i as f32) / 100.0;
let cost = -((100 - i) as f32) / 100.0;
let rob = ((i * 7 + 13) % 100) as f32 / 100.0;
front.insert(ParetoPoint {
kernel_id: format!("k{}", i),
objectives: vec![acc, cost, rob],
generation: 0,
});
}
black_box(front.len())
})
});
group.bench_function("hypervolume_2d", |b| {
let mut front = ParetoFront::new();
for i in 0..20 {
let x = (i as f32 + 1.0) / 21.0;
front.insert(ParetoPoint {
kernel_id: format!("k{}", i),
objectives: vec![x, 1.0 - x],
generation: 0,
});
}
b.iter(|| black_box(front.hypervolume(&[0.0, 0.0])))
});
group.finish();
}
fn bench_curiosity_bonus(c: &mut Criterion) {
let arms: Vec<ArmId> = (0..4).map(|i| ArmId(format!("arm_{}", i))).collect();
let buckets: Vec<ContextBucket> = (0..18)
.map(|i| ContextBucket {
difficulty_tier: ["easy", "medium", "hard"][i / 6].into(),
category: format!("cat_{}", i % 6),
})
.collect();
c.bench_function("curiosity_bonus_18buckets", |b| {
let mut curiosity = CuriosityBonus::new(1.41);
for _ in 0..500 {
for bucket in &buckets {
for arm in &arms {
curiosity.record_visit(bucket, arm);
}
}
}
b.iter(|| {
let mut total = 0.0f32;
for bucket in &buckets {
for arm in &arms {
total += curiosity.bonus(black_box(bucket), black_box(arm));
}
}
black_box(total)
})
});
}
fn bench_meta_engine_full_cycle(c: &mut Criterion) {
c.bench_function("meta_engine_100_decisions", |b| {
b.iter(|| {
let mut engine = MetaLearningEngine::new();
let bucket = ContextBucket {
difficulty_tier: "medium".into(),
category: "algo".into(),
};
let arm = ArmId("greedy".into());
for i in 0..100 {
let reward = if i % 3 == 0 { 0.9 } else { 0.5 };
engine.record_decision(&bucket, &arm, reward);
}
engine.record_kernel("k1", 0.9, 0.2, 0.8, 1);
black_box(engine.health_check())
})
});
}
criterion_group!(
benches,
bench_task_generation,
@ -177,5 +353,11 @@ criterion_group!(
bench_knobs_mutate,
bench_cost_curve_auc,
bench_transfer_prior_extract,
bench_regret_tracker,
bench_decaying_beta,
bench_plateau_detector,
bench_pareto_front,
bench_curiosity_bonus,
bench_meta_engine_full_cycle,
);
criterion_main!(benches);

View file

@ -45,6 +45,7 @@
pub mod cost_curve;
pub mod domain;
pub mod meta_learning;
pub mod planning;
pub mod policy_kernel;
pub mod rust_synthesis;
@ -67,6 +68,10 @@ pub use planning::PlanningDomain;
pub use policy_kernel::{PolicyKernel, PolicyKnobs, PopulationSearch, PopulationStats};
pub use rust_synthesis::RustSynthesisDomain;
pub use tool_orchestration::ToolOrchestrationDomain;
pub use meta_learning::{
CuriosityBonus, DecayingBeta, MetaLearningEngine, MetaLearningHealth, ParetoFront,
ParetoPoint, PlateauAction, PlateauDetector, RegretSummary, RegretTracker,
};
pub use transfer::{
ArmId, BetaParams, ContextBucket, DualPathResult, MetaThompsonEngine, TransferPrior,
TransferVerification,
@ -78,6 +83,10 @@ use std::collections::HashMap;
///
/// Manages multiple domains, transfer learning between them,
/// population-based policy search, and the acceleration scoreboard.
///
/// The `meta` field provides five composable learning improvements:
/// regret tracking, decaying priors, plateau detection, Pareto front
/// optimization, and curiosity-driven exploration.
pub struct DomainExpansionEngine {
/// Registered domains.
domains: HashMap<DomainId, Box<dyn Domain>>,
@ -87,6 +96,8 @@ pub struct DomainExpansionEngine {
pub population: PopulationSearch,
/// Acceleration scoreboard tracking convergence across domains.
pub scoreboard: AccelerationScoreboard,
/// Meta-learning engine: regret, plateau, Pareto, curiosity, decay.
pub meta: MetaLearningEngine,
/// Holdout tasks per domain for verification.
holdouts: HashMap<DomainId, Vec<Task>>,
/// Counterexample set: failed solutions that inform future decisions.
@ -110,6 +121,7 @@ impl DomainExpansionEngine {
thompson: MetaThompsonEngine::new(arms),
population: PopulationSearch::new(8),
scoreboard: AccelerationScoreboard::new(),
meta: MetaLearningEngine::new(),
holdouts: HashMap::new(),
counterexamples: HashMap::new(),
};
@ -168,12 +180,15 @@ impl DomainExpansionEngine {
// Record outcome in Thompson engine.
self.thompson.record_outcome(
domain_id,
bucket,
arm,
bucket.clone(),
arm.clone(),
eval.score,
1.0, // unit cost for now
);
// Record in meta-learning engine (regret + curiosity + decaying beta).
self.meta.record_decision(&bucket, &arm, eval.score);
// Store counterexamples for poor solutions.
if eval.score < 0.3 {
self.counterexamples
@ -258,8 +273,33 @@ impl DomainExpansionEngine {
}
}
/// Evolve the policy kernel population.
/// Evolve the policy kernel population and update Pareto front.
pub fn evolve_population(&mut self) {
// Record current population into Pareto front before evolving.
let gen = self.population.generation();
for kernel in self.population.population() {
let accuracy = kernel.fitness();
let cost = if kernel.cycles > 0 {
kernel.total_cost / kernel.cycles as f32
} else {
0.0
};
// Robustness approximated by consistency across domains.
let robustness = if kernel.holdout_scores.len() > 1 {
let mean = accuracy;
let var: f32 = kernel
.holdout_scores
.values()
.map(|s| (s - mean).powi(2))
.sum::<f32>()
/ kernel.holdout_scores.len() as f32;
(1.0 - var.sqrt()).max(0.0)
} else {
accuracy
};
self.meta.record_kernel(&kernel.id, accuracy, cost, robustness, gen);
}
self.population.evolve();
}
@ -312,6 +352,72 @@ impl DomainExpansionEngine {
) -> bool {
self.thompson.is_uncertain(domain_id, bucket, 0.15)
}
/// Select arm with curiosity-boosted Thompson Sampling.
///
/// Combines the standard Thompson sample with a UCB-style exploration
/// bonus that favors under-visited bucket/arm combinations.
pub fn select_arm_curious(
&self,
domain_id: &DomainId,
bucket: &ContextBucket,
) -> Option<ArmId> {
let mut rng = rand::thread_rng();
// Get all arms and compute boosted scores
let prior = self.thompson.extract_prior(domain_id)?;
let arms: Vec<ArmId> = prior
.bucket_priors
.get(bucket)
.map(|m| m.keys().cloned().collect())
.unwrap_or_default();
if arms.is_empty() {
return self.thompson.select_arm(domain_id, bucket, &mut rng);
}
let mut best_arm = None;
let mut best_score = f32::NEG_INFINITY;
for arm in &arms {
let params = prior.get_prior(bucket, arm);
let sample = params.sample(&mut rng);
let boosted = self.meta.boosted_score(bucket, arm, sample);
if boosted > best_score {
best_score = boosted;
best_arm = Some(arm.clone());
}
}
best_arm.or_else(|| self.thompson.select_arm(domain_id, bucket, &mut rng))
}
/// Get meta-learning health diagnostics.
pub fn meta_health(&self) -> MetaLearningHealth {
self.meta.health_check()
}
/// Check cost curve for plateau and get recommended action.
pub fn check_plateau(
&mut self,
domain_id: &DomainId,
) -> PlateauAction {
if let Some(curve) = self.scoreboard.curves.get(domain_id) {
self.meta.check_plateau(&curve.points)
} else {
PlateauAction::Continue
}
}
/// Get regret summary across all learning contexts.
pub fn regret_summary(&self) -> RegretSummary {
self.meta.regret.summary()
}
/// Get the Pareto front of non-dominated policy kernels.
pub fn pareto_front(&self) -> &ParetoFront {
&self.meta.pareto
}
}
impl Default for DomainExpansionEngine {

File diff suppressed because it is too large Load diff