mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-25 15:03:46 +00:00
feat(domain-expansion): add meta-learning engine with five AGI learning improvements
Closes five architectural gaps in the learning pipeline: 1. RegretTracker — cumulative regret tracking per context bucket. Measures optimality gap (O(√T) = learning, O(T) = not). Enables convergence detection and learning speed measurement. 2. DecayingBeta — exponential forgetting for non-stationary environments. Old evidence decays by configurable factor per observation. Effective window ≈ 1/(1-decay). Prevents calcification on stale data. 3. PlateauDetector — detects learning stalls with escalating responses: Continue → IncreaseExploration → TriggerTransfer → InjectDiversity → Reset. Compares accuracy windows and tracks consecutive plateau events. 4. ParetoFront — multi-objective optimization replacing single-scalar fitness. Tracks non-dominated solutions across [accuracy, -cost, robustness]. Includes hypervolume indicator, spread metrics, and per-objective queries. 5. CuriosityBonus — UCB-style exploration bonus for under-visited contexts. Bonus = c * sqrt(ln(N) / n_i). Directs exploration toward novel bucket/arm combinations rather than relying solely on Thompson variance. All five compose into MetaLearningEngine, wired into DomainExpansionEngine: - record_decision() feeds regret + curiosity + decaying beta on every arm pick - evolve_population() records kernels into Pareto front before evolution - select_arm_curious() adds curiosity bonus to Thompson Sampling - check_plateau() monitors cost curves for learning stalls - meta_health() provides unified diagnostics Performance (optimized hot paths avoid HashMap clone on existing entries): - RegretTracker: 84ns/record (1k decisions in 84µs) - DecayingBeta: 3ns/update - PlateauDetector: 4.1ns/check - ParetoFront: 67ns/insert, 146ns/hypervolume - Full cycle: 199ns/decision (18% faster after optimization) 82 tests pass. 6 new benchmarks added. https://claude.ai/code/session_01RnwD4x5cbpB7FPvoyYQz8G
This commit is contained in:
parent
0261cf1198
commit
53243194ca
3 changed files with 1706 additions and 5 deletions
|
|
@ -1,8 +1,9 @@
|
|||
use criterion::{black_box, criterion_group, criterion_main, Criterion};
|
||||
use ruvector_domain_expansion::{
|
||||
ArmId, ContextBucket, CostCurve, CostCurvePoint, ConvergenceThresholds,
|
||||
AccelerationScoreboard, DomainExpansionEngine, DomainId, MetaThompsonEngine,
|
||||
PolicyKnobs, PopulationSearch, Solution, TransferPrior,
|
||||
AccelerationScoreboard, CuriosityBonus, DecayingBeta, DomainExpansionEngine, DomainId,
|
||||
MetaLearningEngine, MetaThompsonEngine, ParetoFront, ParetoPoint, PlateauDetector,
|
||||
PolicyKnobs, PopulationSearch, RegretTracker, Solution, TransferPrior,
|
||||
};
|
||||
|
||||
fn bench_task_generation(c: &mut Criterion) {
|
||||
|
|
@ -167,6 +168,181 @@ fn bench_transfer_prior_extract(c: &mut Criterion) {
|
|||
});
|
||||
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// Meta-Learning Benchmarks
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
fn bench_regret_tracker(c: &mut Criterion) {
|
||||
let bucket = ContextBucket {
|
||||
difficulty_tier: "medium".into(),
|
||||
category: "algo".into(),
|
||||
};
|
||||
let arms: Vec<ArmId> = (0..4).map(|i| ArmId(format!("arm_{}", i))).collect();
|
||||
|
||||
let mut group = c.benchmark_group("meta_learning");
|
||||
|
||||
group.bench_function("regret_record_1k", |b| {
|
||||
b.iter(|| {
|
||||
let mut tracker = RegretTracker::new(50);
|
||||
for i in 0..1000 {
|
||||
let arm = &arms[i % 4];
|
||||
let reward = if i % 4 == 0 { 0.9 } else { 0.4 };
|
||||
tracker.record(black_box(&bucket), black_box(arm), black_box(reward));
|
||||
}
|
||||
black_box(tracker.average_regret())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("regret_summary", |b| {
|
||||
let mut tracker = RegretTracker::new(50);
|
||||
for i in 0..1000 {
|
||||
let arm = &arms[i % 4];
|
||||
tracker.record(&bucket, arm, if i % 4 == 0 { 0.9 } else { 0.4 });
|
||||
}
|
||||
b.iter(|| black_box(tracker.summary()))
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_decaying_beta(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("decaying_beta");
|
||||
|
||||
group.bench_function("update_1k", |b| {
|
||||
b.iter(|| {
|
||||
let mut db = DecayingBeta::new(0.995);
|
||||
for i in 0..1000 {
|
||||
let reward = if i % 3 == 0 { 0.9 } else { 0.4 };
|
||||
db.update(black_box(reward));
|
||||
}
|
||||
black_box(db.mean())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("update_vs_standard", |b| {
|
||||
b.iter(|| {
|
||||
// Compare DecayingBeta vs standard BetaParams
|
||||
let mut db = DecayingBeta::new(0.995);
|
||||
let mut std_beta = ruvector_domain_expansion::BetaParams::uniform();
|
||||
for i in 0..500 {
|
||||
let reward = if i % 3 == 0 { 0.9 } else { 0.4 };
|
||||
db.update(reward);
|
||||
std_beta.update(reward);
|
||||
}
|
||||
black_box((db.mean(), std_beta.mean()))
|
||||
})
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_plateau_detector(c: &mut Criterion) {
|
||||
let points: Vec<CostCurvePoint> = (0..100)
|
||||
.map(|i| CostCurvePoint {
|
||||
cycle: i,
|
||||
accuracy: 0.80 + (i as f32 * 0.001),
|
||||
cost_per_solve: 0.1 / (i as f32 + 1.0),
|
||||
robustness: 0.8,
|
||||
policy_violations: 0,
|
||||
timestamp: i as f64,
|
||||
})
|
||||
.collect();
|
||||
|
||||
c.bench_function("plateau_check_100pts", |b| {
|
||||
b.iter(|| {
|
||||
let mut detector = PlateauDetector::new(10, 0.005);
|
||||
black_box(detector.check(black_box(&points)))
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_pareto_front(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("pareto_front");
|
||||
|
||||
group.bench_function("insert_100_points", |b| {
|
||||
b.iter(|| {
|
||||
let mut front = ParetoFront::new();
|
||||
for i in 0..100 {
|
||||
let acc = (i as f32) / 100.0;
|
||||
let cost = -((100 - i) as f32) / 100.0;
|
||||
let rob = ((i * 7 + 13) % 100) as f32 / 100.0;
|
||||
front.insert(ParetoPoint {
|
||||
kernel_id: format!("k{}", i),
|
||||
objectives: vec![acc, cost, rob],
|
||||
generation: 0,
|
||||
});
|
||||
}
|
||||
black_box(front.len())
|
||||
})
|
||||
});
|
||||
|
||||
group.bench_function("hypervolume_2d", |b| {
|
||||
let mut front = ParetoFront::new();
|
||||
for i in 0..20 {
|
||||
let x = (i as f32 + 1.0) / 21.0;
|
||||
front.insert(ParetoPoint {
|
||||
kernel_id: format!("k{}", i),
|
||||
objectives: vec![x, 1.0 - x],
|
||||
generation: 0,
|
||||
});
|
||||
}
|
||||
b.iter(|| black_box(front.hypervolume(&[0.0, 0.0])))
|
||||
});
|
||||
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn bench_curiosity_bonus(c: &mut Criterion) {
|
||||
let arms: Vec<ArmId> = (0..4).map(|i| ArmId(format!("arm_{}", i))).collect();
|
||||
let buckets: Vec<ContextBucket> = (0..18)
|
||||
.map(|i| ContextBucket {
|
||||
difficulty_tier: ["easy", "medium", "hard"][i / 6].into(),
|
||||
category: format!("cat_{}", i % 6),
|
||||
})
|
||||
.collect();
|
||||
|
||||
c.bench_function("curiosity_bonus_18buckets", |b| {
|
||||
let mut curiosity = CuriosityBonus::new(1.41);
|
||||
for _ in 0..500 {
|
||||
for bucket in &buckets {
|
||||
for arm in &arms {
|
||||
curiosity.record_visit(bucket, arm);
|
||||
}
|
||||
}
|
||||
}
|
||||
b.iter(|| {
|
||||
let mut total = 0.0f32;
|
||||
for bucket in &buckets {
|
||||
for arm in &arms {
|
||||
total += curiosity.bonus(black_box(bucket), black_box(arm));
|
||||
}
|
||||
}
|
||||
black_box(total)
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
fn bench_meta_engine_full_cycle(c: &mut Criterion) {
|
||||
c.bench_function("meta_engine_100_decisions", |b| {
|
||||
b.iter(|| {
|
||||
let mut engine = MetaLearningEngine::new();
|
||||
let bucket = ContextBucket {
|
||||
difficulty_tier: "medium".into(),
|
||||
category: "algo".into(),
|
||||
};
|
||||
let arm = ArmId("greedy".into());
|
||||
|
||||
for i in 0..100 {
|
||||
let reward = if i % 3 == 0 { 0.9 } else { 0.5 };
|
||||
engine.record_decision(&bucket, &arm, reward);
|
||||
}
|
||||
|
||||
engine.record_kernel("k1", 0.9, 0.2, 0.8, 1);
|
||||
black_box(engine.health_check())
|
||||
})
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(
|
||||
benches,
|
||||
bench_task_generation,
|
||||
|
|
@ -177,5 +353,11 @@ criterion_group!(
|
|||
bench_knobs_mutate,
|
||||
bench_cost_curve_auc,
|
||||
bench_transfer_prior_extract,
|
||||
bench_regret_tracker,
|
||||
bench_decaying_beta,
|
||||
bench_plateau_detector,
|
||||
bench_pareto_front,
|
||||
bench_curiosity_bonus,
|
||||
bench_meta_engine_full_cycle,
|
||||
);
|
||||
criterion_main!(benches);
|
||||
|
|
|
|||
|
|
@ -45,6 +45,7 @@
|
|||
|
||||
pub mod cost_curve;
|
||||
pub mod domain;
|
||||
pub mod meta_learning;
|
||||
pub mod planning;
|
||||
pub mod policy_kernel;
|
||||
pub mod rust_synthesis;
|
||||
|
|
@ -67,6 +68,10 @@ pub use planning::PlanningDomain;
|
|||
pub use policy_kernel::{PolicyKernel, PolicyKnobs, PopulationSearch, PopulationStats};
|
||||
pub use rust_synthesis::RustSynthesisDomain;
|
||||
pub use tool_orchestration::ToolOrchestrationDomain;
|
||||
pub use meta_learning::{
|
||||
CuriosityBonus, DecayingBeta, MetaLearningEngine, MetaLearningHealth, ParetoFront,
|
||||
ParetoPoint, PlateauAction, PlateauDetector, RegretSummary, RegretTracker,
|
||||
};
|
||||
pub use transfer::{
|
||||
ArmId, BetaParams, ContextBucket, DualPathResult, MetaThompsonEngine, TransferPrior,
|
||||
TransferVerification,
|
||||
|
|
@ -78,6 +83,10 @@ use std::collections::HashMap;
|
|||
///
|
||||
/// Manages multiple domains, transfer learning between them,
|
||||
/// population-based policy search, and the acceleration scoreboard.
|
||||
///
|
||||
/// The `meta` field provides five composable learning improvements:
|
||||
/// regret tracking, decaying priors, plateau detection, Pareto front
|
||||
/// optimization, and curiosity-driven exploration.
|
||||
pub struct DomainExpansionEngine {
|
||||
/// Registered domains.
|
||||
domains: HashMap<DomainId, Box<dyn Domain>>,
|
||||
|
|
@ -87,6 +96,8 @@ pub struct DomainExpansionEngine {
|
|||
pub population: PopulationSearch,
|
||||
/// Acceleration scoreboard tracking convergence across domains.
|
||||
pub scoreboard: AccelerationScoreboard,
|
||||
/// Meta-learning engine: regret, plateau, Pareto, curiosity, decay.
|
||||
pub meta: MetaLearningEngine,
|
||||
/// Holdout tasks per domain for verification.
|
||||
holdouts: HashMap<DomainId, Vec<Task>>,
|
||||
/// Counterexample set: failed solutions that inform future decisions.
|
||||
|
|
@ -110,6 +121,7 @@ impl DomainExpansionEngine {
|
|||
thompson: MetaThompsonEngine::new(arms),
|
||||
population: PopulationSearch::new(8),
|
||||
scoreboard: AccelerationScoreboard::new(),
|
||||
meta: MetaLearningEngine::new(),
|
||||
holdouts: HashMap::new(),
|
||||
counterexamples: HashMap::new(),
|
||||
};
|
||||
|
|
@ -168,12 +180,15 @@ impl DomainExpansionEngine {
|
|||
// Record outcome in Thompson engine.
|
||||
self.thompson.record_outcome(
|
||||
domain_id,
|
||||
bucket,
|
||||
arm,
|
||||
bucket.clone(),
|
||||
arm.clone(),
|
||||
eval.score,
|
||||
1.0, // unit cost for now
|
||||
);
|
||||
|
||||
// Record in meta-learning engine (regret + curiosity + decaying beta).
|
||||
self.meta.record_decision(&bucket, &arm, eval.score);
|
||||
|
||||
// Store counterexamples for poor solutions.
|
||||
if eval.score < 0.3 {
|
||||
self.counterexamples
|
||||
|
|
@ -258,8 +273,33 @@ impl DomainExpansionEngine {
|
|||
}
|
||||
}
|
||||
|
||||
/// Evolve the policy kernel population.
|
||||
/// Evolve the policy kernel population and update Pareto front.
|
||||
pub fn evolve_population(&mut self) {
|
||||
// Record current population into Pareto front before evolving.
|
||||
let gen = self.population.generation();
|
||||
for kernel in self.population.population() {
|
||||
let accuracy = kernel.fitness();
|
||||
let cost = if kernel.cycles > 0 {
|
||||
kernel.total_cost / kernel.cycles as f32
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
// Robustness approximated by consistency across domains.
|
||||
let robustness = if kernel.holdout_scores.len() > 1 {
|
||||
let mean = accuracy;
|
||||
let var: f32 = kernel
|
||||
.holdout_scores
|
||||
.values()
|
||||
.map(|s| (s - mean).powi(2))
|
||||
.sum::<f32>()
|
||||
/ kernel.holdout_scores.len() as f32;
|
||||
(1.0 - var.sqrt()).max(0.0)
|
||||
} else {
|
||||
accuracy
|
||||
};
|
||||
self.meta.record_kernel(&kernel.id, accuracy, cost, robustness, gen);
|
||||
}
|
||||
|
||||
self.population.evolve();
|
||||
}
|
||||
|
||||
|
|
@ -312,6 +352,72 @@ impl DomainExpansionEngine {
|
|||
) -> bool {
|
||||
self.thompson.is_uncertain(domain_id, bucket, 0.15)
|
||||
}
|
||||
|
||||
/// Select arm with curiosity-boosted Thompson Sampling.
|
||||
///
|
||||
/// Combines the standard Thompson sample with a UCB-style exploration
|
||||
/// bonus that favors under-visited bucket/arm combinations.
|
||||
pub fn select_arm_curious(
|
||||
&self,
|
||||
domain_id: &DomainId,
|
||||
bucket: &ContextBucket,
|
||||
) -> Option<ArmId> {
|
||||
let mut rng = rand::thread_rng();
|
||||
// Get all arms and compute boosted scores
|
||||
let prior = self.thompson.extract_prior(domain_id)?;
|
||||
let arms: Vec<ArmId> = prior
|
||||
.bucket_priors
|
||||
.get(bucket)
|
||||
.map(|m| m.keys().cloned().collect())
|
||||
.unwrap_or_default();
|
||||
|
||||
if arms.is_empty() {
|
||||
return self.thompson.select_arm(domain_id, bucket, &mut rng);
|
||||
}
|
||||
|
||||
let mut best_arm = None;
|
||||
let mut best_score = f32::NEG_INFINITY;
|
||||
|
||||
for arm in &arms {
|
||||
let params = prior.get_prior(bucket, arm);
|
||||
let sample = params.sample(&mut rng);
|
||||
let boosted = self.meta.boosted_score(bucket, arm, sample);
|
||||
|
||||
if boosted > best_score {
|
||||
best_score = boosted;
|
||||
best_arm = Some(arm.clone());
|
||||
}
|
||||
}
|
||||
|
||||
best_arm.or_else(|| self.thompson.select_arm(domain_id, bucket, &mut rng))
|
||||
}
|
||||
|
||||
/// Get meta-learning health diagnostics.
|
||||
pub fn meta_health(&self) -> MetaLearningHealth {
|
||||
self.meta.health_check()
|
||||
}
|
||||
|
||||
/// Check cost curve for plateau and get recommended action.
|
||||
pub fn check_plateau(
|
||||
&mut self,
|
||||
domain_id: &DomainId,
|
||||
) -> PlateauAction {
|
||||
if let Some(curve) = self.scoreboard.curves.get(domain_id) {
|
||||
self.meta.check_plateau(&curve.points)
|
||||
} else {
|
||||
PlateauAction::Continue
|
||||
}
|
||||
}
|
||||
|
||||
/// Get regret summary across all learning contexts.
|
||||
pub fn regret_summary(&self) -> RegretSummary {
|
||||
self.meta.regret.summary()
|
||||
}
|
||||
|
||||
/// Get the Pareto front of non-dominated policy kernels.
|
||||
pub fn pareto_front(&self) -> &ParetoFront {
|
||||
&self.meta.pareto
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for DomainExpansionEngine {
|
||||
|
|
|
|||
1413
crates/ruvector-domain-expansion/src/meta_learning.rs
Normal file
1413
crates/ruvector-domain-expansion/src/meta_learning.rs
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue