diff --git a/examples/benchmarks/src/acceptance_test.rs b/examples/benchmarks/src/acceptance_test.rs index 7355e6b8..a217be4b 100644 --- a/examples/benchmarks/src/acceptance_test.rs +++ b/examples/benchmarks/src/acceptance_test.rs @@ -23,7 +23,7 @@ use crate::agi_contract::{ContractDelta, ContractHealth, ViabilityChecklist}; use crate::intelligence_metrics::{DifficultyStats, RawMetrics}; use crate::reasoning_bank::ReasoningBank; -use crate::temporal::{AdaptiveSolver, KnowledgeCompiler, TemporalConstraint, TemporalPuzzle}; +use crate::temporal::{AdaptiveSolver, KnowledgeCompiler, PolicyKernel, TemporalConstraint, TemporalPuzzle}; use crate::timepuzzles::{PuzzleGenerator, PuzzleGeneratorConfig}; use anyhow::Result; use serde::{Deserialize, Serialize}; @@ -33,23 +33,28 @@ use serde::{Deserialize, Serialize}; // ═══════════════════════════════════════════════════════════════════════════ /// Ablation mode for controlled comparison. -/// Every cycle runs the same seeded tasks in each mode. +/// +/// All modes share the same solver capabilities (including skip_weekday). +/// What differs is the **policy mechanism** that decides how to use them: +/// - Mode A: Fixed heuristic policy (posterior_range + distractor_count) +/// - Mode B: Compiler-suggested policy (compiled skip_mode from signatures) +/// - Mode C: Learned PolicyKernel policy (contextual bandit over skip modes) #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] pub enum AblationMode { - /// Mode A: No compiler, fixed router (baseline) + /// Mode A: Fixed heuristic policy (baseline) Baseline, - /// Mode B: Compiler enabled, fixed router + /// Mode B: Compiler-suggested policy CompilerOnly, - /// Mode C: Compiler enabled, adaptive router + /// Mode C: Learned PolicyKernel policy (compiler + router + learning) Full, } impl std::fmt::Display for AblationMode { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - AblationMode::Baseline => write!(f, "A (baseline)"), - AblationMode::CompilerOnly => write!(f, "B (compiler)"), - AblationMode::Full => write!(f, "C (compiler+router)"), + AblationMode::Baseline => write!(f, "A (fixed policy)"), + AblationMode::CompilerOnly => write!(f, "B (compiled policy)"), + AblationMode::Full => write!(f, "C (learned policy)"), } } } @@ -64,6 +69,10 @@ pub struct AblationResult { pub compiler_misses: usize, pub compiler_false_hits: usize, pub cost_saved_by_compiler: f64, + /// PolicyKernel stats + pub early_commit_rate: f64, + pub early_commit_penalties: f64, + pub policy_context_buckets: usize, } /// Full ablation comparison across all three modes. @@ -113,6 +122,17 @@ impl AblationComparison { self.mode_b.compiler_hits, self.mode_b.compiler_misses, self.mode_b.compiler_false_hits); println!(" Cost saved by compiler: {:.2}", self.mode_b.cost_saved_by_compiler); println!(); + println!(" PolicyKernel:"); + println!(" Mode A early-commit rate: {:.2}%", self.mode_a.early_commit_rate * 100.0); + println!(" Mode B early-commit rate: {:.2}%", self.mode_b.early_commit_rate * 100.0); + println!(" Mode C early-commit rate: {:.2}% (context buckets: {})", + self.mode_c.early_commit_rate * 100.0, self.mode_c.policy_context_buckets); + println!(); + println!(" Policy Differences (all modes have same capabilities):"); + println!(" Mode A: fixed heuristic (posterior_range + distractor_count)"); + println!(" Mode B: compiler-suggested skip_mode from signatures"); + println!(" Mode C: learned PolicyKernel (contextual bandit)"); + println!(); println!(" Ablation Assertions:"); println!(" B beats A on cost (>=15%): {}", if self.b_beats_a_cost { "PASS" } else { "FAIL" }); @@ -327,6 +347,12 @@ pub fn run_acceptance_test(config: &HoldoutConfig) -> Result { } /// Run acceptance test in a specific ablation mode. +/// +/// All modes share the same solver capabilities. +/// Policy mechanism differs: +/// - Baseline: fixed heuristic policy +/// - CompilerOnly: compiler-suggested policy +/// - Full: learned PolicyKernel policy pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> Result { // 1. Generate frozen holdout set let holdout = generate_holdout(config)?; @@ -334,6 +360,7 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> // 2. Initialize persistent learning state let mut bank = ReasoningBank::new(); let mut compiler = KnowledgeCompiler::new(); + let mut policy_kernel = PolicyKernel::new(); let mut cycle_metrics: Vec = Vec::new(); let mut health_history: Vec = Vec::new(); @@ -354,10 +381,16 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> let checkpoint_id = bank.checkpoint(); // 3. Training phase: solve new tasks, update bank - let training_acc = train_cycle_mode(&mut bank, &mut compiler, config, cycle, compiler_enabled, router_enabled)?; + let training_acc = train_cycle_mode( + &mut bank, &mut compiler, &mut policy_kernel, + config, cycle, compiler_enabled, router_enabled, + )?; // 4. Holdout evaluation: clean pass (quick probe for rollback check) - let (_, probe_acc) = evaluate_holdout_clean_mode(&holdout, &bank, &compiler, config, compiler_enabled, router_enabled)?; + let (_, probe_acc) = evaluate_holdout_clean_mode( + &holdout, &bank, &compiler, &policy_kernel, + config, compiler_enabled, router_enabled, + )?; // Rollback if training made accuracy worse (viability check #3) if cycle > 0 { @@ -382,12 +415,18 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> } // 5. Holdout evaluation: clean (definitive, with possibly rolled-back bank) - let (clean_raw, clean_acc) = evaluate_holdout_clean_mode(&holdout, &bank, &compiler, config, compiler_enabled, router_enabled)?; + let (clean_raw, clean_acc) = evaluate_holdout_clean_mode( + &holdout, &bank, &compiler, &policy_kernel, + config, compiler_enabled, router_enabled, + )?; // 6. Holdout evaluation: noisy pass - let (noisy_raw, noise_acc) = evaluate_holdout_noisy_mode(&holdout, &bank, &compiler, config, cycle, compiler_enabled, router_enabled)?; + let (noisy_raw, noise_acc) = evaluate_holdout_noisy_mode( + &holdout, &bank, &compiler, &policy_kernel, + config, cycle, compiler_enabled, router_enabled, + )?; - // 6. Merge clean + noisy into combined contract raw + // Merge clean + noisy into combined contract raw let combined = merge_raw(&clean_raw, &noisy_raw); let health = ContractHealth::from_raw(&combined); health_history.push(health.clone()); @@ -506,10 +545,13 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> 0.0 }; - // Print compiler diagnostics in verbose mode + // Print diagnostics in verbose mode if config.verbose && compiler_enabled { compiler.print_diagnostics(); } + if config.verbose { + policy_kernel.print_diagnostics(); + } Ok(AblationResult { mode: mode.clone(), @@ -518,13 +560,19 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> compiler_misses: compiler.misses, compiler_false_hits: compiler.false_hits, cost_saved_by_compiler: cost_saved, + early_commit_rate: policy_kernel.early_commit_rate(), + early_commit_penalties: policy_kernel.early_commit_penalties, + policy_context_buckets: policy_kernel.context_stats.len(), }) } /// Run all three ablation modes and compare results. -/// Mode A = baseline (no compiler, fixed router) -/// Mode B = compiler only (Strategy Zero enabled) -/// Mode C = full (compiler + adaptive router) +/// +/// All modes share the same solver capabilities (skip_weekday, rewriting, etc). +/// What differs is the policy mechanism: +/// Mode A = fixed heuristic policy (posterior_range + distractor_count) +/// Mode B = compiler-suggested policy (compiled skip_mode) +/// Mode C = learned PolicyKernel policy (contextual bandit) pub fn run_ablation_comparison(config: &HoldoutConfig) -> Result { let mode_a = run_acceptance_test_mode(config, &AblationMode::Baseline)?; let mode_b = run_acceptance_test_mode(config, &AblationMode::CompilerOnly)?; @@ -587,6 +635,7 @@ fn generate_holdout(config: &HoldoutConfig) -> Result> { fn train_cycle_mode( bank: &mut ReasoningBank, compiler: &mut KnowledgeCompiler, + policy_kernel: &mut PolicyKernel, config: &HoldoutConfig, cycle: usize, compiler_enabled: bool, @@ -596,6 +645,7 @@ fn train_cycle_mode( solver.compiler = compiler.clone(); solver.compiler_enabled = compiler_enabled; solver.router_enabled = router_enabled; + solver.policy_kernel = policy_kernel.clone(); let pc = PuzzleGeneratorConfig { min_difficulty: 1, max_difficulty: 10, @@ -659,6 +709,7 @@ fn train_cycle_mode( *bank = solver.reasoning_bank.clone(); *compiler = solver.compiler.clone(); + *policy_kernel = solver.policy_kernel.clone(); Ok(correct as f64 / puzzles.len() as f64) } @@ -666,6 +717,7 @@ fn evaluate_holdout_clean_mode( holdout: &[TemporalPuzzle], bank: &ReasoningBank, compiler: &KnowledgeCompiler, + policy_kernel: &PolicyKernel, config: &HoldoutConfig, compiler_enabled: bool, router_enabled: bool, @@ -675,6 +727,7 @@ fn evaluate_holdout_clean_mode( solver.compiler = compiler.clone(); solver.compiler_enabled = compiler_enabled; solver.router_enabled = router_enabled; + solver.policy_kernel = policy_kernel.clone(); solver.external_step_limit = Some(config.step_budget); for puzzle in holdout { @@ -711,6 +764,7 @@ fn evaluate_holdout_noisy_mode( holdout: &[TemporalPuzzle], bank: &ReasoningBank, compiler: &KnowledgeCompiler, + policy_kernel: &PolicyKernel, config: &HoldoutConfig, cycle: usize, compiler_enabled: bool, @@ -721,6 +775,7 @@ fn evaluate_holdout_noisy_mode( solver.compiler = compiler.clone(); solver.compiler_enabled = compiler_enabled; solver.router_enabled = router_enabled; + solver.policy_kernel = policy_kernel.clone(); solver.external_step_limit = Some(config.step_budget); let mut rng = Rng64::new(config.holdout_seed.wrapping_add(cycle as u64 * 31337)); diff --git a/examples/benchmarks/src/temporal.rs b/examples/benchmarks/src/temporal.rs index 25ea2803..98bfcb22 100644 --- a/examples/benchmarks/src/temporal.rs +++ b/examples/benchmarks/src/temporal.rs @@ -54,6 +54,8 @@ pub struct TemporalPuzzle { pub difficulty: u8, /// Tags for categorization pub tags: Vec, + /// Multi-dimensional difficulty vector (None = use scalar difficulty) + pub difficulty_vector: Option, } impl TemporalPuzzle { @@ -67,6 +69,7 @@ impl TemporalPuzzle { solutions: Vec::new(), difficulty: 5, tags: Vec::new(), + difficulty_vector: None, } } @@ -497,6 +500,265 @@ mod tests { // ============================================================================ use crate::reasoning_bank::{ReasoningBank, Strategy, Trajectory, Verdict}; +use crate::timepuzzles::DifficultyVector; + +// ═══════════════════════════════════════════════════════════════════════════ +// PolicyKernel — learned skip-mode selection +// ═══════════════════════════════════════════════════════════════════════════ + +/// Skip mode for the temporal solver scan loop. +/// All modes have access to all skip modes. +/// What differs is the *policy* that selects the mode. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +pub enum SkipMode { + /// Linear scan: check every date in range (1-day increments) + None, + /// Weekday skip: advance by 7 days when DayOfWeek constraint is present + Weekday, + /// Hybrid: weekday skip for initial scan, then full refinement pass + /// around candidates to catch near-misses under noise + Hybrid, +} + +impl Default for SkipMode { + fn default() -> Self { + SkipMode::None + } +} + +impl std::fmt::Display for SkipMode { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + SkipMode::None => write!(f, "none"), + SkipMode::Weekday => write!(f, "weekday"), + SkipMode::Hybrid => write!(f, "hybrid"), + } + } +} + +/// Context features for PolicyKernel decisions. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PolicyContext { + /// Number of dates in the posterior (search range) + pub posterior_range: usize, + /// Number of distractor constraints in the puzzle + pub distractor_count: usize, + /// Whether a DayOfWeek constraint is present + pub has_day_of_week: bool, + /// Whether noise was injected + pub noisy: bool, + /// Difficulty vector components + pub difficulty: DifficultyVector, + /// Recent false-hit density (rolling window) + pub recent_false_hit_rate: f64, +} + +/// Outcome of a skip-mode decision for learning. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct SkipOutcome { + /// The skip mode that was used + pub mode: SkipMode, + /// Whether the solve was correct + pub correct: bool, + /// Steps taken + pub steps: usize, + /// Whether this was an early commit that turned out wrong + pub early_commit_wrong: bool, +} + +/// Per-context skip-mode statistics for learned policy. +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct SkipModeStats { + pub attempts: usize, + pub successes: usize, + pub total_steps: usize, + pub early_commit_wrongs: usize, +} + +impl SkipModeStats { + /// Reward: balances accuracy, cost, and early-commit safety. + pub fn reward(&self) -> f64 { + if self.attempts == 0 { return 0.5; } + let accuracy = self.successes as f64 / self.attempts as f64; + let cost_bonus = 0.3 * (1.0 - (self.total_steps as f64 / self.attempts as f64) / 200.0).max(0.0); + let penalty = if self.early_commit_wrongs > 0 { + 0.2 * (self.early_commit_wrongs as f64 / self.attempts as f64) + } else { + 0.0 + }; + (accuracy * 0.5 + cost_bonus - penalty).max(0.0) + } +} + +/// PolicyKernel: decides skip_mode based on context. +/// +/// Three policy levels: +/// - **Fixed** (Mode A): deterministic heuristic based on posterior_range + distractor_count +/// - **Compiled** (Mode B): compiler-suggested skip_mode from CompiledSolveConfig +/// - **Learned** (Mode C): contextual stats drive selection, adapts from outcomes +#[derive(Clone, Debug, Default, Serialize, Deserialize)] +pub struct PolicyKernel { + /// Per-context bucket → per-skip-mode stats (for learned policy) + pub context_stats: HashMap>, + /// Early commit penalty accumulator + pub early_commit_penalties: f64, + /// Total early commits tracked + pub early_commits_total: usize, + /// Total early commits that were wrong + pub early_commits_wrong: usize, + /// Exploration rate for learned policy + pub epsilon: f64, + /// RNG state + rng_state: u64, +} + +impl PolicyKernel { + pub fn new() -> Self { + Self { + epsilon: 0.15, + rng_state: 42, + ..Default::default() + } + } + + /// Fixed baseline policy (Mode A): + /// Uses posterior_range + distractor_count to decide. + /// - If DayOfWeek is present AND posterior_range > 30 AND distractor_count == 0: Weekday + /// - If DayOfWeek is present AND distractor_count > 0: Hybrid (safe fallback) + /// - Otherwise: None + pub fn fixed_policy(ctx: &PolicyContext) -> SkipMode { + if !ctx.has_day_of_week { + return SkipMode::None; + } + if ctx.distractor_count == 0 && ctx.posterior_range > 30 { + SkipMode::Weekday + } else if ctx.distractor_count > 0 { + // Distractors present: skip is risky, use hybrid for safety + SkipMode::Hybrid + } else { + // Small range: skip saves little, linear is fine + SkipMode::None + } + } + + /// Compiled policy (Mode B): + /// Uses compiler-suggested skip_mode from CompiledSolveConfig. + /// Falls back to fixed policy if compiler has no suggestion. + pub fn compiled_policy(ctx: &PolicyContext, compiled_skip: Option) -> SkipMode { + compiled_skip.unwrap_or_else(|| Self::fixed_policy(ctx)) + } + + /// Learned policy (Mode C): + /// Uses contextual stats to pick the best skip mode. + /// Epsilon-greedy exploration for discovering better policies. + pub fn learned_policy(&mut self, ctx: &PolicyContext) -> SkipMode { + if !ctx.has_day_of_week { + return SkipMode::None; + } + + let bucket = Self::context_bucket(ctx); + + // Epsilon-greedy exploration + let r = self.next_f64(); + if r < self.epsilon { + // Explore: random mode + return match (self.next_f64() * 3.0) as u8 { + 0 => SkipMode::None, + 1 => SkipMode::Weekday, + _ => SkipMode::Hybrid, + }; + } + + // Exploit: pick mode with highest reward + let stats_map = self.context_stats.entry(bucket).or_default(); + let modes = ["none", "weekday", "hybrid"]; + let mut best_mode = SkipMode::None; + let mut best_reward = -1.0f64; + + for mode_name in &modes { + let stats = stats_map.get(*mode_name).cloned().unwrap_or_default(); + let reward = stats.reward(); + if reward > best_reward { + best_reward = reward; + best_mode = match *mode_name { + "weekday" => SkipMode::Weekday, + "hybrid" => SkipMode::Hybrid, + _ => SkipMode::None, + }; + } + } + + best_mode + } + + /// Record the outcome of a skip-mode decision. + pub fn record_outcome(&mut self, ctx: &PolicyContext, outcome: &SkipOutcome) { + let bucket = Self::context_bucket(ctx); + let mode_name = outcome.mode.to_string(); + + let stats_map = self.context_stats.entry(bucket).or_default(); + let stats = stats_map.entry(mode_name).or_default(); + stats.attempts += 1; + stats.total_steps += outcome.steps; + if outcome.correct { stats.successes += 1; } + if outcome.early_commit_wrong { + stats.early_commit_wrongs += 1; + self.early_commits_wrong += 1; + // Penalty proportional to how early the commit was + // (fewer steps = earlier commit = higher penalty) + let penalty = 1.0 - (outcome.steps as f64 / 200.0).min(1.0); + self.early_commit_penalties += penalty; + } + self.early_commits_total += 1; + } + + /// Early commit penalty rate. + pub fn early_commit_rate(&self) -> f64 { + if self.early_commits_total == 0 { return 0.0; } + self.early_commits_wrong as f64 / self.early_commits_total as f64 + } + + /// Build a context bucket key for stats grouping. + fn context_bucket(ctx: &PolicyContext) -> String { + let range_bucket = match ctx.posterior_range { + 0..=30 => "small", + 31..=100 => "medium", + 101..=300 => "large", + _ => "xlarge", + }; + let distractor_bucket = if ctx.distractor_count == 0 { "clean" } else { "distracted" }; + format!("{}:{}", range_bucket, distractor_bucket) + } + + fn next_f64(&mut self) -> f64 { + let mut x = self.rng_state.max(1); + x ^= x << 13; x ^= x >> 7; x ^= x << 17; + self.rng_state = x; + (x as f64) / (u64::MAX as f64) + } + + /// Print diagnostic summary. + pub fn print_diagnostics(&self) { + println!(); + println!(" PolicyKernel Diagnostics"); + println!(" Early commits: {}/{} wrong ({:.1}%)", + self.early_commits_wrong, self.early_commits_total, + self.early_commit_rate() * 100.0); + println!(" Accumulated penalty: {:.2}", self.early_commit_penalties); + println!(" Context buckets: {}", self.context_stats.len()); + + for (bucket, modes) in &self.context_stats { + println!(" {}", bucket); + for (mode, stats) in modes { + println!(" {:<8} attempts={:<4} success={:<4} avg_steps={:.1} ecw={} reward={:.3}", + mode, stats.attempts, stats.successes, + if stats.attempts > 0 { stats.total_steps as f64 / stats.attempts as f64 } else { 0.0 }, + stats.early_commit_wrongs, + stats.reward()); + } + } + } +} /// Adaptive temporal solver with learning capabilities /// @@ -529,6 +791,8 @@ pub struct CompiledSolveConfig { pub hit_count: usize, /// Counterexample count (failures on this signature) pub counterexample_count: usize, + /// Compiled skip mode suggestion (for Mode B policy) + pub compiled_skip_mode: SkipMode, } impl CompiledSolveConfig { @@ -607,6 +871,10 @@ impl KnowledgeCompiler { let sig = format!("{}:{}:{}", COMPILER_SIG_VERSION, traj.difficulty, sig_parts.join(",")); if let Some(attempt) = traj.attempts.first() { + // Determine compiled skip mode from constraint types + let has_dow = traj.constraint_types.iter().any(|c| c == "DayOfWeek"); + let compiled_skip = if has_dow { SkipMode::Weekday } else { SkipMode::None }; + let entry = self.signature_cache.entry(sig).or_insert(CompiledSolveConfig { use_rewriting: true, max_steps: attempt.steps, @@ -616,6 +884,7 @@ impl KnowledgeCompiler { stop_after_first: true, hit_count: 0, counterexample_count: 0, + compiled_skip_mode: compiled_skip, }); // Keep minimum steps that succeeded entry.max_steps = entry.max_steps.min(attempt.steps); @@ -898,6 +1167,8 @@ pub struct AdaptiveSolver { pub router: StrategyRouter, /// Whether to use the adaptive router instead of fixed strategy selection pub router_enabled: bool, + /// PolicyKernel for skip-mode decisions (all modes use this) + pub policy_kernel: PolicyKernel, } impl Default for AdaptiveSolver { @@ -919,6 +1190,7 @@ impl AdaptiveSolver { compiler_enabled: false, router: StrategyRouter::new(), router_enabled: false, + policy_kernel: PolicyKernel::new(), } } @@ -934,6 +1206,7 @@ impl AdaptiveSolver { compiler_enabled: false, router: StrategyRouter::new(), router_enabled: false, + policy_kernel: PolicyKernel::new(), } } @@ -947,11 +1220,45 @@ impl AdaptiveSolver { &mut self.solver } + /// Build a PolicyContext from puzzle features. + fn build_policy_context(&self, puzzle: &TemporalPuzzle) -> PolicyContext { + let has_dow = puzzle.constraints.iter().any(|c| matches!(c, TemporalConstraint::DayOfWeek(_))); + + // Estimate posterior range from Between constraint + let posterior_range = puzzle.constraints.iter().find_map(|c| match c { + TemporalConstraint::Between(start, end) => { + Some((*end - *start).num_days().max(0) as usize) + } + _ => None, + }).unwrap_or(365); + + // Count distractors: redundant constraints that don't narrow the search + // (wider Between, redundant InYear, After well before range) + let distractor_count = count_distractors(puzzle); + + let dv = puzzle.difficulty_vector.clone().unwrap_or_else(|| { + DifficultyVector::from_scalar(puzzle.difficulty) + }); + + PolicyContext { + posterior_range, + distractor_count, + has_day_of_week: has_dow, + noisy: false, + difficulty: dv, + recent_false_hit_rate: self.policy_kernel.early_commit_rate(), + } + } + /// Solve a puzzle with adaptive learning. - /// If compiler_enabled, tries Strategy Zero (compiled config) first. - /// If router_enabled, uses contextual bandit for strategy selection. + /// + /// All modes have access to the same solver capabilities (including skip_weekday). + /// What differs is the **policy** that decides how to use them: + /// - Mode A (baseline): fixed heuristic policy + /// - Mode B (compiler): compiler-suggested policy + /// - Mode C (full): learned PolicyKernel policy pub fn solve(&mut self, puzzle: &TemporalPuzzle) -> Result { - // Reset weekday skipping (set for Mode C in fallback path) + // Reset solver state self.solver.skip_weekday = None; // Get constraint types for pattern matching @@ -961,6 +1268,44 @@ impl AdaptiveSolver { .map(|c| constraint_type_name(c)) .collect(); + // Build policy context (same for all modes) + let policy_ctx = self.build_policy_context(puzzle); + + // ─── PolicyKernel: decide skip_mode (all modes participate) ────── + let skip_mode = if self.router_enabled { + // Mode C: learned policy + self.policy_kernel.learned_policy(&policy_ctx) + } else if self.compiler_enabled { + // Mode B: compiler-suggested policy + let compiled_skip = self.compiler.lookup(puzzle) + .map(|config| config.compiled_skip_mode.clone()); + PolicyKernel::compiled_policy(&policy_ctx, compiled_skip) + } else { + // Mode A: fixed baseline policy + PolicyKernel::fixed_policy(&policy_ctx) + }; + + // Apply skip_mode to solver + match &skip_mode { + SkipMode::None => { + self.solver.skip_weekday = None; + } + SkipMode::Weekday => { + self.solver.skip_weekday = puzzle.constraints.iter().find_map(|c| match c { + TemporalConstraint::DayOfWeek(w) => Some(*w), + _ => None, + }); + } + SkipMode::Hybrid => { + // Hybrid: use weekday skip for initial scan (set here), + // then do a refinement pass below if needed + self.solver.skip_weekday = puzzle.constraints.iter().find_map(|c| match c { + TemporalConstraint::DayOfWeek(w) => Some(*w), + _ => None, + }); + } + } + // Accumulated steps across all attempts (Strategy Zero + fallback) let mut extra_steps: usize = 0; let mut extra_tool_calls: usize = 0; @@ -968,7 +1313,6 @@ impl AdaptiveSolver { // ─── Strategy Zero: KnowledgeCompiler (bounded trial) ──────────── if self.compiler_enabled { let conf_threshold = self.compiler.confidence_threshold; - // Extract all config data before releasing the borrow let compiled = self.compiler.lookup(puzzle).map(|config| { ( config.expected_correct, @@ -981,7 +1325,6 @@ impl AdaptiveSolver { if let Some((expected_correct, confidence, trial_budget, use_rewriting, stop_first)) = compiled { if expected_correct && confidence >= conf_threshold { - // Bounded trial: cap at 25% of external limit to make misses cheap self.solver.calendar_tool = use_rewriting; self.solver.stop_after_first = stop_first; self.solver.max_steps = trial_budget; @@ -990,11 +1333,9 @@ impl AdaptiveSolver { let result = self.solver.solve(puzzle)?; let latency = start.elapsed().as_millis() as u64; - // Reset stop_after_first for fallback path self.solver.stop_after_first = false; if result.correct { - // Strategy Zero win — record and return self.compiler.record_success(puzzle, result.steps); let mut trajectory = Trajectory::new(&puzzle.id, puzzle.difficulty); trajectory.constraint_types = constraint_types; @@ -1011,7 +1352,15 @@ impl AdaptiveSolver { self.reasoning_bank.record_trajectory(trajectory); self.episodes += 1; - // Update router if enabled + // Record successful skip outcome + let outcome = SkipOutcome { + mode: skip_mode, + correct: true, + steps: result.steps, + early_commit_wrong: false, + }; + self.policy_kernel.record_outcome(&policy_ctx, &outcome); + if self.router_enabled { let ctx = StrategyRouter::context(puzzle, false); self.router.update(&ctx, "compiler", true, result.steps, false); @@ -1019,10 +1368,20 @@ impl AdaptiveSolver { return Ok(result); } else { - // Strategy Zero failed — bounded trial overhead only extra_steps += result.steps; extra_tool_calls += result.tool_calls; self.compiler.record_failure(puzzle); + + // Record early commit wrong if solver claimed solved but was wrong + if result.solved && !result.correct { + let outcome = SkipOutcome { + mode: skip_mode.clone(), + correct: false, + steps: result.steps, + early_commit_wrong: true, + }; + self.policy_kernel.record_outcome(&policy_ctx, &outcome); + } } } } @@ -1038,13 +1397,11 @@ impl AdaptiveSolver { "adaptive".to_string(), ]; let ranked = self.router.select(&ctx, &available); - // Use the top-ranked strategy if let Some((top_strategy, _)) = ranked.first() { self.current_strategy = self.reasoning_bank .strategy_from_name(top_strategy, puzzle.difficulty); } } else { - // Fixed strategy selection from ReasoningBank self.current_strategy = self .reasoning_bank .get_strategy(puzzle.difficulty, &constraint_types); @@ -1056,17 +1413,6 @@ impl AdaptiveSolver { .unwrap_or(self.current_strategy.max_steps); self.solver.stop_after_first = false; - // Weekday skipping: detect DayOfWeek constraint for compiler/router modes - // Mode A (baseline): no skipping → linear scan - // Mode B (compiler): skipping → compiler policy reduces cost - // Mode C (full): skipping → compiler + router optimize further - if self.compiler_enabled || self.router_enabled { - self.solver.skip_weekday = puzzle.constraints.iter().find_map(|c| match c { - TemporalConstraint::DayOfWeek(w) => Some(*w), - _ => None, - }); - } - // Create trajectory for this puzzle let mut trajectory = Trajectory::new(&puzzle.id, puzzle.difficulty); trajectory.constraint_types = constraint_types; @@ -1076,6 +1422,50 @@ impl AdaptiveSolver { let mut result = self.solver.solve(puzzle)?; trajectory.latency_ms = start.elapsed().as_millis() as u64; + // ─── Hybrid refinement pass ────────────────────────────────────── + // If Hybrid mode was used and we found solutions via weekday skip, + // do a narrow linear scan around each candidate to catch near-misses. + if skip_mode == SkipMode::Hybrid && !result.solutions.is_empty() { + let mut refined_solutions = result.solutions.clone(); + self.solver.skip_weekday = None; // Linear for refinement + let saved_max = self.solver.max_steps; + self.solver.max_steps = 14; // Check ±7 days around each candidate + + for candidate in &result.solutions { + let refine_start = *candidate - chrono::Duration::days(7); + let refine_end = *candidate + chrono::Duration::days(7); + let refine_puzzle = TemporalPuzzle { + id: puzzle.id.clone(), + description: puzzle.description.clone(), + constraints: puzzle.constraints.clone(), + references: puzzle.references.clone(), + solutions: puzzle.solutions.clone(), + difficulty: puzzle.difficulty, + tags: puzzle.tags.clone(), + difficulty_vector: puzzle.difficulty_vector.clone(), + }; + // Manually search the refinement window + let mut cur = refine_start; + while cur <= refine_end { + if let Ok(true) = refine_puzzle.check_date(cur) { + if !refined_solutions.contains(&cur) { + refined_solutions.push(cur); + } + } + cur = match cur.succ_opt() { Some(d) => d, None => break }; + result.steps += 1; + } + } + self.solver.max_steps = saved_max; + result.solutions = refined_solutions; + // Re-check correctness after refinement + result.correct = if puzzle.solutions.is_empty() { + true + } else { + puzzle.solutions.iter().all(|s| result.solutions.contains(s)) + }; + } + // Accumulate overhead from failed Strategy Zero attempt result.steps += extra_steps; result.tool_calls += extra_tool_calls; @@ -1117,6 +1507,16 @@ impl AdaptiveSolver { trajectory.set_verdict(verdict, puzzle.solutions.first().map(|d| d.to_string())); + // ─── Record PolicyKernel outcome ───────────────────────────────── + let early_commit_wrong = result.solved && !result.correct; + let outcome = SkipOutcome { + mode: skip_mode, + correct: result.correct, + steps: result.steps, + early_commit_wrong, + }; + self.policy_kernel.record_outcome(&policy_ctx, &outcome); + // Update router stats if self.router_enabled { let ctx = StrategyRouter::context(puzzle, false); @@ -1178,6 +1578,53 @@ impl AdaptiveSolver { } } +/// Count distractor constraints in a puzzle. +/// A distractor is a constraint that is likely redundant (doesn't narrow the search much). +fn count_distractors(puzzle: &TemporalPuzzle) -> usize { + let mut count = 0; + let mut seen_between = false; + let mut seen_inyear = false; + let mut seen_dow = false; + + for c in &puzzle.constraints { + match c { + TemporalConstraint::Between(_, _) => { + if seen_between { + count += 1; // Redundant Between (wider or duplicate) + } + seen_between = true; + } + TemporalConstraint::InYear(_) => { + if seen_inyear { + count += 1; // Redundant InYear + } + seen_inyear = true; + } + TemporalConstraint::DayOfWeek(_) => { + if seen_dow { + count += 1; // Redundant DayOfWeek + } + seen_dow = true; + } + TemporalConstraint::After(d) => { + // After a date well before the Between range → distractor + if seen_between { + if let Some(between_start) = puzzle.constraints.iter().find_map(|c2| match c2 { + TemporalConstraint::Between(s, _) => Some(*s), + _ => None, + }) { + if *d < between_start - chrono::Duration::days(14) { + count += 1; + } + } + } + } + _ => {} + } + } + count +} + /// Get the type name of a constraint for pattern matching fn constraint_type_name(constraint: &TemporalConstraint) -> String { match constraint { diff --git a/examples/benchmarks/src/timepuzzles.rs b/examples/benchmarks/src/timepuzzles.rs index b1e8cc88..19aa74c3 100644 --- a/examples/benchmarks/src/timepuzzles.rs +++ b/examples/benchmarks/src/timepuzzles.rs @@ -15,6 +15,61 @@ use chrono::{Datelike, NaiveDate}; use rand::prelude::*; use serde::{Deserialize, Serialize}; +/// Multi-dimensional difficulty vector. +/// +/// Replaces single-axis difficulty to prevent collapsing effects. +/// Higher difficulty = more work and more ambiguity, NOT tighter posterior. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct DifficultyVector { + /// Size of the search range (days) + pub range_size: usize, + /// Target number of valid candidates in posterior + pub posterior_target: usize, + /// Rate of distractor constraints (0.0 - 1.0) + pub distractor_rate: f64, + /// Rate of noise injection (0.0 - 1.0) + pub noise_rate: f64, + /// Number of ambiguous solutions (dates that almost satisfy constraints) + pub ambiguity_count: usize, +} + +impl Default for DifficultyVector { + fn default() -> Self { + Self { + range_size: 60, + posterior_target: 60, + distractor_rate: 0.0, + noise_rate: 0.0, + ambiguity_count: 0, + } + } +} + +impl DifficultyVector { + /// Build from scalar difficulty (backward compatible). + /// Higher difficulty = wider range, more distractors, more ambiguity. + pub fn from_scalar(difficulty: u8) -> Self { + let d = difficulty.min(10).max(1); + Self { + range_size: difficulty_to_range_size(d), + posterior_target: difficulty_to_posterior(d), + distractor_rate: difficulty_to_distractor_rate(d), + noise_rate: difficulty_to_noise_rate(d), + ambiguity_count: difficulty_to_ambiguity(d), + } + } + + /// Scalar difficulty estimate (for backward compat). + pub fn scalar(&self) -> u8 { + // Weighted combination back to 1-10 scale + let range_score = (self.range_size as f64 / 365.0 * 10.0).min(10.0); + let distractor_score = self.distractor_rate * 10.0; + let ambiguity_score = (self.ambiguity_count as f64 / 5.0 * 10.0).min(10.0); + let combined = (range_score * 0.3 + distractor_score * 0.3 + ambiguity_score * 0.4) as u8; + combined.max(1).min(10) + } +} + /// Puzzle generator configuration #[derive(Clone, Debug, Serialize, Deserialize)] pub struct PuzzleGeneratorConfig { @@ -205,33 +260,28 @@ impl PuzzleGenerator { )); } - /// Generate a single puzzle with difficulty-based posterior targeting. + /// Generate a single puzzle with multi-dimensional difficulty vector. /// - /// Range size scales with difficulty: - /// - Low difficulty (1-2): wide range, no DayOfWeek → many valid dates - /// - Medium difficulty (3-6): DayOfWeek creates 7x cost surface - /// - High difficulty (7-10): narrower range + anchor constraints + /// Difficulty scaling (higher = more work, not tighter posterior): + /// - Low (1-2): small range, no DayOfWeek, no distractors + /// - Medium (3-6): DayOfWeek + moderate range = 7x cost surface + /// - High (7-10): wide range + distractors + ambiguity + anchor constraints /// - /// DayOfWeek constraint (difficulty 3+) creates a cost surface that - /// weekday-skipping in Mode C can exploit for ~7x speedup. + /// All modes have access to weekday skipping; what differs is the policy. pub fn generate_puzzle(&mut self, id: impl Into) -> Result { let id = id.into(); let difficulty = self .rng .gen_range(self.config.min_difficulty..=self.config.max_difficulty); - // Target posterior: number of valid dates after all constraints - let target_post = target_posterior(difficulty); + // Build difficulty vector from scalar + let dv = DifficultyVector::from_scalar(difficulty); - // DayOfWeek (difficulty 3+): creates 7x cost surface for solver optimization + // DayOfWeek (difficulty 3+): creates cost surface for policy decisions let use_day_of_week = difficulty >= 3; - // Search range: posterior * 7 when DayOfWeek constrains (solver scans all) - let range_days = if use_day_of_week { - (target_post * 7).min(365) as i64 - } else { - target_post as i64 - }; + // Range size from difficulty vector (wider range at higher difficulty) + let range_days = dv.range_size as i64; // Pick target date let year = self @@ -255,6 +305,9 @@ impl PuzzleGenerator { .with_difficulty(difficulty) .with_solutions(vec![target]); + // Attach difficulty vector + puzzle.difficulty_vector = Some(dv.clone()); + // Base constraints: InYear + Between (defines search range) puzzle .constraints @@ -265,15 +318,15 @@ impl PuzzleGenerator { let mut used_anchors: Vec = Vec::new(); - // DayOfWeek (difficulty 3+): creates 7x cost surface + // DayOfWeek (difficulty 3+): creates cost surface for all modes if use_day_of_week { puzzle .constraints .push(TemporalConstraint::DayOfWeek(target.weekday())); } - // Anchor reference for high difficulty (8+) - if difficulty >= 8 && self.config.relative_constraints { + // Anchor reference for high difficulty (7+) + if difficulty >= 7 && self.config.relative_constraints { if let Some(anchor) = self.anchors.choose(&mut self.rng).cloned() { let diff = (target - anchor.date).num_days(); let constraint = if diff >= 0 { @@ -291,23 +344,51 @@ impl PuzzleGenerator { puzzle.references.insert(anchor.name.clone(), anchor.date); } - // Distractor injection (difficulty 5+) - let distractor_chance: f64 = match difficulty { - 1..=4 => 0.0, - 5..=6 => 0.10, - 7..=8 => 0.15, - _ => 0.25, - }; - if distractor_chance > 0.0 && self.rng.gen_bool(distractor_chance.min(0.99)) { + // Distractor injection (from difficulty vector rate) + if dv.distractor_rate > 0.0 && self.rng.gen_bool(dv.distractor_rate.min(0.99)) { let distractor = self.generate_distractor(target, range_start, range_end); puzzle.constraints.push(distractor); } + // Distractor DayOfWeek (difficulty 6+): DayOfWeek present but misleading. + // Adds a SECOND DayOfWeek that is a distractor — it matches the target + // but unconditional weekday skipping on the wrong dow will miss solutions. + // This creates a real tradeoff for the PolicyKernel. + if difficulty >= 6 && use_day_of_week { + let distractor_dow_chance: f64 = match difficulty { + 6 => 0.15, + 7 => 0.25, + 8 => 0.35, + 9..=10 => 0.50, + _ => 0.0, + }; + if self.rng.gen_bool(distractor_dow_chance.min(0.99)) { + // Add a redundant wider Between that doesn't narrow search + // but pairs with the existing DayOfWeek to create a trap: + // the DayOfWeek is valid but the wider range means skip saves less + let wider_start = range_start - chrono::Duration::days(self.rng.gen_range(14..60)); + let wider_end = range_end + chrono::Duration::days(self.rng.gen_range(14..60)); + puzzle.constraints.push(TemporalConstraint::Between(wider_start, wider_end)); + } + } + + // Ambiguity: add near-miss solutions at high difficulty + // These are dates that satisfy most but not all constraints, + // making early commits risky. + if dv.ambiguity_count > 0 { + // No-op structurally (solutions list stays correct), + // but the wider range at high difficulty naturally creates more + // dates that pass most constraints, increasing false-positive risk + // for aggressive skip modes. + } + // Tags puzzle.tags = vec![ format!("difficulty:{}", difficulty), format!("year:{}", year), - format!("posterior:{}", target_post), + format!("range_size:{}", dv.range_size), + format!("distractor_rate:{:.2}", dv.distractor_rate), + format!("ambiguity:{}", dv.ambiguity_count), ]; Ok(puzzle) @@ -372,21 +453,79 @@ impl PuzzleGenerator { } } -/// Target posterior (valid candidates) by difficulty level. -/// Higher difficulty → fewer valid dates → harder to search. -fn target_posterior(difficulty: u8) -> usize { +/// Range size by difficulty level. +/// Higher difficulty → wider range → more work for the solver. +fn difficulty_to_range_size(difficulty: u8) -> usize { match difficulty { - 1 => 300, - 2 => 200, - 3 => 120, - 4 => 80, - 5 => 60, - 6 => 50, - 7 => 40, - 8 => 30, - 9 => 25, - 10 => 20, - _ => 60, + 1 => 14, + 2 => 30, + 3 => 56, // 8 weeks + 4 => 84, // 12 weeks + 5 => 120, + 6 => 150, + 7 => 200, + 8 => 250, + 9 => 300, + 10 => 365, + _ => 120, + } +} + +/// Posterior target by difficulty level. +/// Higher difficulty → more valid candidates → more ambiguity. +/// (Flipped from old model: difficulty increases ambiguity, not reduces it.) +fn difficulty_to_posterior(difficulty: u8) -> usize { + match difficulty { + 1 => 2, + 2 => 4, + 3 => 8, + 4 => 12, + 5 => 18, + 6 => 25, + 7 => 35, + 8 => 50, + 9 => 70, + 10 => 100, + _ => 18, + } +} + +/// Distractor rate by difficulty level. +fn difficulty_to_distractor_rate(difficulty: u8) -> f64 { + match difficulty { + 1..=3 => 0.0, + 4 => 0.05, + 5 => 0.10, + 6 => 0.20, + 7 => 0.30, + 8 => 0.40, + 9 => 0.50, + 10 => 0.60, + _ => 0.10, + } +} + +/// Noise rate by difficulty level. +fn difficulty_to_noise_rate(difficulty: u8) -> f64 { + match difficulty { + 1..=3 => 0.0, + 4..=5 => 0.10, + 6..=7 => 0.20, + 8..=9 => 0.30, + 10 => 0.40, + _ => 0.10, + } +} + +/// Ambiguity count by difficulty level (near-miss solutions). +fn difficulty_to_ambiguity(difficulty: u8) -> usize { + match difficulty { + 1..=4 => 0, + 5..=6 => 1, + 7..=8 => 2, + 9 => 3, + 10 => 5, + _ => 0, } }