diff --git a/examples/benchmarks/src/acceptance_test.rs b/examples/benchmarks/src/acceptance_test.rs
index 7355e6b8..a217be4b 100644
--- a/examples/benchmarks/src/acceptance_test.rs
+++ b/examples/benchmarks/src/acceptance_test.rs
@@ -23,7 +23,7 @@
 use crate::agi_contract::{ContractDelta, ContractHealth, ViabilityChecklist};
 use crate::intelligence_metrics::{DifficultyStats, RawMetrics};
 use crate::reasoning_bank::ReasoningBank;
-use crate::temporal::{AdaptiveSolver, KnowledgeCompiler, TemporalConstraint, TemporalPuzzle};
+use crate::temporal::{AdaptiveSolver, KnowledgeCompiler, PolicyKernel, TemporalConstraint, TemporalPuzzle};
 use crate::timepuzzles::{PuzzleGenerator, PuzzleGeneratorConfig};
 use anyhow::Result;
 use serde::{Deserialize, Serialize};
@@ -33,23 +33,28 @@ use serde::{Deserialize, Serialize};
 // ═══════════════════════════════════════════════════════════════════════════
 
 /// Ablation mode for controlled comparison.
-/// Every cycle runs the same seeded tasks in each mode.
+///
+/// All modes share the same solver capabilities (including skip_weekday).
+/// What differs is the **policy mechanism** that decides how to use them:
+/// - Mode A: Fixed heuristic policy (posterior_range + distractor_count)
+/// - Mode B: Compiler-suggested policy (compiled skip_mode from signatures)
+/// - Mode C: Learned PolicyKernel policy (contextual bandit over skip modes)
 #[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
 pub enum AblationMode {
-    /// Mode A: No compiler, fixed router (baseline)
+    /// Mode A: Fixed heuristic policy (baseline)
     Baseline,
-    /// Mode B: Compiler enabled, fixed router
+    /// Mode B: Compiler-suggested policy
     CompilerOnly,
-    /// Mode C: Compiler enabled, adaptive router
+    /// Mode C: Learned PolicyKernel policy (compiler + router + learning)
     Full,
 }
 
 impl std::fmt::Display for AblationMode {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
-            AblationMode::Baseline => write!(f, "A (baseline)"),
-            AblationMode::CompilerOnly => write!(f, "B (compiler)"),
-            AblationMode::Full => write!(f, "C (compiler+router)"),
+            AblationMode::Baseline => write!(f, "A (fixed policy)"),
+            AblationMode::CompilerOnly => write!(f, "B (compiled policy)"),
+            AblationMode::Full => write!(f, "C (learned policy)"),
         }
     }
 }
@@ -64,6 +69,10 @@ pub struct AblationResult {
     pub compiler_misses: usize,
     pub compiler_false_hits: usize,
     pub cost_saved_by_compiler: f64,
+    /// PolicyKernel stats
+    pub early_commit_rate: f64,
+    pub early_commit_penalties: f64,
+    pub policy_context_buckets: usize,
 }
 
 /// Full ablation comparison across all three modes.
@@ -113,6 +122,17 @@ impl AblationComparison {
             self.mode_b.compiler_hits, self.mode_b.compiler_misses, self.mode_b.compiler_false_hits);
         println!("  Cost saved by compiler: {:.2}", self.mode_b.cost_saved_by_compiler);
         println!();
+        println!("  PolicyKernel:");
+        println!("    Mode A early-commit rate: {:.2}%", self.mode_a.early_commit_rate * 100.0);
+        println!("    Mode B early-commit rate: {:.2}%", self.mode_b.early_commit_rate * 100.0);
+        println!("    Mode C early-commit rate: {:.2}%  (context buckets: {})",
+            self.mode_c.early_commit_rate * 100.0, self.mode_c.policy_context_buckets);
+        println!();
+        println!("  Policy Differences (all modes have same capabilities):");
+        println!("    Mode A: fixed heuristic (posterior_range + distractor_count)");
+        println!("    Mode B: compiler-suggested skip_mode from signatures");
+        println!("    Mode C: learned PolicyKernel (contextual bandit)");
+        println!();
 
         println!("  Ablation Assertions:");
         println!("    B beats A on cost (>=15%):        {}", if self.b_beats_a_cost { "PASS" } else { "FAIL" });
@@ -327,6 +347,12 @@ pub fn run_acceptance_test(config: &HoldoutConfig) -> Result<AcceptanceResult> {
 }
 
 /// Run acceptance test in a specific ablation mode.
+///
+/// All modes share the same solver capabilities.
+/// Policy mechanism differs:
+/// - Baseline: fixed heuristic policy
+/// - CompilerOnly: compiler-suggested policy
+/// - Full: learned PolicyKernel policy
 pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) -> Result<AblationResult> {
     // 1. Generate frozen holdout set
     let holdout = generate_holdout(config)?;
@@ -334,6 +360,7 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) ->
     // 2. Initialize persistent learning state
     let mut bank = ReasoningBank::new();
     let mut compiler = KnowledgeCompiler::new();
+    let mut policy_kernel = PolicyKernel::new();
     let mut cycle_metrics: Vec<CycleMetrics> = Vec::new();
     let mut health_history: Vec<ContractHealth> = Vec::new();
 
@@ -354,10 +381,16 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) ->
         let checkpoint_id = bank.checkpoint();
 
         // 3. Training phase: solve new tasks, update bank
-        let training_acc = train_cycle_mode(&mut bank, &mut compiler, config, cycle, compiler_enabled, router_enabled)?;
+        let training_acc = train_cycle_mode(
+            &mut bank, &mut compiler, &mut policy_kernel,
+            config, cycle, compiler_enabled, router_enabled,
+        )?;
 
         // 4. Holdout evaluation: clean pass (quick probe for rollback check)
-        let (_, probe_acc) = evaluate_holdout_clean_mode(&holdout, &bank, &compiler, config, compiler_enabled, router_enabled)?;
+        let (_, probe_acc) = evaluate_holdout_clean_mode(
+            &holdout, &bank, &compiler, &policy_kernel,
+            config, compiler_enabled, router_enabled,
+        )?;
 
         // Rollback if training made accuracy worse (viability check #3)
         if cycle > 0 {
@@ -382,12 +415,18 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) ->
         }
 
         // 5. Holdout evaluation: clean (definitive, with possibly rolled-back bank)
-        let (clean_raw, clean_acc) = evaluate_holdout_clean_mode(&holdout, &bank, &compiler, config, compiler_enabled, router_enabled)?;
+        let (clean_raw, clean_acc) = evaluate_holdout_clean_mode(
+            &holdout, &bank, &compiler, &policy_kernel,
+            config, compiler_enabled, router_enabled,
+        )?;
 
         // 6. Holdout evaluation: noisy pass
-        let (noisy_raw, noise_acc) = evaluate_holdout_noisy_mode(&holdout, &bank, &compiler, config, cycle, compiler_enabled, router_enabled)?;
+        let (noisy_raw, noise_acc) = evaluate_holdout_noisy_mode(
+            &holdout, &bank, &compiler, &policy_kernel,
+            config, cycle, compiler_enabled, router_enabled,
+        )?;
 
-        // 6. Merge clean + noisy into combined contract raw
+        // Merge clean + noisy into combined contract raw
         let combined = merge_raw(&clean_raw, &noisy_raw);
         let health = ContractHealth::from_raw(&combined);
         health_history.push(health.clone());
@@ -506,10 +545,13 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) ->
         0.0
     };
 
-    // Print compiler diagnostics in verbose mode
+    // Print diagnostics in verbose mode
     if config.verbose && compiler_enabled {
         compiler.print_diagnostics();
     }
+    if config.verbose {
+        policy_kernel.print_diagnostics();
+    }
 
     Ok(AblationResult {
         mode: mode.clone(),
@@ -518,13 +560,19 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) ->
         compiler_misses: compiler.misses,
         compiler_false_hits: compiler.false_hits,
         cost_saved_by_compiler: cost_saved,
+        early_commit_rate: policy_kernel.early_commit_rate(),
+        early_commit_penalties: policy_kernel.early_commit_penalties,
+        policy_context_buckets: policy_kernel.context_stats.len(),
     })
 }
 
 /// Run all three ablation modes and compare results.
-/// Mode A = baseline (no compiler, fixed router)
-/// Mode B = compiler only (Strategy Zero enabled)
-/// Mode C = full (compiler + adaptive router)
+///
+/// All modes share the same solver capabilities (skip_weekday, rewriting, etc).
+/// What differs is the policy mechanism:
+/// Mode A = fixed heuristic policy (posterior_range + distractor_count)
+/// Mode B = compiler-suggested policy (compiled skip_mode)
+/// Mode C = learned PolicyKernel policy (contextual bandit)
 pub fn run_ablation_comparison(config: &HoldoutConfig) -> Result<AblationComparison> {
     let mode_a = run_acceptance_test_mode(config, &AblationMode::Baseline)?;
     let mode_b = run_acceptance_test_mode(config, &AblationMode::CompilerOnly)?;
@@ -587,6 +635,7 @@ fn generate_holdout(config: &HoldoutConfig) -> Result<Vec<TemporalPuzzle>> {
 fn train_cycle_mode(
     bank: &mut ReasoningBank,
     compiler: &mut KnowledgeCompiler,
+    policy_kernel: &mut PolicyKernel,
     config: &HoldoutConfig,
     cycle: usize,
     compiler_enabled: bool,
@@ -596,6 +645,7 @@ fn train_cycle_mode(
     solver.compiler = compiler.clone();
     solver.compiler_enabled = compiler_enabled;
     solver.router_enabled = router_enabled;
+    solver.policy_kernel = policy_kernel.clone();
     let pc = PuzzleGeneratorConfig {
         min_difficulty: 1,
         max_difficulty: 10,
@@ -659,6 +709,7 @@ fn train_cycle_mode(
 
     *bank = solver.reasoning_bank.clone();
     *compiler = solver.compiler.clone();
+    *policy_kernel = solver.policy_kernel.clone();
     Ok(correct as f64 / puzzles.len() as f64)
 }
 
@@ -666,6 +717,7 @@ fn evaluate_holdout_clean_mode(
     holdout: &[TemporalPuzzle],
     bank: &ReasoningBank,
     compiler: &KnowledgeCompiler,
+    policy_kernel: &PolicyKernel,
     config: &HoldoutConfig,
     compiler_enabled: bool,
     router_enabled: bool,
@@ -675,6 +727,7 @@ fn evaluate_holdout_clean_mode(
     solver.compiler = compiler.clone();
     solver.compiler_enabled = compiler_enabled;
     solver.router_enabled = router_enabled;
+    solver.policy_kernel = policy_kernel.clone();
     solver.external_step_limit = Some(config.step_budget);
 
     for puzzle in holdout {
@@ -711,6 +764,7 @@ fn evaluate_holdout_noisy_mode(
     holdout: &[TemporalPuzzle],
     bank: &ReasoningBank,
     compiler: &KnowledgeCompiler,
+    policy_kernel: &PolicyKernel,
     config: &HoldoutConfig,
     cycle: usize,
     compiler_enabled: bool,
@@ -721,6 +775,7 @@ fn evaluate_holdout_noisy_mode(
     solver.compiler = compiler.clone();
     solver.compiler_enabled = compiler_enabled;
     solver.router_enabled = router_enabled;
+    solver.policy_kernel = policy_kernel.clone();
     solver.external_step_limit = Some(config.step_budget);
     let mut rng = Rng64::new(config.holdout_seed.wrapping_add(cycle as u64 * 31337));
 
diff --git a/examples/benchmarks/src/temporal.rs b/examples/benchmarks/src/temporal.rs
index 25ea2803..98bfcb22 100644
--- a/examples/benchmarks/src/temporal.rs
+++ b/examples/benchmarks/src/temporal.rs
@@ -54,6 +54,8 @@ pub struct TemporalPuzzle {
     pub difficulty: u8,
     /// Tags for categorization
     pub tags: Vec<String>,
+    /// Multi-dimensional difficulty vector (None = use scalar difficulty)
+    pub difficulty_vector: Option<crate::timepuzzles::DifficultyVector>,
 }
 
 impl TemporalPuzzle {
@@ -67,6 +69,7 @@ impl TemporalPuzzle {
             solutions: Vec::new(),
             difficulty: 5,
             tags: Vec::new(),
+            difficulty_vector: None,
         }
     }
 
@@ -497,6 +500,265 @@ mod tests {
 // ============================================================================
 
 use crate::reasoning_bank::{ReasoningBank, Strategy, Trajectory, Verdict};
+use crate::timepuzzles::DifficultyVector;
+
+// ═══════════════════════════════════════════════════════════════════════════
+// PolicyKernel — learned skip-mode selection
+// ═══════════════════════════════════════════════════════════════════════════
+
+/// Skip mode for the temporal solver scan loop.
+/// Every ablation mode (A, B and C) may use every skip mode;
+/// what differs is the *policy* that selects the skip mode.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub enum SkipMode {
+    /// Linear scan: check every date in range (1-day increments)
+    None,
+    /// Weekday skip: advance by 7 days when DayOfWeek constraint is present
+    Weekday,
+    /// Hybrid: weekday skip for initial scan, then full refinement pass
+    /// around candidates to catch near-misses under noise
+    Hybrid,
+}
+
+impl Default for SkipMode {
+    /// Defaults to [`SkipMode::None`], the plain linear scan that
+    /// checks every date in the range.
+    fn default() -> Self { Self::None }
+}
+
+impl std::fmt::Display for SkipMode {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(match self { // labels double as stats-map keys in `record_outcome`
+            Self::None => "none",
+            Self::Weekday => "weekday",
+            Self::Hybrid => "hybrid",
+        })
+    }
+}
+
+/// Context features for PolicyKernel decisions.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct PolicyContext {
+    /// Search-range width in days (`build_policy_context`: `Between` span, else 365)
+    pub posterior_range: usize,
+    /// Number of distractor constraints in the puzzle
+    pub distractor_count: usize,
+    /// Whether a DayOfWeek constraint is present
+    pub has_day_of_week: bool,
+    /// Whether noise was injected (`build_policy_context` always sets `false`)
+    pub noisy: bool,
+    /// Difficulty vector components
+    pub difficulty: DifficultyVector,
+    /// False-hit rate (`build_policy_context` passes the cumulative early-commit rate)
+    pub recent_false_hit_rate: f64,
+}
+
+/// Outcome of one skip-mode decision, fed to `PolicyKernel::record_outcome`.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct SkipOutcome {
+    /// The skip mode that was used
+    pub mode: SkipMode,
+    /// Whether the solve was correct
+    pub correct: bool,
+    /// Steps taken (summed into `SkipModeStats::total_steps`)
+    pub steps: usize,
+    /// Whether the solver committed early to an answer that turned out wrong
+    pub early_commit_wrong: bool,
+}
+
+/// Per-context skip-mode statistics for learned policy.
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct SkipModeStats {
+    pub attempts: usize, // outcomes recorded for this mode
+    pub successes: usize, // outcomes whose `correct` flag was set
+    pub total_steps: usize, // sum of `steps` over all recorded outcomes
+    pub early_commit_wrongs: usize, // outcomes flagged `early_commit_wrong`
+}
+
+impl SkipModeStats {
+    /// Reward: `0.5 * accuracy + 0.3 * max(0, 1 - avg_steps / 200) - 0.2 * wrong_rate`,
+    /// floored at zero; `wrong_rate` is the share of wrong early commits.
+    /// A mode with no attempts scores a neutral 0.5.
+    pub fn reward(&self) -> f64 {
+        if self.attempts == 0 { return 0.5; }
+        let n = self.attempts as f64;
+        let accuracy = self.successes as f64 / n;
+        let avg_steps = self.total_steps as f64 / n;
+        let cost_bonus = 0.3 * (1.0 - avg_steps / 200.0).max(0.0);
+        let wrong_rate = self.early_commit_wrongs as f64 / n;
+        (accuracy * 0.5 + cost_bonus - 0.2 * wrong_rate).max(0.0)
+    }
+}
+
+/// PolicyKernel: decides skip_mode based on context.
+///
+/// Three policy levels:
+/// - **Fixed** (Mode A): deterministic heuristic based on posterior_range + distractor_count
+/// - **Compiled** (Mode B): compiler-suggested skip_mode from CompiledSolveConfig
+/// - **Learned** (Mode C): contextual stats drive selection, adapts from outcomes
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+pub struct PolicyKernel {
+    /// Context bucket key → skip-mode name (`SkipMode` display string) → stats
+    pub context_stats: HashMap<String, HashMap<String, SkipModeStats>>,
+    /// Sum of per-outcome early-commit penalties (each in `[0, 1]`)
+    pub early_commit_penalties: f64,
+    /// Outcomes recorded so far (`record_outcome` bumps this on every call)
+    pub early_commits_total: usize,
+    /// Recorded outcomes flagged `early_commit_wrong`
+    pub early_commits_wrong: usize,
+    /// Exploration rate for learned policy (`new()` sets 0.15; `Default` gives 0.0)
+    pub epsilon: f64,
+    /// Xorshift state for exploration draws (`new()` seeds it with 42)
+    rng_state: u64,
+}
+
+impl PolicyKernel {
+    /// Creates a kernel with a 15% exploration rate and a fixed RNG seed (42).
+    pub fn new() -> Self {
+        Self {
+            epsilon: 0.15,
+            rng_state: 42,
+            ..Default::default()
+        }
+    }
+
+    /// Fixed baseline policy (Mode A):
+    /// Uses posterior_range + distractor_count to decide.
+    /// - If DayOfWeek is present AND posterior_range > 30 AND distractor_count == 0: Weekday
+    /// - If DayOfWeek is present AND distractor_count > 0: Hybrid (safe fallback)
+    /// - Otherwise: None
+    pub fn fixed_policy(ctx: &PolicyContext) -> SkipMode {
+        if !ctx.has_day_of_week {
+            return SkipMode::None;
+        }
+        if ctx.distractor_count == 0 && ctx.posterior_range > 30 {
+            SkipMode::Weekday
+        } else if ctx.distractor_count > 0 {
+            // Distractors present: skip is risky, use hybrid for safety
+            SkipMode::Hybrid
+        } else {
+            // Small range: skip saves little, linear is fine
+            SkipMode::None
+        }
+    }
+
+    /// Compiled policy (Mode B):
+    /// Uses compiler-suggested skip_mode from CompiledSolveConfig.
+    /// Falls back to fixed policy if compiler has no suggestion.
+    pub fn compiled_policy(ctx: &PolicyContext, compiled_skip: Option<SkipMode>) -> SkipMode {
+        compiled_skip.unwrap_or_else(|| Self::fixed_policy(ctx))
+    }
+
+    /// Learned policy (Mode C):
+    /// Uses contextual stats to pick the best skip mode.
+    /// Epsilon-greedy exploration for discovering better policies.
+    /// Selection only reads `context_stats`; buckets are created by
+    /// `record_outcome`, never as a side effect of a lookup.
+    pub fn learned_policy(&mut self, ctx: &PolicyContext) -> SkipMode {
+        if !ctx.has_day_of_week { return SkipMode::None; }
+
+        // Explore with probability epsilon: pick a random mode
+        if self.next_f64() < self.epsilon {
+            return match (self.next_f64() * 3.0) as u8 {
+                0 => SkipMode::None,
+                1 => SkipMode::Weekday,
+                _ => SkipMode::Hybrid,
+            };
+        }
+
+        // Exploit: highest reward wins; untried modes score the neutral prior
+        // and ties keep the earlier candidate (None, then Weekday, then Hybrid).
+        let untried = SkipModeStats::default();
+        let per_mode = self.context_stats.get(&Self::context_bucket(ctx));
+        let (mut best_mode, mut best_reward) = (SkipMode::None, -1.0f64);
+        for candidate in &[SkipMode::None, SkipMode::Weekday, SkipMode::Hybrid] {
+            let stats = per_mode.and_then(|modes| modes.get(&candidate.to_string()));
+            let reward = stats.unwrap_or(&untried).reward();
+            if reward > best_reward {
+                best_reward = reward;
+                best_mode = candidate.clone();
+            }
+        }
+        best_mode
+    }
+
+    /// Record the outcome of a skip-mode decision.
+    /// Note: `early_commits_total` is bumped on every call, so it counts all
+    /// recorded outcomes rather than only early commits.
+    pub fn record_outcome(&mut self, ctx: &PolicyContext, outcome: &SkipOutcome) {
+        let bucket = Self::context_bucket(ctx);
+        let mode_name = outcome.mode.to_string();
+
+        let stats_map = self.context_stats.entry(bucket).or_default();
+        let stats = stats_map.entry(mode_name).or_default();
+        stats.attempts += 1;
+        stats.total_steps += outcome.steps;
+        if outcome.correct { stats.successes += 1; }
+        if outcome.early_commit_wrong {
+            stats.early_commit_wrongs += 1;
+            self.early_commits_wrong += 1;
+            // Penalty proportional to how early the commit was
+            // (fewer steps = earlier commit = higher penalty)
+            let penalty = 1.0 - (outcome.steps as f64 / 200.0).min(1.0);
+            self.early_commit_penalties += penalty;
+        }
+        self.early_commits_total += 1;
+    }
+
+    /// Fraction of recorded outcomes that were wrong early commits
+    /// (0.0 before any outcome has been recorded).
+    pub fn early_commit_rate(&self) -> f64 {
+        if self.early_commits_total == 0 { return 0.0; }
+        self.early_commits_wrong as f64 / self.early_commits_total as f64
+    }
+
+    /// Build a context bucket key for stats grouping.
+    fn context_bucket(ctx: &PolicyContext) -> String {
+        let range_bucket = match ctx.posterior_range {
+            0..=30 => "small",
+            31..=100 => "medium",
+            101..=300 => "large",
+            _ => "xlarge",
+        };
+        let distractor_bucket = if ctx.distractor_count == 0 { "clean" } else { "distracted" };
+        format!("{}:{}", range_bucket, distractor_bucket)
+    }
+
+    /// Xorshift step mapped to (0, 1]; values near u64::MAX round to exactly 1.0.
+    fn next_f64(&mut self) -> f64 {
+        let mut x = self.rng_state.max(1);
+        x ^= x << 13; x ^= x >> 7; x ^= x << 17;
+        self.rng_state = x;
+        (x as f64) / (u64::MAX as f64)
+    }
+
+    /// Print diagnostic summary.
+    /// Buckets and modes are listed in sorted key order so the output is stable.
+    pub fn print_diagnostics(&self) {
+        println!();
+        println!("  PolicyKernel Diagnostics");
+        println!("  Early commits: {}/{} wrong ({:.1}%)",
+            self.early_commits_wrong, self.early_commits_total,
+            self.early_commit_rate() * 100.0);
+        println!("  Accumulated penalty: {:.2}", self.early_commit_penalties);
+        println!("  Context buckets: {}", self.context_stats.len());
+
+        let mut buckets: Vec<_> = self.context_stats.iter().collect();
+        buckets.sort_by(|a, b| a.0.cmp(b.0));
+        for (bucket, modes) in buckets {
+            println!("    {}", bucket);
+            let mut rows: Vec<_> = modes.iter().collect();
+            rows.sort_by(|a, b| a.0.cmp(b.0));
+            for (mode, stats) in rows {
+                println!("      {:<8} attempts={:<4} success={:<4} avg_steps={:.1} ecw={} reward={:.3}",
+                    mode, stats.attempts, stats.successes,
+                    if stats.attempts > 0 { stats.total_steps as f64 / stats.attempts as f64 } else { 0.0 },
+                    stats.early_commit_wrongs,
+                    stats.reward());
+            }
+        }
+    }
+}
 
 /// Adaptive temporal solver with learning capabilities
 ///
@@ -529,6 +791,8 @@ pub struct CompiledSolveConfig {
     pub hit_count: usize,
     /// Counterexample count (failures on this signature)
     pub counterexample_count: usize,
+    /// Compiled skip mode suggestion (for Mode B policy)
+    pub compiled_skip_mode: SkipMode,
 }
 
 impl CompiledSolveConfig {
@@ -607,6 +871,10 @@ impl KnowledgeCompiler {
             let sig = format!("{}:{}:{}", COMPILER_SIG_VERSION, traj.difficulty, sig_parts.join(","));
 
             if let Some(attempt) = traj.attempts.first() {
+                // Determine compiled skip mode from constraint types
+                let has_dow = traj.constraint_types.iter().any(|c| c == "DayOfWeek");
+                let compiled_skip = if has_dow { SkipMode::Weekday } else { SkipMode::None };
+
                 let entry = self.signature_cache.entry(sig).or_insert(CompiledSolveConfig {
                     use_rewriting: true,
                     max_steps: attempt.steps,
@@ -616,6 +884,7 @@ impl KnowledgeCompiler {
                     stop_after_first: true,
                     hit_count: 0,
                     counterexample_count: 0,
+                    compiled_skip_mode: compiled_skip,
                 });
                 // Keep minimum steps that succeeded
                 entry.max_steps = entry.max_steps.min(attempt.steps);
@@ -898,6 +1167,8 @@ pub struct AdaptiveSolver {
     pub router: StrategyRouter,
     /// Whether to use the adaptive router instead of fixed strategy selection
     pub router_enabled: bool,
+    /// PolicyKernel for skip-mode decisions (all modes use this)
+    pub policy_kernel: PolicyKernel,
 }
 
 impl Default for AdaptiveSolver {
@@ -919,6 +1190,7 @@ impl AdaptiveSolver {
             compiler_enabled: false,
             router: StrategyRouter::new(),
             router_enabled: false,
+            policy_kernel: PolicyKernel::new(),
         }
     }
 
@@ -934,6 +1206,7 @@ impl AdaptiveSolver {
             compiler_enabled: false,
             router: StrategyRouter::new(),
             router_enabled: false,
+            policy_kernel: PolicyKernel::new(),
         }
     }
 
@@ -947,11 +1220,45 @@ impl AdaptiveSolver {
         &mut self.solver
     }
 
+    /// Build a PolicyContext from puzzle features.
+    fn build_policy_context(&self, puzzle: &TemporalPuzzle) -> PolicyContext {
+        let constraints = &puzzle.constraints;
+        let has_day_of_week = constraints.iter().any(|c| matches!(c, TemporalConstraint::DayOfWeek(_)));
+        // Search width in days from the first Between constraint; a puzzle
+        // without one is treated as spanning 365 days.
+        let posterior_range = constraints
+            .iter()
+            .find_map(|c| match c {
+                TemporalConstraint::Between(lo, hi) => Some((*hi - *lo).num_days().max(0) as usize),
+                _ => None,
+            })
+            .unwrap_or(365);
+        let distractor_count = count_distractors(puzzle);
+        // Fall back to the scalar difficulty when no vector is attached.
+        let difficulty = match &puzzle.difficulty_vector {
+            Some(vector) => vector.clone(),
+            None => DifficultyVector::from_scalar(puzzle.difficulty),
+        };
+
+        PolicyContext {
+            posterior_range,
+            distractor_count,
+            has_day_of_week,
+            noisy: false,
+            difficulty,
+            recent_false_hit_rate: self.policy_kernel.early_commit_rate(),
+        }
+    }
+
     /// Solve a puzzle with adaptive learning.
-    /// If compiler_enabled, tries Strategy Zero (compiled config) first.
-    /// If router_enabled, uses contextual bandit for strategy selection.
+    ///
+    /// All modes have access to the same solver capabilities (including skip_weekday).
+    /// What differs is the **policy** that decides how to use them:
+    /// - Mode A (baseline): fixed heuristic policy
+    /// - Mode B (compiler): compiler-suggested policy
+    /// - Mode C (full): learned PolicyKernel policy
     pub fn solve(&mut self, puzzle: &TemporalPuzzle) -> Result<SolverResult> {
-        // Reset weekday skipping (set for Mode C in fallback path)
+        // Reset solver state
         self.solver.skip_weekday = None;
 
         // Get constraint types for pattern matching
@@ -961,6 +1268,44 @@ impl AdaptiveSolver {
             .map(|c| constraint_type_name(c))
             .collect();
 
+        // Build policy context (same for all modes)
+        let policy_ctx = self.build_policy_context(puzzle);
+
+        // ─── PolicyKernel: decide skip_mode (all modes participate) ──────
+        let skip_mode = if self.router_enabled {
+            // Mode C: learned policy
+            self.policy_kernel.learned_policy(&policy_ctx)
+        } else if self.compiler_enabled {
+            // Mode B: compiler-suggested policy
+            let compiled_skip = self.compiler.lookup(puzzle)
+                .map(|config| config.compiled_skip_mode.clone());
+            PolicyKernel::compiled_policy(&policy_ctx, compiled_skip)
+        } else {
+            // Mode A: fixed baseline policy
+            PolicyKernel::fixed_policy(&policy_ctx)
+        };
+
+        // Apply skip_mode to solver
+        match &skip_mode {
+            SkipMode::None => {
+                self.solver.skip_weekday = None;
+            }
+            SkipMode::Weekday => {
+                self.solver.skip_weekday = puzzle.constraints.iter().find_map(|c| match c {
+                    TemporalConstraint::DayOfWeek(w) => Some(*w),
+                    _ => None,
+                });
+            }
+            SkipMode::Hybrid => {
+                // Hybrid: use weekday skip for initial scan (set here),
+                // then do a refinement pass below if needed
+                self.solver.skip_weekday = puzzle.constraints.iter().find_map(|c| match c {
+                    TemporalConstraint::DayOfWeek(w) => Some(*w),
+                    _ => None,
+                });
+            }
+        }
+
         // Accumulated steps across all attempts (Strategy Zero + fallback)
         let mut extra_steps: usize = 0;
         let mut extra_tool_calls: usize = 0;
@@ -968,7 +1313,6 @@ impl AdaptiveSolver {
         // ─── Strategy Zero: KnowledgeCompiler (bounded trial) ────────────
         if self.compiler_enabled {
             let conf_threshold = self.compiler.confidence_threshold;
-            // Extract all config data before releasing the borrow
             let compiled = self.compiler.lookup(puzzle).map(|config| {
                 (
                     config.expected_correct,
@@ -981,7 +1325,6 @@ impl AdaptiveSolver {
 
             if let Some((expected_correct, confidence, trial_budget, use_rewriting, stop_first)) = compiled {
                 if expected_correct && confidence >= conf_threshold {
-                    // Bounded trial: cap at 25% of external limit to make misses cheap
                     self.solver.calendar_tool = use_rewriting;
                     self.solver.stop_after_first = stop_first;
                     self.solver.max_steps = trial_budget;
@@ -990,11 +1333,9 @@ impl AdaptiveSolver {
                     let result = self.solver.solve(puzzle)?;
                     let latency = start.elapsed().as_millis() as u64;
 
-                    // Reset stop_after_first for fallback path
                     self.solver.stop_after_first = false;
 
                     if result.correct {
-                        // Strategy Zero win — record and return
                         self.compiler.record_success(puzzle, result.steps);
                         let mut trajectory = Trajectory::new(&puzzle.id, puzzle.difficulty);
                         trajectory.constraint_types = constraint_types;
@@ -1011,7 +1352,15 @@ impl AdaptiveSolver {
                         self.reasoning_bank.record_trajectory(trajectory);
                         self.episodes += 1;
 
-                        // Update router if enabled
+                        // Record successful skip outcome
+                        let outcome = SkipOutcome {
+                            mode: skip_mode,
+                            correct: true,
+                            steps: result.steps,
+                            early_commit_wrong: false,
+                        };
+                        self.policy_kernel.record_outcome(&policy_ctx, &outcome);
+
                         if self.router_enabled {
                             let ctx = StrategyRouter::context(puzzle, false);
                             self.router.update(&ctx, "compiler", true, result.steps, false);
@@ -1019,10 +1368,20 @@ impl AdaptiveSolver {
 
                         return Ok(result);
                     } else {
-                        // Strategy Zero failed — bounded trial overhead only
                         extra_steps += result.steps;
                         extra_tool_calls += result.tool_calls;
                         self.compiler.record_failure(puzzle);
+
+                        // Record early commit wrong if solver claimed solved but was wrong
+                        if result.solved && !result.correct {
+                            let outcome = SkipOutcome {
+                                mode: skip_mode.clone(),
+                                correct: false,
+                                steps: result.steps,
+                                early_commit_wrong: true,
+                            };
+                            self.policy_kernel.record_outcome(&policy_ctx, &outcome);
+                        }
                     }
                 }
             }
@@ -1038,13 +1397,11 @@ impl AdaptiveSolver {
                 "adaptive".to_string(),
             ];
             let ranked = self.router.select(&ctx, &available);
-            // Use the top-ranked strategy
             if let Some((top_strategy, _)) = ranked.first() {
                 self.current_strategy = self.reasoning_bank
                     .strategy_from_name(top_strategy, puzzle.difficulty);
             }
         } else {
-            // Fixed strategy selection from ReasoningBank
             self.current_strategy = self
                 .reasoning_bank
                 .get_strategy(puzzle.difficulty, &constraint_types);
@@ -1056,17 +1413,6 @@ impl AdaptiveSolver {
             .unwrap_or(self.current_strategy.max_steps);
         self.solver.stop_after_first = false;
 
-        // Weekday skipping: detect DayOfWeek constraint for compiler/router modes
-        // Mode A (baseline): no skipping → linear scan
-        // Mode B (compiler): skipping → compiler policy reduces cost
-        // Mode C (full): skipping → compiler + router optimize further
-        if self.compiler_enabled || self.router_enabled {
-            self.solver.skip_weekday = puzzle.constraints.iter().find_map(|c| match c {
-                TemporalConstraint::DayOfWeek(w) => Some(*w),
-                _ => None,
-            });
-        }
-
         // Create trajectory for this puzzle
         let mut trajectory = Trajectory::new(&puzzle.id, puzzle.difficulty);
         trajectory.constraint_types = constraint_types;
@@ -1076,6 +1422,50 @@ impl AdaptiveSolver {
         let mut result = self.solver.solve(puzzle)?;
         trajectory.latency_ms = start.elapsed().as_millis() as u64;
 
+        // ─── Hybrid refinement pass ──────────────────────────────────────
+        // If Hybrid mode was used and we found solutions via weekday skip,
+        // do a narrow linear scan around each candidate to catch near-misses.
+        if skip_mode == SkipMode::Hybrid && !result.solutions.is_empty() {
+            let mut refined_solutions = result.solutions.clone();
+            self.solver.skip_weekday = None; // Linear for refinement
+            let saved_max = self.solver.max_steps;
+            self.solver.max_steps = 14; // Check ±7 days around each candidate
+
+            for candidate in &result.solutions {
+                let refine_start = *candidate - chrono::Duration::days(7);
+                let refine_end = *candidate + chrono::Duration::days(7);
+                let refine_puzzle = TemporalPuzzle {
+                    id: puzzle.id.clone(),
+                    description: puzzle.description.clone(),
+                    constraints: puzzle.constraints.clone(),
+                    references: puzzle.references.clone(),
+                    solutions: puzzle.solutions.clone(),
+                    difficulty: puzzle.difficulty,
+                    tags: puzzle.tags.clone(),
+                    difficulty_vector: puzzle.difficulty_vector.clone(),
+                };
+                // Manually search the refinement window
+                let mut cur = refine_start;
+                while cur <= refine_end {
+                    if let Ok(true) = refine_puzzle.check_date(cur) {
+                        if !refined_solutions.contains(&cur) {
+                            refined_solutions.push(cur);
+                        }
+                    }
+                    cur = match cur.succ_opt() { Some(d) => d, None => break };
+                    result.steps += 1;
+                }
+            }
+            self.solver.max_steps = saved_max;
+            result.solutions = refined_solutions;
+            // Re-check correctness after refinement
+            result.correct = if puzzle.solutions.is_empty() {
+                true
+            } else {
+                puzzle.solutions.iter().all(|s| result.solutions.contains(s))
+            };
+        }
+
         // Accumulate overhead from failed Strategy Zero attempt
         result.steps += extra_steps;
         result.tool_calls += extra_tool_calls;
@@ -1117,6 +1507,16 @@ impl AdaptiveSolver {
 
         trajectory.set_verdict(verdict, puzzle.solutions.first().map(|d| d.to_string()));
 
+        // ─── Record PolicyKernel outcome ─────────────────────────────────
+        let early_commit_wrong = result.solved && !result.correct;
+        let outcome = SkipOutcome {
+            mode: skip_mode,
+            correct: result.correct,
+            steps: result.steps,
+            early_commit_wrong,
+        };
+        self.policy_kernel.record_outcome(&policy_ctx, &outcome);
+
         // Update router stats
         if self.router_enabled {
             let ctx = StrategyRouter::context(puzzle, false);
@@ -1178,6 +1578,53 @@ impl AdaptiveSolver {
     }
 }
 
+/// Count distractor constraints in a puzzle.
+/// A distractor is a constraint that is likely redundant (doesn't narrow the search much).
+fn count_distractors(puzzle: &TemporalPuzzle) -> usize {
+    let mut count = 0;
+    let mut seen_between = false;
+    let mut seen_inyear = false;
+    let mut seen_dow = false;
+
+    for c in &puzzle.constraints {
+        match c {
+            TemporalConstraint::Between(_, _) => {
+                if seen_between {
+                    count += 1; // Redundant Between (wider or duplicate)
+                }
+                seen_between = true;
+            }
+            TemporalConstraint::InYear(_) => {
+                if seen_inyear {
+                    count += 1; // Redundant InYear
+                }
+                seen_inyear = true;
+            }
+            TemporalConstraint::DayOfWeek(_) => {
+                if seen_dow {
+                    count += 1; // Redundant DayOfWeek
+                }
+                seen_dow = true;
+            }
+            TemporalConstraint::After(d) => {
+                // After a date more than 14 days before the first Between's start → distractor
+                if seen_between {
+                    if let Some(between_start) = puzzle.constraints.iter().find_map(|c2| match c2 {
+                        TemporalConstraint::Between(s, _) => Some(*s),
+                        _ => None,
+                    }) {
+                        if *d < between_start - chrono::Duration::days(14) {
+                            count += 1;
+                        }
+                    }
+                }
+            }
+            _ => {}
+        }
+    }
+    count
+}
+
 /// Get the type name of a constraint for pattern matching
 fn constraint_type_name(constraint: &TemporalConstraint) -> String {
     match constraint {
diff --git a/examples/benchmarks/src/timepuzzles.rs b/examples/benchmarks/src/timepuzzles.rs
index b1e8cc88..19aa74c3 100644
--- a/examples/benchmarks/src/timepuzzles.rs
+++ b/examples/benchmarks/src/timepuzzles.rs
@@ -15,6 +15,61 @@ use chrono::{Datelike, NaiveDate};
 use rand::prelude::*;
 use serde::{Deserialize, Serialize};
 
+/// Multi-dimensional difficulty vector.
+///
+/// Replaces single-axis difficulty to prevent collapsing effects.
+/// Higher difficulty = more work and more ambiguity, NOT tighter posterior.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct DifficultyVector {
+    /// Size of the search range (days)
+    pub range_size: usize,
+    /// Target number of valid candidates in posterior
+    pub posterior_target: usize,
+    /// Rate of distractor constraints (0.0 - 1.0)
+    pub distractor_rate: f64,
+    /// Rate of noise injection (0.0 - 1.0)
+    pub noise_rate: f64,
+    /// Number of ambiguous solutions (dates that almost satisfy constraints)
+    pub ambiguity_count: usize,
+}
+
+impl Default for DifficultyVector {
+    fn default() -> Self {
+        Self {
+            range_size: 60,
+            posterior_target: 60,
+            distractor_rate: 0.0,
+            noise_rate: 0.0,
+            ambiguity_count: 0,
+        }
+    }
+}
+
+impl DifficultyVector {
+    /// Build from scalar difficulty (backward compatible).
+    /// Higher difficulty = wider range, more distractors, more ambiguity.
+    pub fn from_scalar(difficulty: u8) -> Self {
+        let d = difficulty.min(10).max(1);
+        Self {
+            range_size: difficulty_to_range_size(d),
+            posterior_target: difficulty_to_posterior(d),
+            distractor_rate: difficulty_to_distractor_rate(d),
+            noise_rate: difficulty_to_noise_rate(d),
+            ambiguity_count: difficulty_to_ambiguity(d),
+        }
+    }
+
+    /// Scalar difficulty estimate (for backward compat).
+    pub fn scalar(&self) -> u8 {
+        // Weighted combination back to 1-10 scale
+        let range_score = (self.range_size as f64 / 365.0 * 10.0).min(10.0);
+        let distractor_score = self.distractor_rate * 10.0;
+        let ambiguity_score = (self.ambiguity_count as f64 / 5.0 * 10.0).min(10.0);
+        let combined = (range_score * 0.3 + distractor_score * 0.3 + ambiguity_score * 0.4) as u8;
+        combined.max(1).min(10)
+    }
+}
+
 /// Puzzle generator configuration
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct PuzzleGeneratorConfig {
@@ -205,33 +260,28 @@ impl PuzzleGenerator {
         ));
     }
 
-    /// Generate a single puzzle with difficulty-based posterior targeting.
+    /// Generate a single puzzle with multi-dimensional difficulty vector.
     ///
-    /// Range size scales with difficulty:
-    /// - Low difficulty (1-2): wide range, no DayOfWeek → many valid dates
-    /// - Medium difficulty (3-6): DayOfWeek creates 7x cost surface
-    /// - High difficulty (7-10): narrower range + anchor constraints
+    /// Difficulty scaling (higher = more work, not tighter posterior):
+    /// - Low (1-2): small range, no DayOfWeek, no distractors
+    /// - Medium (3-6): DayOfWeek + moderate range = 7x cost surface
+    /// - High (7-10): wide range + distractors + ambiguity + anchor constraints
     ///
-    /// DayOfWeek constraint (difficulty 3+) creates a cost surface that
-    /// weekday-skipping in Mode C can exploit for ~7x speedup.
+    /// All modes have access to weekday skipping; what differs is the policy.
     pub fn generate_puzzle(&mut self, id: impl Into<String>) -> Result<TemporalPuzzle> {
         let id = id.into();
         let difficulty = self
             .rng
             .gen_range(self.config.min_difficulty..=self.config.max_difficulty);
 
-        // Target posterior: number of valid dates after all constraints
-        let target_post = target_posterior(difficulty);
+        // Build difficulty vector from scalar
+        let dv = DifficultyVector::from_scalar(difficulty);
 
-        // DayOfWeek (difficulty 3+): creates 7x cost surface for solver optimization
+        // DayOfWeek (difficulty 3+): creates cost surface for policy decisions
         let use_day_of_week = difficulty >= 3;
 
-        // Search range: posterior * 7 when DayOfWeek constrains (solver scans all)
-        let range_days = if use_day_of_week {
-            (target_post * 7).min(365) as i64
-        } else {
-            target_post as i64
-        };
+        // Range size from difficulty vector (wider range at higher difficulty)
+        let range_days = dv.range_size as i64;
 
         // Pick target date
         let year = self
@@ -255,6 +305,9 @@ impl PuzzleGenerator {
                 .with_difficulty(difficulty)
                 .with_solutions(vec![target]);
 
+        // Attach difficulty vector
+        puzzle.difficulty_vector = Some(dv.clone());
+
         // Base constraints: InYear + Between (defines search range)
         puzzle
             .constraints
@@ -265,15 +318,15 @@ impl PuzzleGenerator {
 
         let mut used_anchors: Vec<TemporalAnchor> = Vec::new();
 
-        // DayOfWeek (difficulty 3+): creates 7x cost surface
+        // DayOfWeek (difficulty 3+): creates cost surface for all modes
         if use_day_of_week {
             puzzle
                 .constraints
                 .push(TemporalConstraint::DayOfWeek(target.weekday()));
         }
 
-        // Anchor reference for high difficulty (8+)
-        if difficulty >= 8 && self.config.relative_constraints {
+        // Anchor reference for high difficulty (7+)
+        if difficulty >= 7 && self.config.relative_constraints {
             if let Some(anchor) = self.anchors.choose(&mut self.rng).cloned() {
                 let diff = (target - anchor.date).num_days();
                 let constraint = if diff >= 0 {
@@ -291,23 +344,51 @@ impl PuzzleGenerator {
             puzzle.references.insert(anchor.name.clone(), anchor.date);
         }
 
-        // Distractor injection (difficulty 5+)
-        let distractor_chance: f64 = match difficulty {
-            1..=4 => 0.0,
-            5..=6 => 0.10,
-            7..=8 => 0.15,
-            _ => 0.25,
-        };
-        if distractor_chance > 0.0 && self.rng.gen_bool(distractor_chance.min(0.99)) {
+        // Distractor injection (from difficulty vector rate)
+        if dv.distractor_rate > 0.0 && self.rng.gen_bool(dv.distractor_rate.min(0.99)) {
             let distractor = self.generate_distractor(target, range_start, range_end);
             puzzle.constraints.push(distractor);
         }
 
+        // Distractor range (difficulty 6+): only applies when DayOfWeek is present.
+        // With a difficulty-scaled probability, adds a redundant, wider Between
+        // around the search range; no second DayOfWeek constraint is pushed here.
+        // This creates a real tradeoff for the PolicyKernel.
+        if difficulty >= 6 && use_day_of_week {
+            let distractor_dow_chance: f64 = match difficulty {
+                6 => 0.15,
+                7 => 0.25,
+                8 => 0.35,
+                9..=10 => 0.50,
+                _ => 0.0,
+            };
+            if self.rng.gen_bool(distractor_dow_chance.min(0.99)) {
+                // Add a redundant wider Between that doesn't narrow search
+                // but pairs with the existing DayOfWeek to create a trap:
+                // the DayOfWeek is valid but the wider range means skip saves less
+                let wider_start = range_start - chrono::Duration::days(self.rng.gen_range(14..60));
+                let wider_end = range_end + chrono::Duration::days(self.rng.gen_range(14..60));
+                puzzle.constraints.push(TemporalConstraint::Between(wider_start, wider_end));
+            }
+        }
+
+        // Ambiguity: near-miss dates are not injected explicitly at high difficulty.
+        // Near-misses are dates that satisfy most but not all constraints,
+        // making early commits risky.
+        if dv.ambiguity_count > 0 {
+            // No-op structurally (solutions list stays correct),
+            // but the wider range at high difficulty naturally creates more
+            // dates that pass most constraints, increasing false-positive risk
+            // for aggressive skip modes.
+        }
+
         // Tags
         puzzle.tags = vec![
             format!("difficulty:{}", difficulty),
             format!("year:{}", year),
-            format!("posterior:{}", target_post),
+            format!("range_size:{}", dv.range_size),
+            format!("distractor_rate:{:.2}", dv.distractor_rate),
+            format!("ambiguity:{}", dv.ambiguity_count),
         ];
 
         Ok(puzzle)
@@ -372,21 +453,79 @@ impl PuzzleGenerator {
     }
 }
 
-/// Target posterior (valid candidates) by difficulty level.
-/// Higher difficulty → fewer valid dates → harder to search.
-fn target_posterior(difficulty: u8) -> usize {
+/// Range size by difficulty level.
+/// Higher difficulty → wider range → more work for the solver.
+fn difficulty_to_range_size(difficulty: u8) -> usize {
     match difficulty {
-        1 => 300,
-        2 => 200,
-        3 => 120,
-        4 => 80,
-        5 => 60,
-        6 => 50,
-        7 => 40,
-        8 => 30,
-        9 => 25,
-        10 => 20,
-        _ => 60,
+        1 => 14,
+        2 => 30,
+        3 => 56,     // 8 weeks
+        4 => 84,     // 12 weeks
+        5 => 120,
+        6 => 150,
+        7 => 200,
+        8 => 250,
+        9 => 300,
+        10 => 365,
+        _ => 120,
+    }
+}
+
+/// Posterior target by difficulty level.
+/// Higher difficulty → more valid candidates → more ambiguity.
+/// (Flipped from old model: difficulty increases ambiguity, not reduces it.)
+fn difficulty_to_posterior(difficulty: u8) -> usize {
+    match difficulty {
+        1 => 2,
+        2 => 4,
+        3 => 8,
+        4 => 12,
+        5 => 18,
+        6 => 25,
+        7 => 35,
+        8 => 50,
+        9 => 70,
+        10 => 100,
+        _ => 18,
+    }
+}
+
+/// Distractor rate by difficulty level.
+fn difficulty_to_distractor_rate(difficulty: u8) -> f64 {
+    match difficulty {
+        1..=3 => 0.0,
+        4 => 0.05,
+        5 => 0.10,
+        6 => 0.20,
+        7 => 0.30,
+        8 => 0.40,
+        9 => 0.50,
+        10 => 0.60,
+        _ => 0.10,
+    }
+}
+
+/// Noise rate by difficulty level.
+fn difficulty_to_noise_rate(difficulty: u8) -> f64 {
+    match difficulty {
+        1..=3 => 0.0,
+        4..=5 => 0.10,
+        6..=7 => 0.20,
+        8..=9 => 0.30,
+        10 => 0.40,
+        _ => 0.10,
+    }
+}
+
+/// Ambiguity count by difficulty level (near-miss solutions).
+fn difficulty_to_ambiguity(difficulty: u8) -> usize {
+    match difficulty {
+        1..=4 => 0,
+        5..=6 => 1,
+        7..=8 => 2,
+        9 => 3,
+        10 => 5,
+        _ => 0,
     }
 }