diff --git a/examples/benchmarks/src/acceptance_test.rs b/examples/benchmarks/src/acceptance_test.rs
index d4fef730..7355e6b8 100644
--- a/examples/benchmarks/src/acceptance_test.rs
+++ b/examples/benchmarks/src/acceptance_test.rs
@@ -506,6 +506,11 @@ pub fn run_acceptance_test_mode(config: &HoldoutConfig, mode: &AblationMode) ->
         0.0
     };
 
+    // Print compiler diagnostics in verbose mode
+    if config.verbose && compiler_enabled {
+        compiler.print_diagnostics();
+    }
+
     Ok(AblationResult {
         mode: mode.clone(),
         result: acceptance_result,
diff --git a/examples/benchmarks/src/temporal.rs b/examples/benchmarks/src/temporal.rs
index 8bb0ee9d..7c207f06 100644
--- a/examples/benchmarks/src/temporal.rs
+++ b/examples/benchmarks/src/temporal.rs
@@ -501,18 +501,50 @@ pub struct CompiledSolveConfig {
     pub use_rewriting: bool,
     /// Minimum steps that succeeded for this signature
     pub max_steps: usize,
+    /// Average steps across all successes (for bounded trial budget)
+    pub avg_steps: f64,
+    /// Number of successful observations compiled
+    pub observations: usize,
     /// Expected correctness
     pub expected_correct: bool,
     /// Stop after first solution (early termination for known single-solution puzzles)
     pub stop_after_first: bool,
-    /// Hit count (how often this config was used)
+    /// Hit count (how often this config was used and succeeded)
     pub hit_count: usize,
     /// Counterexample count (failures on this signature)
     pub counterexample_count: usize,
 }
 
+impl CompiledSolveConfig {
+    /// Confidence: Laplace-smoothed success rate (0.5 when nothing is recorded).
+    pub fn confidence(&self) -> f64 {
+        let total = self.hit_count + self.counterexample_count;
+        if total == 0 { return 0.5; }
+        (self.hit_count as f64 + 1.0) / (total as f64 + 2.0)
+    }
+
+    /// Trial budget: bounded step limit for Strategy Zero.
+    /// Uses avg_steps * 2.0 when observations > 2 and avg_steps > 1.0, else max_steps.
+    /// Floored at 10, then capped at external_limit / 4 (the cap wins when it is below 10).
+    pub fn trial_budget(&self, external_limit: usize) -> usize {
+        let budget = if self.observations > 2 && self.avg_steps > 1.0 {
+            // Enough data: use 2x average steps for headroom
+            (self.avg_steps * 2.0) as usize
+        } else {
+            // Not enough data or trivially small: use the stored max_steps (at least 10)
+            self.max_steps.max(10)
+        };
+        budget.max(10).min(external_limit / 4)
+    }
+}
+
 /// KnowledgeCompiler: learns constraint-signature → optimal solve config.
 /// Consulted as "Strategy Zero" before any other strategy runs.
+///
+/// Signature version: v1 (difficulty:sorted_constraints)
+/// Change this when canonicalization rules change.
+const COMPILER_SIG_VERSION: &str = "v1";
+
 #[derive(Clone, Debug, Default, Serialize, Deserialize)]
 pub struct KnowledgeCompiler {
     /// Compiled constraint signature → config
@@ -523,18 +555,28 @@ pub struct KnowledgeCompiler {
     pub misses: usize,
     /// False hits (compiled config tried but solve was wrong)
     pub false_hits: usize,
+    /// Steps saved by successful Strategy Zero (vs estimated fallback cost)
+    pub steps_saved: i64,
+    /// Confidence threshold for attempting Strategy Zero
+    pub confidence_threshold: f64,
 }
 
 impl KnowledgeCompiler {
-    pub fn new() -> Self { Self::default() }
+    /// Create a compiler whose Strategy Zero confidence threshold is 0.7.
+    /// NOTE: the derived `Default` leaves that threshold at 0.0 instead.
+    pub fn new() -> Self {
+        // Everything except the threshold comes from `Default`.
+        Self { confidence_threshold: 0.7, ..Default::default() }
+    }
 
     /// Build constraint signature from puzzle features.
+    /// Format: "<version>:<difficulty>:<sorted,constraint,types>", version = COMPILER_SIG_VERSION.
     pub fn signature(puzzle: &TemporalPuzzle) -> String {
         let mut sig_parts: Vec<String> = puzzle.constraints.iter()
             .map(|c| constraint_type_name(c))
             .collect();
         sig_parts.sort();
-        format!("{}:{}", puzzle.difficulty, sig_parts.join(","))
+        format!("{}:{}:{}", COMPILER_SIG_VERSION, puzzle.difficulty, sig_parts.join(","))
     }
 
     /// Compile knowledge from a ReasoningBank's trajectories.
@@ -543,22 +585,30 @@ impl KnowledgeCompiler {
             let correct = traj.verdict.as_ref().map(|v| v.is_success()).unwrap_or(false);
             if !correct { continue; }
 
-            // Build signature from constraint types
+            // Build signature from constraint types (versioned)
             let mut sig_parts = traj.constraint_types.clone();
             sig_parts.sort();
-            let sig = format!("{}:{}", traj.difficulty, sig_parts.join(","));
+            let sig = format!("{}:{}:{}", COMPILER_SIG_VERSION, traj.difficulty, sig_parts.join(","));
 
             if let Some(attempt) = traj.attempts.first() {
                 let entry = self.signature_cache.entry(sig).or_insert(CompiledSolveConfig {
                     use_rewriting: true,
                     max_steps: attempt.steps,
+                    avg_steps: 0.0,
+                    observations: 0,
                     expected_correct: true,
-                    stop_after_first: true, // compiled configs use early termination
+                    stop_after_first: true,
                     hit_count: 0,
                     counterexample_count: 0,
                 });
                 // Keep minimum steps that succeeded
                 entry.max_steps = entry.max_steps.min(attempt.steps);
+                // Running average of steps
+                let n = entry.observations as f64;
+                entry.avg_steps = (entry.avg_steps * n + attempt.steps as f64) / (n + 1.0);
+                entry.observations += 1;
+                // Compiled from successful trajectories → seed confidence
+                entry.hit_count = entry.observations;
             }
         }
     }
@@ -577,27 +627,32 @@ impl KnowledgeCompiler {
     }
 
     /// Record a counterexample: Strategy Zero failed on this signature.
+    /// Always bumps false_hits; a cached entry is disabled once it has 2 counterexamples.
     pub fn record_failure(&mut self, puzzle: &TemporalPuzzle) {
         self.false_hits += 1;
         let sig = Self::signature(puzzle);
         if let Some(config) = self.signature_cache.get_mut(&sig) {
             config.counterexample_count += 1;
-            // If failure rate exceeds 30%, invalidate the cache entry
-            if config.hit_count > 0 {
-                let fail_rate = config.counterexample_count as f64
-                    / (config.hit_count + config.counterexample_count) as f64;
-                if fail_rate > 0.30 {
-                    config.expected_correct = false;
-                }
+            // Quarantine: clear expected_correct from the 2nd counterexample on
+            if config.counterexample_count >= 2 {
+                config.expected_correct = false;
             }
         }
     }
 
-    /// Record a success: Strategy Zero worked on this signature.
-    pub fn record_success(&mut self, puzzle: &TemporalPuzzle) {
+    /// Record a successful Strategy Zero hit.
+    /// Adds (estimated fallback - actual_steps) to steps_saved; no-op if the signature is uncached.
+    pub fn record_success(&mut self, puzzle: &TemporalPuzzle, actual_steps: usize) {
         let sig = Self::signature(puzzle);
         if let Some(config) = self.signature_cache.get_mut(&sig) {
             config.hit_count += 1;
+            // Heuristic fallback cost: avg_steps * 2, or max_steps when no average exists yet
+            let estimated_fallback = if config.avg_steps > 0.0 {
+                (config.avg_steps * 2.0) as i64
+            } else {
+                config.max_steps as i64
+            };
+            self.steps_saved += estimated_fallback - actual_steps as i64;
         }
     }
 
@@ -607,6 +662,39 @@ impl KnowledgeCompiler {
     }
 
     pub fn cache_size(&self) -> usize { self.signature_cache.len() }
+
+    /// Print up to 15 signatures (most counterexamples first, ties by signature) plus totals.
+    pub fn print_diagnostics(&self) {
+        println!();
+        println!("  Compiler Diagnostics (cache_size={})", self.cache_size());
+        println!("  {:<40} {:>5} {:>5} {:>6} {:>8} {:>6}",
+            "Signature", "Obs", "Hits", "Fails", "AvgStep", "Conf");
+        println!("  {}", "-".repeat(72));
+
+        let mut entries: Vec<_> = self.signature_cache.iter().collect();
+        entries.sort_by(|a, b| b.1.counterexample_count.cmp(&a.1.counterexample_count)
+            .then_with(|| a.0.cmp(b.0)));
+
+        for (sig, config) in entries.iter().take(15) {
+            // Truncate by chars: a byte-range slice panics off a char boundary.
+            let short_sig: String = sig.chars().take(38).collect();
+            println!("  {:<40} {:>5} {:>5} {:>6} {:>8.1} {:>6.3}",
+                short_sig, config.observations, config.hit_count,
+                config.counterexample_count, config.avg_steps, config.confidence());
+        }
+
+        // Summary
+        let total_configs = self.signature_cache.len();
+        let disabled = self.signature_cache.values().filter(|c| !c.expected_correct).count();
+        let total_false_hits: usize = self.signature_cache.values().map(|c| c.counterexample_count).sum();
+        let false_hit_sigs = self.signature_cache.values().filter(|c| c.counterexample_count > 0).count();
+        println!();
+        println!("  Total signatures: {}, disabled: {}", total_configs, disabled);
+        println!("  False hits: {} across {} signatures ({:.1}% of sigs)",
+            total_false_hits, false_hit_sigs,
+            if total_configs > 0 { false_hit_sigs as f64 / total_configs as f64 * 100.0 } else { 0.0 });
+        println!("  Steps saved by compiler: {}", self.steps_saved);
+    }
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
@@ -858,17 +946,26 @@ impl AdaptiveSolver {
         let mut extra_steps: usize = 0;
         let mut extra_tool_calls: usize = 0;
 
-        // ─── Strategy Zero: KnowledgeCompiler ───────────────────────────
+        // ─── Strategy Zero: KnowledgeCompiler (bounded trial) ────────────
         if self.compiler_enabled {
-            if let Some(config) = self.compiler.lookup(puzzle) {
-                if config.expected_correct {
-                    // Use compiled config as Strategy Zero with early termination
-                    let compiled_steps = config.max_steps.max(5);
-                    self.solver.calendar_tool = config.use_rewriting;
-                    self.solver.stop_after_first = config.stop_after_first;
-                    self.solver.max_steps = self.external_step_limit
-                        .map(|l| l.min(compiled_steps))
-                        .unwrap_or(compiled_steps);
+            let conf_threshold = self.compiler.confidence_threshold;
+            // Extract all config data before releasing the borrow
+            let compiled = self.compiler.lookup(puzzle).map(|config| {
+                (
+                    config.expected_correct,
+                    config.confidence(),
+                    config.trial_budget(self.external_step_limit.unwrap_or(400)),
+                    config.use_rewriting,
+                    config.stop_after_first,
+                )
+            });
+
+            if let Some((expected_correct, confidence, trial_budget, use_rewriting, stop_first)) = compiled {
+                if expected_correct && confidence >= conf_threshold {
+                    // Bounded trial: cap at 25% of external limit to make misses cheap
+                    self.solver.calendar_tool = use_rewriting;
+                    self.solver.stop_after_first = stop_first;
+                    self.solver.max_steps = trial_budget;
 
                     let start = std::time::Instant::now();
                     let result = self.solver.solve(puzzle)?;
@@ -879,7 +976,7 @@ impl AdaptiveSolver {
 
                     if result.correct {
                         // Strategy Zero win — record and return
-                        self.compiler.record_success(puzzle);
+                        self.compiler.record_success(puzzle, result.steps);
                         let mut trajectory = Trajectory::new(&puzzle.id, puzzle.difficulty);
                         trajectory.constraint_types = constraint_types;
                         trajectory.latency_ms = latency;
@@ -903,7 +1000,7 @@ impl AdaptiveSolver {
 
                         return Ok(result);
                     } else {
-                        // Strategy Zero failed — record overhead, fall through
+                        // Strategy Zero failed — bounded trial overhead only
                         extra_steps += result.steps;
                         extra_tool_calls += result.tool_calls;
                         self.compiler.record_failure(puzzle);