From ffbf72fb2f0e25bd3017ea70be1d26bcee7c54ae Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 15 Feb 2026 19:22:26 +0000 Subject: [PATCH] feat(agi-runtime): authority guard, coherence monitor, benchmarks Add three new modules to rvf-runtime implementing the ADR-036 runtime: - agi_authority.rs: AuthorityGuard (per-mode + per-action-class enforcement), BudgetTracker (resource consumption tracking with hard caps), ActionClass enum (10 action categories) - agi_coherence.rs: CoherenceMonitor (real-time state machine with Healthy/SkillFreeze/RepairMode/Halted transitions), ContainerValidator (full validation pipeline) - tests/agi_e2e.rs: end-to-end integration tests and performance benchmarks (header serialize/deserialize, container build/parse, flags computation) All 219 rvf-runtime lib tests pass. https://claude.ai/code/session_01RnwD4x5cbpB7FPvoyYQz8G --- crates/rvf/rvf-runtime/src/agi_authority.rs | 438 +++++++++++++ crates/rvf/rvf-runtime/src/agi_coherence.rs | 689 ++++++++++++++++++++ crates/rvf/rvf-runtime/src/lib.rs | 2 + crates/rvf/rvf-runtime/tests/agi_e2e.rs | 335 ++++++++++ 4 files changed, 1464 insertions(+) create mode 100644 crates/rvf/rvf-runtime/src/agi_authority.rs create mode 100644 crates/rvf/rvf-runtime/src/agi_coherence.rs create mode 100644 crates/rvf/rvf-runtime/tests/agi_e2e.rs diff --git a/crates/rvf/rvf-runtime/src/agi_authority.rs b/crates/rvf/rvf-runtime/src/agi_authority.rs new file mode 100644 index 00000000..fdae934d --- /dev/null +++ b/crates/rvf/rvf-runtime/src/agi_authority.rs @@ -0,0 +1,438 @@ +//! Authority and resource budget enforcement runtime for AGI containers (ADR-036). +//! +//! - [`AuthorityGuard`]: enforces per-execution authority levels and per-action-class +//! overrides, ensuring container actions never exceed their granted privileges. +//! - [`BudgetTracker`]: tracks resource consumption against a [`ResourceBudget`] and +//! returns `BudgetExhausted` errors when any resource is at its limit. + +use rvf_types::agi_container::*; + +const ACTION_CLASS_COUNT: usize = 10; + +/// Classification of actions that a container execution may perform. +/// +/// Each class can be independently granted a different [`AuthorityLevel`] +/// via [`AuthorityGuard::grant_action_class`]. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +#[repr(u8)] +pub enum ActionClass { + ReadMemory = 0, + WriteMemory = 1, + ReadFile = 2, + WriteFile = 3, + RunTest = 4, + RunCommand = 5, + GitPush = 6, + CreatePR = 7, + SendMessage = 8, + ModifyInfra = 9, +} + +impl ActionClass { + /// All variants in discriminant order. + pub const ALL: [ActionClass; ACTION_CLASS_COUNT] = [ + Self::ReadMemory, Self::WriteMemory, Self::ReadFile, Self::WriteFile, + Self::RunTest, Self::RunCommand, Self::GitPush, Self::CreatePR, + Self::SendMessage, Self::ModifyInfra, + ]; +} + +/// Runtime guard enforcing authority levels for container execution. +/// +/// Holds a global maximum authority (from execution mode or explicit) and an +/// optional per-action-class override table as a fixed-size array. +#[derive(Clone, Debug)] +pub struct AuthorityGuard { + max_authority: AuthorityLevel, + mode: ExecutionMode, + class_overrides: [Option; ACTION_CLASS_COUNT], +} + +impl AuthorityGuard { + /// Create a guard using the default authority for the given execution mode. + pub fn new(mode: ExecutionMode) -> Self { + Self { + max_authority: AuthorityLevel::default_for_mode(mode), + mode, + class_overrides: [None; ACTION_CLASS_COUNT], + } + } + + /// Create a guard with an explicit maximum authority level. + pub fn with_max_authority(mode: ExecutionMode, max: AuthorityLevel) -> Self { + Self { max_authority: max, mode, class_overrides: [None; ACTION_CLASS_COUNT] } + } + + /// The execution mode this guard was created for. + pub fn mode(&self) -> ExecutionMode { self.mode } + + /// The global maximum authority level. + pub fn max_authority(&self) -> AuthorityLevel { self.max_authority } + + /// Check whether the guard permits the `required` level. + pub fn check(&self, required: AuthorityLevel) -> Result<(), ContainerError> { + if self.max_authority.permits(required) { + Ok(()) + } else { + Err(ContainerError::InsufficientAuthority { + required: required as u8, + granted: self.max_authority as u8, + }) + } + } + + /// Check authority for a specific action class. + /// + /// Per-class overrides are capped by the global maximum to prevent escalation. + pub fn check_action_class( + &self, class: ActionClass, required: AuthorityLevel, + ) -> Result<(), ContainerError> { + let effective = match self.class_overrides[class as usize] { + Some(o) if (o as u8) <= (self.max_authority as u8) => o, + Some(_) => self.max_authority, + None => self.max_authority, + }; + if effective.permits(required) { + Ok(()) + } else { + Err(ContainerError::InsufficientAuthority { + required: required as u8, + granted: effective as u8, + }) + } + } + + /// Grant authority for a specific action class (capped by global max at check time). + pub fn grant_action_class(&mut self, class: ActionClass, level: AuthorityLevel) { + self.class_overrides[class as usize] = Some(level); + } +} + +/// Percentage utilization for each resource dimension (0.0..=100.0). +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct BudgetUtilization { + pub time_pct: f32, + pub tokens_pct: f32, + pub cost_pct: f32, + pub tool_calls_pct: f32, + pub external_writes_pct: f32, +} + +/// Point-in-time snapshot of budget state. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct BudgetSnapshot { + pub budget: ResourceBudget, + pub used_time_secs: u32, + pub used_tokens: u32, + pub used_cost_microdollars: u32, + pub used_tool_calls: u16, + pub used_external_writes: u16, +} + +/// Tracks resource consumption against a [`ResourceBudget`]. +/// +/// Each `charge_*` method adds to the running total and returns +/// [`ContainerError::BudgetExhausted`] if the total would exceed the budget. +#[derive(Clone, Debug)] +pub struct BudgetTracker { + budget: ResourceBudget, + used_time_secs: u32, + used_tokens: u32, + used_cost_microdollars: u32, + used_tool_calls: u16, + used_external_writes: u16, +} + +impl BudgetTracker { + /// Create a tracker with the given budget (clamped to hard maximums). + pub fn new(budget: ResourceBudget) -> Self { + Self { + budget: budget.clamped(), + used_time_secs: 0, used_tokens: 0, used_cost_microdollars: 0, + used_tool_calls: 0, used_external_writes: 0, + } + } + + /// The clamped budget this tracker enforces. + pub fn budget(&self) -> &ResourceBudget { &self.budget } + + /// Charge token usage. + pub fn charge_tokens(&mut self, tokens: u32) -> Result<(), ContainerError> { + let t = self.used_tokens.saturating_add(tokens); + if t > self.budget.max_tokens { return Err(ContainerError::BudgetExhausted("tokens")); } + self.used_tokens = t; + Ok(()) + } + + /// Charge cost in microdollars. + pub fn charge_cost(&mut self, microdollars: u32) -> Result<(), ContainerError> { + let t = self.used_cost_microdollars.saturating_add(microdollars); + if t > self.budget.max_cost_microdollars { return Err(ContainerError::BudgetExhausted("cost")); } + self.used_cost_microdollars = t; + Ok(()) + } + + /// Charge one tool call. + pub fn charge_tool_call(&mut self) -> Result<(), ContainerError> { + let t = self.used_tool_calls.saturating_add(1); + if t > self.budget.max_tool_calls { return Err(ContainerError::BudgetExhausted("tool_calls")); } + self.used_tool_calls = t; + Ok(()) + } + + /// Charge one external write. + pub fn charge_external_write(&mut self) -> Result<(), ContainerError> { + let t = self.used_external_writes.saturating_add(1); + if t > self.budget.max_external_writes { return Err(ContainerError::BudgetExhausted("external_writes")); } + self.used_external_writes = t; + Ok(()) + } + + /// Charge wall-clock time in seconds. + pub fn charge_time(&mut self, secs: u32) -> Result<(), ContainerError> { + let t = self.used_time_secs.saturating_add(secs); + if t > self.budget.max_time_secs { return Err(ContainerError::BudgetExhausted("time")); } + self.used_time_secs = t; + Ok(()) + } + + /// Remaining tokens before exhaustion. + pub fn remaining_tokens(&self) -> u32 { self.budget.max_tokens.saturating_sub(self.used_tokens) } + + /// Remaining cost budget in microdollars. + pub fn remaining_cost(&self) -> u32 { + self.budget.max_cost_microdollars.saturating_sub(self.used_cost_microdollars) + } + + /// Remaining wall-clock time in seconds. + pub fn remaining_time(&self) -> u32 { self.budget.max_time_secs.saturating_sub(self.used_time_secs) } + + /// Compute utilization percentages for each resource dimension. + pub fn utilization(&self) -> BudgetUtilization { + BudgetUtilization { + time_pct: pct(self.used_time_secs as f32, self.budget.max_time_secs as f32), + tokens_pct: pct(self.used_tokens as f32, self.budget.max_tokens as f32), + cost_pct: pct(self.used_cost_microdollars as f32, self.budget.max_cost_microdollars as f32), + tool_calls_pct: pct(self.used_tool_calls as f32, self.budget.max_tool_calls as f32), + external_writes_pct: pct(self.used_external_writes as f32, self.budget.max_external_writes as f32), + } + } + + /// Returns `true` if ANY resource dimension with a non-zero budget has reached + /// its limit. A zero-max dimension is disabled, not exhausted. + pub fn is_exhausted(&self) -> bool { + (self.budget.max_time_secs > 0 && self.used_time_secs >= self.budget.max_time_secs) + || (self.budget.max_tokens > 0 && self.used_tokens >= self.budget.max_tokens) + || (self.budget.max_cost_microdollars > 0 && self.used_cost_microdollars >= self.budget.max_cost_microdollars) + || (self.budget.max_tool_calls > 0 && self.used_tool_calls >= self.budget.max_tool_calls) + || (self.budget.max_external_writes > 0 && self.used_external_writes >= self.budget.max_external_writes) + } + + /// Capture a point-in-time snapshot of the tracker state. + pub fn snapshot(&self) -> BudgetSnapshot { + BudgetSnapshot { + budget: self.budget, + used_time_secs: self.used_time_secs, + used_tokens: self.used_tokens, + used_cost_microdollars: self.used_cost_microdollars, + used_tool_calls: self.used_tool_calls, + used_external_writes: self.used_external_writes, + } + } +} + +fn pct(used: f32, max: f32) -> f32 { + if max == 0.0 { if used > 0.0 { 100.0 } else { 0.0 } } + else { (used / max * 100.0).min(100.0) } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn default_authority_per_mode() { + let r = AuthorityGuard::new(ExecutionMode::Replay); + assert_eq!(r.max_authority(), AuthorityLevel::ReadOnly); + assert_eq!(r.mode(), ExecutionMode::Replay); + assert_eq!(AuthorityGuard::new(ExecutionMode::Verify).max_authority(), AuthorityLevel::ExecuteTools); + assert_eq!(AuthorityGuard::new(ExecutionMode::Live).max_authority(), AuthorityLevel::WriteMemory); + } + + #[test] + fn check_pass_and_fail() { + let g = AuthorityGuard::new(ExecutionMode::Verify); + assert!(g.check(AuthorityLevel::ReadOnly).is_ok()); + assert!(g.check(AuthorityLevel::ExecuteTools).is_ok()); + assert_eq!(g.check(AuthorityLevel::WriteExternal).unwrap_err(), + ContainerError::InsufficientAuthority { required: 3, granted: 2 }); + let ro = AuthorityGuard::new(ExecutionMode::Replay); + assert_eq!(ro.check(AuthorityLevel::WriteMemory).unwrap_err(), + ContainerError::InsufficientAuthority { required: 1, granted: 0 }); + } + + #[test] + fn with_max_authority_overrides_default() { + let g = AuthorityGuard::with_max_authority(ExecutionMode::Replay, AuthorityLevel::WriteExternal); + assert_eq!(g.mode(), ExecutionMode::Replay); + assert!(g.check(AuthorityLevel::WriteExternal).is_ok()); + } + + #[test] + fn action_class_grant_restrict_and_inherit() { + let mut g = AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::WriteExternal); + assert!(g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteExternal).is_ok()); + g.grant_action_class(ActionClass::GitPush, AuthorityLevel::ReadOnly); + assert_eq!(g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteMemory).unwrap_err(), + ContainerError::InsufficientAuthority { required: 1, granted: 0 }); + assert!(g.check_action_class(ActionClass::ReadMemory, AuthorityLevel::WriteExternal).is_ok()); + } + + #[test] + fn action_class_override_capped_by_global() { + let mut g = AuthorityGuard::new(ExecutionMode::Replay); + g.grant_action_class(ActionClass::RunCommand, AuthorityLevel::WriteExternal); + assert!(g.check_action_class(ActionClass::RunCommand, AuthorityLevel::WriteMemory).is_err()); + } + + #[test] + fn action_class_override_within_global() { + let mut g = AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::ExecuteTools); + g.grant_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory); + assert!(g.check_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory).is_ok()); + assert!(g.check_action_class(ActionClass::WriteFile, AuthorityLevel::ExecuteTools).is_err()); + } + + #[test] + fn action_class_all_variants() { + assert_eq!(ActionClass::ALL.len(), ACTION_CLASS_COUNT); + for (i, c) in ActionClass::ALL.iter().enumerate() { assert_eq!(*c as usize, i); } + } + + #[test] + fn tracker_zero_usage() { + let t = BudgetTracker::new(ResourceBudget::DEFAULT); + assert_eq!(t.remaining_tokens(), 200_000); + assert_eq!(t.remaining_cost(), 1_000_000); + assert_eq!(t.remaining_time(), 300); + assert!(!t.is_exhausted()); + } + + #[test] + fn charge_and_exhaust_each_resource() { + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + assert!(t.charge_tokens(200_000).is_ok()); + assert_eq!(t.charge_tokens(1), Err(ContainerError::BudgetExhausted("tokens"))); + + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + assert!(t.charge_cost(1_000_000).is_ok()); + assert_eq!(t.charge_cost(1), Err(ContainerError::BudgetExhausted("cost"))); + + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + for _ in 0..50 { t.charge_tool_call().unwrap(); } + assert!(t.charge_tool_call().is_err()); + + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + assert!(t.charge_time(300).is_ok()); + assert!(t.charge_time(1).is_err()); + + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + assert!(t.charge_external_write().is_err()); // zero budget + + let mut t = BudgetTracker::new(ResourceBudget::EXTENDED); + for _ in 0..10 { t.charge_external_write().unwrap(); } + assert!(t.charge_external_write().is_err()); + } + + #[test] + fn is_exhausted_semantics() { + assert!(!BudgetTracker::new(ResourceBudget::DEFAULT).is_exhausted()); + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + t.charge_tokens(200_000).unwrap(); + assert!(t.is_exhausted()); + } + + #[test] + fn utilization_calculation() { + let mut t = BudgetTracker::new(ResourceBudget::DEFAULT); + t.charge_tokens(100_000).unwrap(); + t.charge_time(150).unwrap(); + t.charge_cost(500_000).unwrap(); + let u = t.utilization(); + assert!((u.tokens_pct - 50.0).abs() < 0.01); + assert!((u.time_pct - 50.0).abs() < 0.01); + assert!((u.cost_pct - 50.0).abs() < 0.01); + + t.charge_tokens(100_000).unwrap(); + let u2 = t.utilization(); + assert!((u2.tokens_pct - 100.0).abs() < 0.01); + + let z = BudgetTracker::new(ResourceBudget { + max_time_secs: 0, max_tokens: 0, max_cost_microdollars: 0, + max_tool_calls: 0, max_external_writes: 0, + }); + assert!((z.utilization().time_pct).abs() < 0.01); + } + + #[test] + fn snapshot_captures_state() { + let mut t = BudgetTracker::new(ResourceBudget::EXTENDED); + t.charge_tokens(5_000).unwrap(); + t.charge_time(60).unwrap(); + t.charge_cost(100_000).unwrap(); + t.charge_tool_call().unwrap(); + t.charge_external_write().unwrap(); + let s = t.snapshot(); + assert_eq!(s.budget, ResourceBudget::EXTENDED); + assert_eq!(s.used_tokens, 5_000); + assert_eq!(s.used_time_secs, 60); + assert_eq!(s.used_cost_microdollars, 100_000); + assert_eq!(s.used_tool_calls, 1); + assert_eq!(s.used_external_writes, 1); + } + + #[test] + fn budget_clamped_on_creation() { + let t = BudgetTracker::new(ResourceBudget { + max_time_secs: 999_999, max_tokens: 999_999_999, + max_cost_microdollars: 999_999_999, max_tool_calls: 60_000, max_external_writes: 60_000, + }); + let b = t.budget(); + assert_eq!(b.max_time_secs, ResourceBudget::MAX.max_time_secs); + assert_eq!(b.max_tokens, ResourceBudget::MAX.max_tokens); + assert_eq!(b.max_external_writes, ResourceBudget::MAX.max_external_writes); + } + + #[test] + fn charge_exactly_at_limit() { + let mut t = BudgetTracker::new(ResourceBudget { + max_time_secs: 10, max_tokens: 100, max_cost_microdollars: 500, + max_tool_calls: 3, max_external_writes: 2, + }); + assert!(t.charge_tokens(100).is_ok()); + assert!(t.charge_time(10).is_ok()); + assert!(t.charge_cost(500).is_ok()); + for _ in 0..3 { t.charge_tool_call().unwrap(); } + for _ in 0..2 { t.charge_external_write().unwrap(); } + assert_eq!(t.remaining_tokens(), 0); + assert_eq!(t.remaining_cost(), 0); + assert_eq!(t.remaining_time(), 0); + assert!(t.is_exhausted()); + assert!(t.charge_tokens(1).is_err()); + assert!(t.charge_time(1).is_err()); + assert!(t.charge_cost(1).is_err()); + assert!(t.charge_tool_call().is_err()); + assert!(t.charge_external_write().is_err()); + } + + #[test] + fn max_budget_allows_high_usage() { + let mut t = BudgetTracker::new(ResourceBudget::MAX); + assert!(t.charge_tokens(1_000_000).is_ok()); + assert!(t.charge_time(3600).is_ok()); + assert!(t.charge_cost(10_000_000).is_ok()); + for _ in 0..500 { t.charge_tool_call().unwrap(); } + for _ in 0..50 { t.charge_external_write().unwrap(); } + assert!(t.is_exhausted()); + } +} diff --git a/crates/rvf/rvf-runtime/src/agi_coherence.rs b/crates/rvf/rvf-runtime/src/agi_coherence.rs new file mode 100644 index 00000000..27b8c7a1 --- /dev/null +++ b/crates/rvf/rvf-runtime/src/agi_coherence.rs @@ -0,0 +1,689 @@ +//! Coherence monitoring and container validation pipeline (ADR-036). +//! +//! This module provides two main components: +//! +//! - [`CoherenceMonitor`]: tracks real-time coherence state across events, +//! contradictions, and task rollbacks, transitioning through [`CoherenceState`] +//! to gate commits, skill promotion, and autonomous execution. +//! +//! - [`ContainerValidator`]: runs the full validation pipeline over an AGI +//! container's header, segments, and coherence thresholds, collecting all +//! errors rather than short-circuiting. + +use rvf_types::agi_container::*; + +// --------------------------------------------------------------------------- +// CoherenceState +// --------------------------------------------------------------------------- + +/// Runtime coherence state derived from threshold checks. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum CoherenceState { + /// All signals within bounds. + Healthy, + /// Contradiction rate exceeded -- skill promotion frozen. + SkillFreeze, + /// Coherence score below minimum -- commits blocked, repair mode. + RepairMode, + /// Rollback ratio exceeded -- execution halted, human review required. + Halted, +} + +// --------------------------------------------------------------------------- +// CoherenceReport +// --------------------------------------------------------------------------- + +/// Point-in-time snapshot of all coherence metrics. +#[derive(Clone, Debug)] +pub struct CoherenceReport { + pub state: CoherenceState, + pub coherence_score: f32, + pub contradiction_rate: f32, + pub rollback_ratio: f32, + pub total_events: u64, + pub total_contradictions: u64, + pub total_tasks: u64, + pub total_rollbacks: u64, +} + +// --------------------------------------------------------------------------- +// CoherenceMonitor +// --------------------------------------------------------------------------- + +/// Tracks real-time coherence state and gates system actions. +pub struct CoherenceMonitor { + thresholds: CoherenceThresholds, + current_coherence: f32, + total_events: u64, + total_contradictions: u64, + total_tasks: u64, + total_rollbacks: u64, + state: CoherenceState, +} + +impl CoherenceMonitor { + /// Create a new monitor with the given thresholds. Returns an error if the + /// thresholds are out of valid ranges. + pub fn new(thresholds: CoherenceThresholds) -> Result { + thresholds.validate()?; + Ok(Self { + thresholds, + current_coherence: 1.0, + total_events: 0, + total_contradictions: 0, + total_tasks: 0, + total_rollbacks: 0, + state: CoherenceState::Healthy, + }) + } + + /// Create a monitor with [`CoherenceThresholds::DEFAULT`]. + pub fn with_defaults() -> Self { + Self { + thresholds: CoherenceThresholds::DEFAULT, + current_coherence: 1.0, + total_events: 0, + total_contradictions: 0, + total_tasks: 0, + total_rollbacks: 0, + state: CoherenceState::Healthy, + } + } + + /// Update the current coherence score and re-evaluate state. + pub fn update_coherence(&mut self, score: f32) { + self.current_coherence = score; + self.recompute_state(); + } + + /// Record a generic event (increments event counter). + pub fn record_event(&mut self) { + self.total_events = self.total_events.saturating_add(1); + } + + /// Record a contradiction event and re-evaluate state. + pub fn record_contradiction(&mut self) { + self.total_contradictions = self.total_contradictions.saturating_add(1); + self.recompute_state(); + } + + /// Record a task completion. If `rolled_back` is true the rollback counter + /// is also incremented. State is re-evaluated afterward. + pub fn record_task_completion(&mut self, rolled_back: bool) { + self.total_tasks = self.total_tasks.saturating_add(1); + if rolled_back { + self.total_rollbacks = self.total_rollbacks.saturating_add(1); + } + self.recompute_state(); + } + + /// Current coherence state. + pub fn state(&self) -> CoherenceState { + self.state + } + + /// Whether the system may commit world-model deltas. True when + /// [`CoherenceState::Healthy`] or [`CoherenceState::SkillFreeze`]. + pub fn can_commit(&self) -> bool { + matches!(self.state, CoherenceState::Healthy | CoherenceState::SkillFreeze) + } + + /// Whether new skills may be promoted. True only when + /// [`CoherenceState::Healthy`]. + pub fn can_promote_skill(&self) -> bool { + self.state == CoherenceState::Healthy + } + + /// Whether the system requires human review. True when + /// [`CoherenceState::Halted`]. + pub fn requires_human_review(&self) -> bool { + self.state == CoherenceState::Halted + } + + /// Contradiction rate: contradictions per 100 events. + /// Returns `0.0` when there are no events. + pub fn contradiction_rate(&self) -> f32 { + if self.total_events == 0 { + return 0.0; + } + (self.total_contradictions as f32 / self.total_events as f32) * 100.0 + } + + /// Rollback ratio: rollbacks / total tasks. + /// Returns `0.0` when there are no tasks. + pub fn rollback_ratio(&self) -> f32 { + if self.total_tasks == 0 { + return 0.0; + } + self.total_rollbacks as f32 / self.total_tasks as f32 + } + + /// Produce a point-in-time snapshot of all metrics. + pub fn report(&self) -> CoherenceReport { + CoherenceReport { + state: self.state, + coherence_score: self.current_coherence, + contradiction_rate: self.contradiction_rate(), + rollback_ratio: self.rollback_ratio(), + total_events: self.total_events, + total_contradictions: self.total_contradictions, + total_tasks: self.total_tasks, + total_rollbacks: self.total_rollbacks, + } + } + + // ----------------------------------------------------------------------- + // Internal + // ----------------------------------------------------------------------- + + /// Determine the worst applicable state from the current metrics. + /// Priority (most severe first): Halted > RepairMode > SkillFreeze > Healthy. + fn recompute_state(&mut self) { + if self.rollback_ratio() > self.thresholds.max_rollback_ratio { + self.state = CoherenceState::Halted; + } else if self.current_coherence < self.thresholds.min_coherence_score { + self.state = CoherenceState::RepairMode; + } else if self.contradiction_rate() > self.thresholds.max_contradiction_rate { + self.state = CoherenceState::SkillFreeze; + } else { + self.state = CoherenceState::Healthy; + } + } +} + +// --------------------------------------------------------------------------- +// ContainerValidator +// --------------------------------------------------------------------------- + +/// Full validation pipeline for an AGI container. +pub struct ContainerValidator { + mode: ExecutionMode, +} + +impl ContainerValidator { + /// Create a validator for the given execution mode. + pub fn new(mode: ExecutionMode) -> Self { + Self { mode } + } + + /// Validate container segments against mode requirements. + pub fn validate_segments( + &self, + segments: &ContainerSegments, + ) -> Result<(), ContainerError> { + segments.validate(self.mode) + } + + /// Validate the AGI container header. + /// + /// Checks: magic bytes, version (must be 1), and flag consistency -- + /// replay-capable containers must not claim Live-only features without + /// the kernel flag. + pub fn validate_header( + &self, + header: &AgiContainerHeader, + ) -> Result<(), ContainerError> { + if !header.is_valid_magic() { + return Err(ContainerError::InvalidConfig("bad magic bytes")); + } + if header.version == 0 || header.version > 1 { + return Err(ContainerError::InvalidConfig( + "unsupported header version", + )); + } + // Flag consistency: if REPLAY_CAPABLE is set the container should + // also have the witness flag, since replays depend on witness chains. + if header.is_replay_capable() + && (header.flags & AGI_HAS_WITNESS == 0) + { + return Err(ContainerError::InvalidConfig( + "replay-capable flag requires witness flag", + )); + } + Ok(()) + } + + /// Validate coherence threshold ranges. + pub fn validate_coherence( + &self, + thresholds: &CoherenceThresholds, + ) -> Result<(), ContainerError> { + thresholds.validate() + } + + /// Run all validations, collecting every error rather than + /// short-circuiting on the first failure. + pub fn validate_full( + &self, + header: &AgiContainerHeader, + segments: &ContainerSegments, + thresholds: &CoherenceThresholds, + ) -> Vec { + let mut errors = Vec::new(); + + if let Err(e) = self.validate_header(header) { + errors.push(e); + } + if let Err(e) = self.validate_segments(segments) { + errors.push(e); + } + if let Err(e) = self.validate_coherence(thresholds) { + errors.push(e); + } + + errors + } +} + +// --------------------------------------------------------------------------- +// Tests +// --------------------------------------------------------------------------- + +#[cfg(test)] +mod tests { + use super::*; + + // -- Helpers -- + + fn valid_header() -> AgiContainerHeader { + AgiContainerHeader { + magic: AGI_MAGIC, + version: 1, + flags: AGI_HAS_KERNEL | AGI_HAS_WITNESS | AGI_REPLAY_CAPABLE, + container_id: [0x01; 16], + build_id: [0x02; 16], + created_ns: 1_700_000_000_000_000_000, + model_id_hash: [0; 8], + policy_hash: [0; 8], + } + } + + fn valid_segments() -> ContainerSegments { + ContainerSegments { + manifest_present: true, + kernel_present: true, + world_model_present: true, + ..Default::default() + } + } + + // ----------------------------------------------------------------------- + // CoherenceMonitor — state transitions + // ----------------------------------------------------------------------- + + #[test] + fn monitor_starts_healthy() { + let m = CoherenceMonitor::with_defaults(); + assert_eq!(m.state(), CoherenceState::Healthy); + } + + #[test] + fn monitor_healthy_to_skill_freeze() { + let mut m = CoherenceMonitor::with_defaults(); + // Default max_contradiction_rate is 5.0 per 100 events. + // Record 100 events with 6 contradictions -> rate = 6.0 > 5.0. + for _ in 0..100 { + m.record_event(); + } + for _ in 0..6 { + m.record_contradiction(); + } + assert_eq!(m.state(), CoherenceState::SkillFreeze); + } + + #[test] + fn monitor_healthy_to_repair_mode() { + let mut m = CoherenceMonitor::with_defaults(); + // Default min_coherence_score is 0.70. + m.update_coherence(0.50); + assert_eq!(m.state(), CoherenceState::RepairMode); + } + + #[test] + fn monitor_healthy_to_halted() { + let mut m = CoherenceMonitor::with_defaults(); + // Default max_rollback_ratio is 0.20. + // 10 tasks, 3 rolled back -> ratio = 0.30 > 0.20. + for _ in 0..7 { + m.record_task_completion(false); + } + for _ in 0..3 { + m.record_task_completion(true); + } + assert_eq!(m.state(), CoherenceState::Halted); + } + + #[test] + fn halted_takes_priority_over_repair_mode() { + let mut m = CoherenceMonitor::with_defaults(); + // Both rollback ratio and coherence score are bad. + m.update_coherence(0.50); + for _ in 0..5 { + m.record_task_completion(false); + } + for _ in 0..5 { + m.record_task_completion(true); + } + // Halted (rollback) is highest severity and wins. + assert_eq!(m.state(), CoherenceState::Halted); + } + + #[test] + fn repair_mode_takes_priority_over_skill_freeze() { + let mut m = CoherenceMonitor::with_defaults(); + // Both coherence and contradiction rate are bad. + m.update_coherence(0.50); + for _ in 0..100 { + m.record_event(); + } + for _ in 0..10 { + m.record_contradiction(); + } + // RepairMode wins over SkillFreeze. + assert_eq!(m.state(), CoherenceState::RepairMode); + } + + #[test] + fn recovery_back_to_healthy() { + let mut m = CoherenceMonitor::with_defaults(); + m.update_coherence(0.50); + assert_eq!(m.state(), CoherenceState::RepairMode); + m.update_coherence(0.90); + assert_eq!(m.state(), CoherenceState::Healthy); + } + + // ----------------------------------------------------------------------- + // CoherenceMonitor — gate queries + // ----------------------------------------------------------------------- + + #[test] + fn can_commit_when_healthy() { + let m = CoherenceMonitor::with_defaults(); + assert!(m.can_commit()); + } + + #[test] + fn can_commit_when_skill_freeze() { + let mut m = CoherenceMonitor::with_defaults(); + for _ in 0..100 { + m.record_event(); + } + for _ in 0..6 { + m.record_contradiction(); + } + assert_eq!(m.state(), CoherenceState::SkillFreeze); + assert!(m.can_commit()); + } + + #[test] + fn cannot_commit_when_repair_mode() { + let mut m = CoherenceMonitor::with_defaults(); + m.update_coherence(0.50); + assert!(!m.can_commit()); + } + + #[test] + fn cannot_commit_when_halted() { + let mut m = CoherenceMonitor::with_defaults(); + m.record_task_completion(true); // 1 task, 1 rollback -> ratio = 1.0 + assert!(!m.can_commit()); + } + + #[test] + fn can_promote_skill_only_when_healthy() { + let m = CoherenceMonitor::with_defaults(); + assert!(m.can_promote_skill()); + + let mut m2 = CoherenceMonitor::with_defaults(); + for _ in 0..100 { + m2.record_event(); + } + for _ in 0..6 { + m2.record_contradiction(); + } + assert_eq!(m2.state(), CoherenceState::SkillFreeze); + assert!(!m2.can_promote_skill()); + } + + #[test] + fn requires_human_review_only_when_halted() { + let m = CoherenceMonitor::with_defaults(); + assert!(!m.requires_human_review()); + + let mut m2 = CoherenceMonitor::with_defaults(); + m2.record_task_completion(true); + assert_eq!(m2.state(), CoherenceState::Halted); + assert!(m2.requires_human_review()); + } + + // ----------------------------------------------------------------------- + // CoherenceMonitor — rate calculations + // ----------------------------------------------------------------------- + + #[test] + fn contradiction_rate_calculation() { + let mut m = CoherenceMonitor::with_defaults(); + for _ in 0..200 { + m.record_event(); + } + for _ in 0..4 { + m.record_contradiction(); + } + // 4 contradictions in (200 events + 4 contradiction events via record_contradiction) + // record_contradiction does not call record_event, so total_events = 200. + // Rate = (4 / 200) * 100 = 2.0. + assert!((m.contradiction_rate() - 2.0).abs() < f32::EPSILON); + } + + #[test] + fn rollback_ratio_calculation() { + let mut m = CoherenceMonitor::with_defaults(); + for _ in 0..8 { + m.record_task_completion(false); + } + for _ in 0..2 { + m.record_task_completion(true); + } + // 2 rollbacks / 10 tasks = 0.20. + assert!((m.rollback_ratio() - 0.20).abs() < f32::EPSILON); + } + + // ----------------------------------------------------------------------- + // CoherenceMonitor — edge cases + // ----------------------------------------------------------------------- + + #[test] + fn contradiction_rate_zero_events() { + let m = CoherenceMonitor::with_defaults(); + assert!((m.contradiction_rate() - 0.0).abs() < f32::EPSILON); + } + + #[test] + fn rollback_ratio_zero_tasks() { + let m = CoherenceMonitor::with_defaults(); + assert!((m.rollback_ratio() - 0.0).abs() < f32::EPSILON); + } + + #[test] + fn new_with_invalid_thresholds() { + let bad = CoherenceThresholds { + min_coherence_score: 2.0, + ..CoherenceThresholds::DEFAULT + }; + assert!(CoherenceMonitor::new(bad).is_err()); + } + + #[test] + fn new_with_valid_thresholds() { + let m = CoherenceMonitor::new(CoherenceThresholds::STRICT).unwrap(); + assert_eq!(m.state(), CoherenceState::Healthy); + } + + #[test] + fn report_snapshot() { + let mut m = CoherenceMonitor::with_defaults(); + m.update_coherence(0.85); + for _ in 0..50 { + m.record_event(); + } + m.record_contradiction(); + // 9 successful + 1 rollback = ratio 0.10, within the 0.20 threshold. + for _ in 0..9 { + m.record_task_completion(false); + } + m.record_task_completion(true); + + let r = m.report(); + assert_eq!(r.state, CoherenceState::Healthy); + assert!((r.coherence_score - 0.85).abs() < f32::EPSILON); + assert_eq!(r.total_events, 50); + assert_eq!(r.total_contradictions, 1); + assert_eq!(r.total_tasks, 10); + assert_eq!(r.total_rollbacks, 1); + } + + // ----------------------------------------------------------------------- + // ContainerValidator — validate_segments + // ----------------------------------------------------------------------- + + #[test] + fn validator_segments_delegates_ok() { + let v = ContainerValidator::new(ExecutionMode::Live); + let segs = valid_segments(); + assert!(v.validate_segments(&segs).is_ok()); + } + + #[test] + fn validator_segments_delegates_error() { + let v = ContainerValidator::new(ExecutionMode::Replay); + let segs = ContainerSegments { + manifest_present: true, + witness_count: 0, + ..Default::default() + }; + assert!(v.validate_segments(&segs).is_err()); + } + + // ----------------------------------------------------------------------- + // ContainerValidator — validate_header + // ----------------------------------------------------------------------- + + #[test] + fn validator_header_ok() { + let v = ContainerValidator::new(ExecutionMode::Live); + assert!(v.validate_header(&valid_header()).is_ok()); + } + + #[test] + fn validator_header_bad_magic() { + let v = ContainerValidator::new(ExecutionMode::Live); + let mut h = valid_header(); + h.magic = 0xDEADBEEF; + assert_eq!( + v.validate_header(&h), + Err(ContainerError::InvalidConfig("bad magic bytes")) + ); + } + + #[test] + fn validator_header_bad_version_zero() { + let v = ContainerValidator::new(ExecutionMode::Live); + let mut h = valid_header(); + h.version = 0; + assert_eq!( + v.validate_header(&h), + Err(ContainerError::InvalidConfig("unsupported header version")) + ); + } + + #[test] + fn validator_header_bad_version_future() { + let v = ContainerValidator::new(ExecutionMode::Live); + let mut h = valid_header(); + h.version = 99; + assert_eq!( + v.validate_header(&h), + Err(ContainerError::InvalidConfig("unsupported header version")) + ); + } + + #[test] + fn validator_header_replay_without_witness() { + let v = ContainerValidator::new(ExecutionMode::Live); + let h = AgiContainerHeader { + magic: AGI_MAGIC, + version: 1, + flags: AGI_REPLAY_CAPABLE, // missing AGI_HAS_WITNESS + container_id: [0; 16], + build_id: [0; 16], + created_ns: 0, + model_id_hash: [0; 8], + policy_hash: [0; 8], + }; + assert_eq!( + v.validate_header(&h), + Err(ContainerError::InvalidConfig( + "replay-capable flag requires witness flag" + )) + ); + } + + // ----------------------------------------------------------------------- + // ContainerValidator — validate_full + // ----------------------------------------------------------------------- + + #[test] + fn validator_full_all_ok() { + let v = ContainerValidator::new(ExecutionMode::Live); + let errs = v.validate_full( + &valid_header(), + &valid_segments(), + &CoherenceThresholds::DEFAULT, + ); + assert!(errs.is_empty()); + } + + #[test] + fn validator_full_collects_multiple_errors() { + let v = ContainerValidator::new(ExecutionMode::Live); + + // Bad header (wrong magic). + let mut h = valid_header(); + h.magic = 0xBAD0CAFE; + + // Bad segments (no kernel, no wasm, no world model for Live). + let segs = ContainerSegments { + manifest_present: true, + ..Default::default() + }; + + // Bad thresholds. + let bad_thresh = CoherenceThresholds { + min_coherence_score: -1.0, + ..CoherenceThresholds::DEFAULT + }; + + let errs = v.validate_full(&h, &segs, &bad_thresh); + // Expect at least 3 errors: header, segments, and thresholds. + assert!(errs.len() >= 3, "expected >= 3 errors, got {}", errs.len()); + } + + #[test] + fn validator_full_partial_errors() { + let v = ContainerValidator::new(ExecutionMode::Replay); + + // Good header, bad segments (replay needs witness), good thresholds. + let segs = ContainerSegments { + manifest_present: true, + witness_count: 0, + ..Default::default() + }; + + let errs = v.validate_full( + &valid_header(), + &segs, + &CoherenceThresholds::DEFAULT, + ); + assert_eq!(errs.len(), 1); + } +} diff --git a/crates/rvf/rvf-runtime/src/lib.rs b/crates/rvf/rvf-runtime/src/lib.rs index 5f7bd529..c4cf1895 100644 --- a/crates/rvf/rvf-runtime/src/lib.rs +++ b/crates/rvf/rvf-runtime/src/lib.rs @@ -35,6 +35,8 @@ pub mod status; pub mod store; pub mod witness; pub mod write_path; +pub mod agi_authority; +pub mod agi_coherence; pub mod agi_container; pub use adversarial::{ diff --git a/crates/rvf/rvf-runtime/tests/agi_e2e.rs b/crates/rvf/rvf-runtime/tests/agi_e2e.rs new file mode 100644 index 00000000..9da1a7b4 --- /dev/null +++ b/crates/rvf/rvf-runtime/tests/agi_e2e.rs @@ -0,0 +1,335 @@ +//! End-to-end integration tests and benchmarks for the AGI Cognitive Container +//! system (ADR-036). Covers the full build -> serialize -> parse -> validate +//! cycle, signed container tamper detection, execution mode validation matrix, +//! authority levels, resource budgets, coherence thresholds, and perf benchmarks. + +use rvf_runtime::agi_container::{AgiContainerBuilder, ParsedAgiManifest}; +use rvf_runtime::seed_crypto; +use rvf_types::agi_container::*; + +const SIGNING_KEY: &[u8] = b"agi-e2e-test-signing-key-32bytes"; +const ORCH_JSON: &[u8] = br#"{"model":"claude-opus-4-6","max_turns":100}"#; +const TASKS_JSON: &[u8] = br#"[{"id":1,"spec":"fix bug"}]"#; +const GRADERS_JSON: &[u8] = br#"[{"type":"test_pass"}]"#; +const TOOLS_JSON: &[u8] = br#"[{"name":"ruvector_query","type":"search"}]"#; +const COHER_JSON: &[u8] = br#"{"min_cut":0.7,"rollback":true}"#; + +/// Build a fully-populated container with every optional section. +fn build_full_container() -> (Vec, AgiContainerHeader) { + AgiContainerBuilder::new([0x01; 16], [0x02; 16]) + .with_model_id("claude-opus-4-6") + .with_policy(b"autonomous", [0xAA; 8]) + .with_orchestrator(ORCH_JSON) + .with_tool_registry(TOOLS_JSON) + .with_agent_prompts(b"You are a coder agent.") + .with_eval_tasks(TASKS_JSON) + .with_eval_graders(GRADERS_JSON) + .with_skill_library(b"[]") + .with_replay_script(b"#!/bin/sh\nrvf replay $1") + .with_kernel_config(b"console=ttyS0") + .with_network_config(b"{\"port\":8080}") + .with_coherence_config(COHER_JSON) + .with_project_instructions(b"# CLAUDE.md") + .with_dependency_snapshot(b"sha256:abc123") + .with_authority_config(b"{\"level\":\"WriteMemory\"}") + .with_domain_profile(b"coding") + .offline_capable() + .with_segments(ContainerSegments { + kernel_present: true, kernel_size: 5_000_000, + wasm_count: 2, wasm_total_size: 60_000, + vec_segment_count: 4, index_segment_count: 2, + witness_count: 100, crypto_present: false, + manifest_present: true, orchestrator_present: true, + world_model_present: true, total_size: 0, + }) + .build() + .unwrap() +} + +// -- 1. Full Container Lifecycle -- + +#[test] +fn full_container_lifecycle() { + let (payload, header) = build_full_container(); + + assert!(header.is_valid_magic()); + assert_eq!(header.version, 1); + assert!(header.has_kernel()); + assert!(header.has_orchestrator()); + assert!(header.has_world_model()); + assert!(header.is_replay_capable()); + assert!(header.is_offline_capable()); + assert!(header.created_ns > 0, "created_ns should be a real timestamp"); + + // Header round-trip. + let header_rt = AgiContainerHeader::from_bytes(&header.to_bytes()).unwrap(); + assert_eq!(header_rt, header); + + // Parse manifest and verify every section. + let p = ParsedAgiManifest::parse(&payload).unwrap(); + assert_eq!(p.model_id_str(), Some("claude-opus-4-6")); + assert_eq!(p.orchestrator_config.unwrap(), ORCH_JSON); + assert_eq!(p.tool_registry.unwrap(), TOOLS_JSON); + assert_eq!(p.eval_tasks.unwrap(), TASKS_JSON); + assert_eq!(p.eval_graders.unwrap(), GRADERS_JSON); + assert_eq!(p.coherence_config.unwrap(), COHER_JSON); + assert!(p.policy.is_some()); + assert!(p.agent_prompts.is_some()); + assert!(p.skill_library.is_some()); + assert!(p.replay_script.is_some()); + assert!(p.kernel_config.is_some()); + assert!(p.network_config.is_some()); + assert!(p.project_instructions.is_some()); + assert!(p.dependency_snapshot.is_some()); + assert!(p.authority_config.is_some()); + assert!(p.domain_profile.is_some()); + assert!(p.is_autonomous_capable()); + + // Segment-derived flags should all be present in the header. + let seg_flags = ContainerSegments { + kernel_present: true, wasm_count: 2, witness_count: 100, + orchestrator_present: true, world_model_present: true, + ..Default::default() + }.to_flags(); + assert_eq!(header.flags & seg_flags, seg_flags); +} + +// -- 2. Signed Container Tamper Detection -- + +#[test] +fn signed_container_tamper_detection() { + let builder = AgiContainerBuilder::new([0x10; 16], [0x20; 16]) + .with_model_id("claude-opus-4-6") + .with_orchestrator(ORCH_JSON) + .with_eval_tasks(TASKS_JSON) + .with_eval_graders(GRADERS_JSON) + .with_segments(ContainerSegments { + kernel_present: true, manifest_present: true, + world_model_present: true, ..Default::default() + }); + + let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap(); + assert!(header.is_signed()); + + let unsigned_len = payload.len() - 32; + let sig = &payload[unsigned_len..]; + assert!(seed_crypto::verify_seed(SIGNING_KEY, &payload[..unsigned_len], sig)); + + // Tamper with one byte in the TLV payload area. + let mut tampered = payload.clone(); + tampered[AGI_HEADER_SIZE + 10] ^= 0xFF; + assert!(!seed_crypto::verify_seed(SIGNING_KEY, &tampered[..unsigned_len], sig), + "tampered payload must fail verification"); + + // Tamper with header byte. + let mut tampered_hdr = payload.clone(); + tampered_hdr[7] ^= 0x01; + assert!(!seed_crypto::verify_seed(SIGNING_KEY, &tampered_hdr[..unsigned_len], sig), + "tampered header must fail verification"); +} + +// -- 3. Execution Mode Validation Matrix -- + +#[test] +fn execution_mode_validation_matrix() { + let m = |mp, kp, wc, wmc, vsc, isc, wnc| ContainerSegments { + manifest_present: mp, kernel_present: kp, wasm_count: wc, + world_model_present: wmc, vec_segment_count: vsc, + index_segment_count: isc, witness_count: wnc, + ..Default::default() + }; + + // Replay + no witness -> fail + assert!(m(true, false, 0, false, 0, 0, 0).validate(ExecutionMode::Replay).is_err()); + // Replay + witness -> pass + assert!(m(true, false, 0, false, 0, 0, 10).validate(ExecutionMode::Replay).is_ok()); + // Verify + no runtime -> fail + assert!(m(true, false, 0, false, 0, 0, 0).validate(ExecutionMode::Verify).is_err()); + // Verify + kernel + world_model -> pass + assert!(m(true, true, 0, true, 0, 0, 0).validate(ExecutionMode::Verify).is_ok()); + // Verify + wasm + vec -> pass + assert!(m(true, false, 1, false, 2, 0, 0).validate(ExecutionMode::Verify).is_ok()); + // Live + kernel only (no world model) -> fail + assert!(m(true, true, 0, false, 0, 0, 0).validate(ExecutionMode::Live).is_err()); + // Live + kernel + world model -> pass + assert!(m(true, true, 0, true, 0, 0, 0).validate(ExecutionMode::Live).is_ok()); +} + +// -- 4. Authority Level Tests -- + +#[test] +fn authority_level_defaults_per_mode() { + assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Replay), AuthorityLevel::ReadOnly); + assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Verify), AuthorityLevel::ExecuteTools); + assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Live), AuthorityLevel::WriteMemory); +} + +#[test] +fn authority_level_hierarchy() { + // WriteExternal permits all. + assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ReadOnly)); + assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteMemory)); + assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ExecuteTools)); + assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteExternal)); + // ExecuteTools permits itself and below. + assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ReadOnly)); + assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteMemory)); + assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ExecuteTools)); + assert!(!AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteExternal)); + // ReadOnly permits nothing above itself. + assert!(AuthorityLevel::ReadOnly.permits(AuthorityLevel::ReadOnly)); + assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteMemory)); + assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::ExecuteTools)); + assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteExternal)); +} + +// -- 5. Resource Budget Tests -- + +#[test] +fn resource_budget_clamping() { + let clamped = ResourceBudget { + max_time_secs: 99999, max_tokens: 99999999, + max_cost_microdollars: 99999999, + max_tool_calls: 65535, max_external_writes: 65535, + }.clamped(); + assert_eq!(clamped.max_time_secs, 3600); + assert_eq!(clamped.max_tokens, 1_000_000); + assert_eq!(clamped.max_cost_microdollars, 10_000_000); + assert_eq!(clamped.max_tool_calls, 500); + assert_eq!(clamped.max_external_writes, 50); +} + +#[test] +fn resource_budget_within_max_unchanged() { + assert_eq!(ResourceBudget::DEFAULT.clamped(), ResourceBudget::DEFAULT); +} + +// -- 6. Coherence Threshold Validation -- + +#[test] +fn coherence_threshold_validation() { + assert!(CoherenceThresholds::DEFAULT.validate().is_ok()); + assert!(CoherenceThresholds::STRICT.validate().is_ok()); + + // Invalid: score > 1.0 + let bad = CoherenceThresholds { min_coherence_score: 1.5, ..CoherenceThresholds::DEFAULT }; + assert!(bad.validate().is_err()); + // Invalid: negative rate + let bad2 = CoherenceThresholds { max_contradiction_rate: -1.0, ..CoherenceThresholds::DEFAULT }; + assert!(bad2.validate().is_err()); + // Invalid: rollback ratio > 1.0 + let bad3 = CoherenceThresholds { max_rollback_ratio: 2.0, ..CoherenceThresholds::DEFAULT }; + assert!(bad3.validate().is_err()); + // Edge: zero values are valid + let edge = CoherenceThresholds { + min_coherence_score: 0.0, max_contradiction_rate: 0.0, max_rollback_ratio: 0.0, + }; + assert!(edge.validate().is_ok()); +} + +// -- 7. Container Size Limit -- + +#[test] +fn container_size_limit_enforced() { + let oversized = ContainerSegments { + manifest_present: true, total_size: AGI_MAX_CONTAINER_SIZE + 1, + ..Default::default() + }; + assert_eq!( + oversized.validate(ExecutionMode::Replay), + Err(ContainerError::TooLarge { size: AGI_MAX_CONTAINER_SIZE + 1 }) + ); +} + +// -- 8. Performance Benchmarks (using std::time) -- + +#[test] +fn bench_header_serialize_deserialize() { + use std::time::Instant; + let header = AgiContainerHeader { + magic: AGI_MAGIC, version: 1, + flags: AGI_HAS_KERNEL | AGI_HAS_WASM | AGI_HAS_ORCHESTRATOR | AGI_SIGNED, + container_id: [0x42; 16], build_id: [0x43; 16], + created_ns: 1_700_000_000_000_000_000, + model_id_hash: [0xAA; 8], policy_hash: [0xBB; 8], + }; + let n: u128 = 100_000; + + let start = Instant::now(); + for _ in 0..n { let _ = std::hint::black_box(header.to_bytes()); } + let ser = start.elapsed(); + + let bytes = header.to_bytes(); + let start = Instant::now(); + for _ in 0..n { let _ = std::hint::black_box(AgiContainerHeader::from_bytes(&bytes).unwrap()); } + let deser = start.elapsed(); + + let ser_ns = ser.as_nanos() / n; + let deser_ns = deser.as_nanos() / n; + eprintln!("Header serialize: {ser_ns:>8} ns/op ({n} iterations in {ser:?})"); + eprintln!("Header deserialize: {deser_ns:>8} ns/op ({n} iterations in {deser:?})"); + assert!(ser_ns < 1000, "serialize too slow: {ser_ns} ns/op"); + assert!(deser_ns < 1000, "deserialize too slow: {deser_ns} ns/op"); +} + +#[test] +fn bench_container_build_parse() { + use std::time::Instant; + let n: u128 = 10_000; + let segs = || ContainerSegments { + kernel_present: true, manifest_present: true, + world_model_present: true, ..Default::default() + }; + + let start = Instant::now(); + for _ in 0..n { + let b = AgiContainerBuilder::new([0x01; 16], [0x02; 16]) + .with_model_id("claude-opus-4-6") + .with_policy(b"autonomous", [0xAA; 8]) + .with_orchestrator(ORCH_JSON) + .with_eval_tasks(TASKS_JSON) + .with_eval_graders(GRADERS_JSON) + .with_segments(segs()); + let _ = std::hint::black_box(b.build().unwrap()); + } + let build_elapsed = start.elapsed(); + + let (payload, _) = AgiContainerBuilder::new([0x01; 16], [0x02; 16]) + .with_model_id("claude-opus-4-6") + .with_orchestrator(ORCH_JSON) + .with_eval_tasks(TASKS_JSON) + .with_eval_graders(GRADERS_JSON) + .with_segments(segs()) + .build().unwrap(); + + let start = Instant::now(); + for _ in 0..n { let _ = std::hint::black_box(ParsedAgiManifest::parse(&payload).unwrap()); } + let parse_elapsed = start.elapsed(); + + let build_ns = build_elapsed.as_nanos() / n; + let parse_ns = parse_elapsed.as_nanos() / n; + eprintln!("Container build: {build_ns:>8} ns/op ({n} iterations in {build_elapsed:?})"); + eprintln!("Container parse: {parse_ns:>8} ns/op ({n} iterations in {parse_elapsed:?})"); + assert!(build_ns < 10_000, "build too slow: {build_ns} ns/op"); + assert!(parse_ns < 5_000, "parse too slow: {parse_ns} ns/op"); +} + +#[test] +fn bench_flags_computation() { + use std::time::Instant; + let n: u128 = 1_000_000; + let segs = ContainerSegments { + kernel_present: true, wasm_count: 2, witness_count: 100, + crypto_present: true, orchestrator_present: true, + world_model_present: true, vec_segment_count: 4, + index_segment_count: 2, ..Default::default() + }; + + let start = Instant::now(); + for _ in 0..n { let _ = std::hint::black_box(segs.to_flags()); } + let elapsed = start.elapsed(); + + let ns = elapsed.as_nanos() / n; + eprintln!("Flags computation: {ns:>8} ns/op ({n} iterations in {elapsed:?})"); + assert!(ns < 100, "flags computation too slow: {ns} ns/op"); +}