feat(agi-runtime): authority guard, coherence monitor, benchmarks

Add three new modules to rvf-runtime implementing the ADR-036 runtime:

- agi_authority.rs: AuthorityGuard (per-mode + per-action-class enforcement),
  BudgetTracker (resource consumption tracking with hard caps),
  ActionClass enum (10 action categories)
- agi_coherence.rs: CoherenceMonitor (real-time state machine with
  Healthy/SkillFreeze/RepairMode/Halted transitions),
  ContainerValidator (full validation pipeline)
- tests/agi_e2e.rs: end-to-end integration tests and performance
  benchmarks (header serialize/deserialize, container build/parse,
  flags computation)

All 219 rvf-runtime lib tests pass.

https://claude.ai/code/session_01RnwD4x5cbpB7FPvoyYQz8G
This commit is contained in:
Claude 2026-02-15 19:22:26 +00:00
parent 4f9107fdf1
commit ffbf72fb2f
4 changed files with 1464 additions and 0 deletions

View file

@ -0,0 +1,438 @@
//! Authority and resource budget enforcement runtime for AGI containers (ADR-036).
//!
//! - [`AuthorityGuard`]: enforces per-execution authority levels and per-action-class
//! overrides, ensuring container actions never exceed their granted privileges.
//! - [`BudgetTracker`]: tracks resource consumption against a [`ResourceBudget`] and
//! returns `BudgetExhausted` errors when any resource is at its limit.
use rvf_types::agi_container::*;
const ACTION_CLASS_COUNT: usize = 10;
/// Classification of actions that a container execution may perform.
///
/// Each class can be independently granted a different [`AuthorityLevel`]
/// via [`AuthorityGuard::grant_action_class`].
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum ActionClass {
ReadMemory = 0,
WriteMemory = 1,
ReadFile = 2,
WriteFile = 3,
RunTest = 4,
RunCommand = 5,
GitPush = 6,
CreatePR = 7,
SendMessage = 8,
ModifyInfra = 9,
}
impl ActionClass {
/// All variants in discriminant order.
pub const ALL: [ActionClass; ACTION_CLASS_COUNT] = [
Self::ReadMemory, Self::WriteMemory, Self::ReadFile, Self::WriteFile,
Self::RunTest, Self::RunCommand, Self::GitPush, Self::CreatePR,
Self::SendMessage, Self::ModifyInfra,
];
}
/// Runtime guard enforcing authority levels for container execution.
///
/// Holds a global maximum authority (from execution mode or explicit) and an
/// optional per-action-class override table as a fixed-size array.
#[derive(Clone, Debug)]
pub struct AuthorityGuard {
max_authority: AuthorityLevel,
mode: ExecutionMode,
class_overrides: [Option<AuthorityLevel>; ACTION_CLASS_COUNT],
}
impl AuthorityGuard {
/// Create a guard using the default authority for the given execution mode.
pub fn new(mode: ExecutionMode) -> Self {
Self {
max_authority: AuthorityLevel::default_for_mode(mode),
mode,
class_overrides: [None; ACTION_CLASS_COUNT],
}
}
/// Create a guard with an explicit maximum authority level.
pub fn with_max_authority(mode: ExecutionMode, max: AuthorityLevel) -> Self {
Self { max_authority: max, mode, class_overrides: [None; ACTION_CLASS_COUNT] }
}
/// The execution mode this guard was created for.
pub fn mode(&self) -> ExecutionMode { self.mode }
/// The global maximum authority level.
pub fn max_authority(&self) -> AuthorityLevel { self.max_authority }
/// Check whether the guard permits the `required` level.
pub fn check(&self, required: AuthorityLevel) -> Result<(), ContainerError> {
if self.max_authority.permits(required) {
Ok(())
} else {
Err(ContainerError::InsufficientAuthority {
required: required as u8,
granted: self.max_authority as u8,
})
}
}
/// Check authority for a specific action class.
///
/// Per-class overrides are capped by the global maximum to prevent escalation.
pub fn check_action_class(
&self, class: ActionClass, required: AuthorityLevel,
) -> Result<(), ContainerError> {
let effective = match self.class_overrides[class as usize] {
Some(o) if (o as u8) <= (self.max_authority as u8) => o,
Some(_) => self.max_authority,
None => self.max_authority,
};
if effective.permits(required) {
Ok(())
} else {
Err(ContainerError::InsufficientAuthority {
required: required as u8,
granted: effective as u8,
})
}
}
/// Grant authority for a specific action class (capped by global max at check time).
pub fn grant_action_class(&mut self, class: ActionClass, level: AuthorityLevel) {
self.class_overrides[class as usize] = Some(level);
}
}
/// Percentage utilization for each resource dimension (0.0..=100.0).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct BudgetUtilization {
pub time_pct: f32,
pub tokens_pct: f32,
pub cost_pct: f32,
pub tool_calls_pct: f32,
pub external_writes_pct: f32,
}
/// Point-in-time snapshot of budget state.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct BudgetSnapshot {
pub budget: ResourceBudget,
pub used_time_secs: u32,
pub used_tokens: u32,
pub used_cost_microdollars: u32,
pub used_tool_calls: u16,
pub used_external_writes: u16,
}
/// Tracks resource consumption against a [`ResourceBudget`].
///
/// Each `charge_*` method adds to the running total and returns
/// [`ContainerError::BudgetExhausted`] if the total would exceed the budget.
#[derive(Clone, Debug)]
pub struct BudgetTracker {
budget: ResourceBudget,
used_time_secs: u32,
used_tokens: u32,
used_cost_microdollars: u32,
used_tool_calls: u16,
used_external_writes: u16,
}
impl BudgetTracker {
/// Create a tracker with the given budget (clamped to hard maximums).
pub fn new(budget: ResourceBudget) -> Self {
Self {
budget: budget.clamped(),
used_time_secs: 0, used_tokens: 0, used_cost_microdollars: 0,
used_tool_calls: 0, used_external_writes: 0,
}
}
/// The clamped budget this tracker enforces.
pub fn budget(&self) -> &ResourceBudget { &self.budget }
/// Charge token usage.
pub fn charge_tokens(&mut self, tokens: u32) -> Result<(), ContainerError> {
let t = self.used_tokens.saturating_add(tokens);
if t > self.budget.max_tokens { return Err(ContainerError::BudgetExhausted("tokens")); }
self.used_tokens = t;
Ok(())
}
/// Charge cost in microdollars.
pub fn charge_cost(&mut self, microdollars: u32) -> Result<(), ContainerError> {
let t = self.used_cost_microdollars.saturating_add(microdollars);
if t > self.budget.max_cost_microdollars { return Err(ContainerError::BudgetExhausted("cost")); }
self.used_cost_microdollars = t;
Ok(())
}
/// Charge one tool call.
pub fn charge_tool_call(&mut self) -> Result<(), ContainerError> {
let t = self.used_tool_calls.saturating_add(1);
if t > self.budget.max_tool_calls { return Err(ContainerError::BudgetExhausted("tool_calls")); }
self.used_tool_calls = t;
Ok(())
}
/// Charge one external write.
pub fn charge_external_write(&mut self) -> Result<(), ContainerError> {
let t = self.used_external_writes.saturating_add(1);
if t > self.budget.max_external_writes { return Err(ContainerError::BudgetExhausted("external_writes")); }
self.used_external_writes = t;
Ok(())
}
/// Charge wall-clock time in seconds.
pub fn charge_time(&mut self, secs: u32) -> Result<(), ContainerError> {
let t = self.used_time_secs.saturating_add(secs);
if t > self.budget.max_time_secs { return Err(ContainerError::BudgetExhausted("time")); }
self.used_time_secs = t;
Ok(())
}
/// Remaining tokens before exhaustion.
pub fn remaining_tokens(&self) -> u32 { self.budget.max_tokens.saturating_sub(self.used_tokens) }
/// Remaining cost budget in microdollars.
pub fn remaining_cost(&self) -> u32 {
self.budget.max_cost_microdollars.saturating_sub(self.used_cost_microdollars)
}
/// Remaining wall-clock time in seconds.
pub fn remaining_time(&self) -> u32 { self.budget.max_time_secs.saturating_sub(self.used_time_secs) }
/// Compute utilization percentages for each resource dimension.
pub fn utilization(&self) -> BudgetUtilization {
BudgetUtilization {
time_pct: pct(self.used_time_secs as f32, self.budget.max_time_secs as f32),
tokens_pct: pct(self.used_tokens as f32, self.budget.max_tokens as f32),
cost_pct: pct(self.used_cost_microdollars as f32, self.budget.max_cost_microdollars as f32),
tool_calls_pct: pct(self.used_tool_calls as f32, self.budget.max_tool_calls as f32),
external_writes_pct: pct(self.used_external_writes as f32, self.budget.max_external_writes as f32),
}
}
/// Returns `true` if ANY resource dimension with a non-zero budget has reached
/// its limit. A zero-max dimension is disabled, not exhausted.
pub fn is_exhausted(&self) -> bool {
(self.budget.max_time_secs > 0 && self.used_time_secs >= self.budget.max_time_secs)
|| (self.budget.max_tokens > 0 && self.used_tokens >= self.budget.max_tokens)
|| (self.budget.max_cost_microdollars > 0 && self.used_cost_microdollars >= self.budget.max_cost_microdollars)
|| (self.budget.max_tool_calls > 0 && self.used_tool_calls >= self.budget.max_tool_calls)
|| (self.budget.max_external_writes > 0 && self.used_external_writes >= self.budget.max_external_writes)
}
/// Capture a point-in-time snapshot of the tracker state.
pub fn snapshot(&self) -> BudgetSnapshot {
BudgetSnapshot {
budget: self.budget,
used_time_secs: self.used_time_secs,
used_tokens: self.used_tokens,
used_cost_microdollars: self.used_cost_microdollars,
used_tool_calls: self.used_tool_calls,
used_external_writes: self.used_external_writes,
}
}
}
fn pct(used: f32, max: f32) -> f32 {
if max == 0.0 { if used > 0.0 { 100.0 } else { 0.0 } }
else { (used / max * 100.0).min(100.0) }
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn default_authority_per_mode() {
let r = AuthorityGuard::new(ExecutionMode::Replay);
assert_eq!(r.max_authority(), AuthorityLevel::ReadOnly);
assert_eq!(r.mode(), ExecutionMode::Replay);
assert_eq!(AuthorityGuard::new(ExecutionMode::Verify).max_authority(), AuthorityLevel::ExecuteTools);
assert_eq!(AuthorityGuard::new(ExecutionMode::Live).max_authority(), AuthorityLevel::WriteMemory);
}
#[test]
fn check_pass_and_fail() {
let g = AuthorityGuard::new(ExecutionMode::Verify);
assert!(g.check(AuthorityLevel::ReadOnly).is_ok());
assert!(g.check(AuthorityLevel::ExecuteTools).is_ok());
assert_eq!(g.check(AuthorityLevel::WriteExternal).unwrap_err(),
ContainerError::InsufficientAuthority { required: 3, granted: 2 });
let ro = AuthorityGuard::new(ExecutionMode::Replay);
assert_eq!(ro.check(AuthorityLevel::WriteMemory).unwrap_err(),
ContainerError::InsufficientAuthority { required: 1, granted: 0 });
}
#[test]
fn with_max_authority_overrides_default() {
let g = AuthorityGuard::with_max_authority(ExecutionMode::Replay, AuthorityLevel::WriteExternal);
assert_eq!(g.mode(), ExecutionMode::Replay);
assert!(g.check(AuthorityLevel::WriteExternal).is_ok());
}
#[test]
fn action_class_grant_restrict_and_inherit() {
let mut g = AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::WriteExternal);
assert!(g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteExternal).is_ok());
g.grant_action_class(ActionClass::GitPush, AuthorityLevel::ReadOnly);
assert_eq!(g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteMemory).unwrap_err(),
ContainerError::InsufficientAuthority { required: 1, granted: 0 });
assert!(g.check_action_class(ActionClass::ReadMemory, AuthorityLevel::WriteExternal).is_ok());
}
#[test]
fn action_class_override_capped_by_global() {
let mut g = AuthorityGuard::new(ExecutionMode::Replay);
g.grant_action_class(ActionClass::RunCommand, AuthorityLevel::WriteExternal);
assert!(g.check_action_class(ActionClass::RunCommand, AuthorityLevel::WriteMemory).is_err());
}
#[test]
fn action_class_override_within_global() {
let mut g = AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::ExecuteTools);
g.grant_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory);
assert!(g.check_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory).is_ok());
assert!(g.check_action_class(ActionClass::WriteFile, AuthorityLevel::ExecuteTools).is_err());
}
#[test]
fn action_class_all_variants() {
assert_eq!(ActionClass::ALL.len(), ACTION_CLASS_COUNT);
for (i, c) in ActionClass::ALL.iter().enumerate() { assert_eq!(*c as usize, i); }
}
#[test]
fn tracker_zero_usage() {
let t = BudgetTracker::new(ResourceBudget::DEFAULT);
assert_eq!(t.remaining_tokens(), 200_000);
assert_eq!(t.remaining_cost(), 1_000_000);
assert_eq!(t.remaining_time(), 300);
assert!(!t.is_exhausted());
}
#[test]
fn charge_and_exhaust_each_resource() {
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
assert!(t.charge_tokens(200_000).is_ok());
assert_eq!(t.charge_tokens(1), Err(ContainerError::BudgetExhausted("tokens")));
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
assert!(t.charge_cost(1_000_000).is_ok());
assert_eq!(t.charge_cost(1), Err(ContainerError::BudgetExhausted("cost")));
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
for _ in 0..50 { t.charge_tool_call().unwrap(); }
assert!(t.charge_tool_call().is_err());
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
assert!(t.charge_time(300).is_ok());
assert!(t.charge_time(1).is_err());
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
assert!(t.charge_external_write().is_err()); // zero budget
let mut t = BudgetTracker::new(ResourceBudget::EXTENDED);
for _ in 0..10 { t.charge_external_write().unwrap(); }
assert!(t.charge_external_write().is_err());
}
#[test]
fn is_exhausted_semantics() {
assert!(!BudgetTracker::new(ResourceBudget::DEFAULT).is_exhausted());
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
t.charge_tokens(200_000).unwrap();
assert!(t.is_exhausted());
}
#[test]
fn utilization_calculation() {
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
t.charge_tokens(100_000).unwrap();
t.charge_time(150).unwrap();
t.charge_cost(500_000).unwrap();
let u = t.utilization();
assert!((u.tokens_pct - 50.0).abs() < 0.01);
assert!((u.time_pct - 50.0).abs() < 0.01);
assert!((u.cost_pct - 50.0).abs() < 0.01);
t.charge_tokens(100_000).unwrap();
let u2 = t.utilization();
assert!((u2.tokens_pct - 100.0).abs() < 0.01);
let z = BudgetTracker::new(ResourceBudget {
max_time_secs: 0, max_tokens: 0, max_cost_microdollars: 0,
max_tool_calls: 0, max_external_writes: 0,
});
assert!((z.utilization().time_pct).abs() < 0.01);
}
#[test]
fn snapshot_captures_state() {
let mut t = BudgetTracker::new(ResourceBudget::EXTENDED);
t.charge_tokens(5_000).unwrap();
t.charge_time(60).unwrap();
t.charge_cost(100_000).unwrap();
t.charge_tool_call().unwrap();
t.charge_external_write().unwrap();
let s = t.snapshot();
assert_eq!(s.budget, ResourceBudget::EXTENDED);
assert_eq!(s.used_tokens, 5_000);
assert_eq!(s.used_time_secs, 60);
assert_eq!(s.used_cost_microdollars, 100_000);
assert_eq!(s.used_tool_calls, 1);
assert_eq!(s.used_external_writes, 1);
}
#[test]
fn budget_clamped_on_creation() {
let t = BudgetTracker::new(ResourceBudget {
max_time_secs: 999_999, max_tokens: 999_999_999,
max_cost_microdollars: 999_999_999, max_tool_calls: 60_000, max_external_writes: 60_000,
});
let b = t.budget();
assert_eq!(b.max_time_secs, ResourceBudget::MAX.max_time_secs);
assert_eq!(b.max_tokens, ResourceBudget::MAX.max_tokens);
assert_eq!(b.max_external_writes, ResourceBudget::MAX.max_external_writes);
}
#[test]
fn charge_exactly_at_limit() {
let mut t = BudgetTracker::new(ResourceBudget {
max_time_secs: 10, max_tokens: 100, max_cost_microdollars: 500,
max_tool_calls: 3, max_external_writes: 2,
});
assert!(t.charge_tokens(100).is_ok());
assert!(t.charge_time(10).is_ok());
assert!(t.charge_cost(500).is_ok());
for _ in 0..3 { t.charge_tool_call().unwrap(); }
for _ in 0..2 { t.charge_external_write().unwrap(); }
assert_eq!(t.remaining_tokens(), 0);
assert_eq!(t.remaining_cost(), 0);
assert_eq!(t.remaining_time(), 0);
assert!(t.is_exhausted());
assert!(t.charge_tokens(1).is_err());
assert!(t.charge_time(1).is_err());
assert!(t.charge_cost(1).is_err());
assert!(t.charge_tool_call().is_err());
assert!(t.charge_external_write().is_err());
}
#[test]
fn max_budget_allows_high_usage() {
let mut t = BudgetTracker::new(ResourceBudget::MAX);
assert!(t.charge_tokens(1_000_000).is_ok());
assert!(t.charge_time(3600).is_ok());
assert!(t.charge_cost(10_000_000).is_ok());
for _ in 0..500 { t.charge_tool_call().unwrap(); }
for _ in 0..50 { t.charge_external_write().unwrap(); }
assert!(t.is_exhausted());
}
}

View file

@ -0,0 +1,689 @@
//! Coherence monitoring and container validation pipeline (ADR-036).
//!
//! This module provides two main components:
//!
//! - [`CoherenceMonitor`]: tracks real-time coherence state across events,
//! contradictions, and task rollbacks, transitioning through [`CoherenceState`]
//! to gate commits, skill promotion, and autonomous execution.
//!
//! - [`ContainerValidator`]: runs the full validation pipeline over an AGI
//! container's header, segments, and coherence thresholds, collecting all
//! errors rather than short-circuiting.
use rvf_types::agi_container::*;
// ---------------------------------------------------------------------------
// CoherenceState
// ---------------------------------------------------------------------------
/// Runtime coherence state derived from threshold checks.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CoherenceState {
/// All signals within bounds.
Healthy,
/// Contradiction rate exceeded -- skill promotion frozen.
SkillFreeze,
/// Coherence score below minimum -- commits blocked, repair mode.
RepairMode,
/// Rollback ratio exceeded -- execution halted, human review required.
Halted,
}
// ---------------------------------------------------------------------------
// CoherenceReport
// ---------------------------------------------------------------------------
/// Point-in-time snapshot of all coherence metrics.
#[derive(Clone, Debug)]
pub struct CoherenceReport {
pub state: CoherenceState,
pub coherence_score: f32,
pub contradiction_rate: f32,
pub rollback_ratio: f32,
pub total_events: u64,
pub total_contradictions: u64,
pub total_tasks: u64,
pub total_rollbacks: u64,
}
// ---------------------------------------------------------------------------
// CoherenceMonitor
// ---------------------------------------------------------------------------
/// Tracks real-time coherence state and gates system actions.
pub struct CoherenceMonitor {
thresholds: CoherenceThresholds,
current_coherence: f32,
total_events: u64,
total_contradictions: u64,
total_tasks: u64,
total_rollbacks: u64,
state: CoherenceState,
}
impl CoherenceMonitor {
/// Create a new monitor with the given thresholds. Returns an error if the
/// thresholds are out of valid ranges.
pub fn new(thresholds: CoherenceThresholds) -> Result<Self, ContainerError> {
thresholds.validate()?;
Ok(Self {
thresholds,
current_coherence: 1.0,
total_events: 0,
total_contradictions: 0,
total_tasks: 0,
total_rollbacks: 0,
state: CoherenceState::Healthy,
})
}
/// Create a monitor with [`CoherenceThresholds::DEFAULT`].
pub fn with_defaults() -> Self {
Self {
thresholds: CoherenceThresholds::DEFAULT,
current_coherence: 1.0,
total_events: 0,
total_contradictions: 0,
total_tasks: 0,
total_rollbacks: 0,
state: CoherenceState::Healthy,
}
}
/// Update the current coherence score and re-evaluate state.
pub fn update_coherence(&mut self, score: f32) {
self.current_coherence = score;
self.recompute_state();
}
/// Record a generic event (increments event counter).
pub fn record_event(&mut self) {
self.total_events = self.total_events.saturating_add(1);
}
/// Record a contradiction event and re-evaluate state.
pub fn record_contradiction(&mut self) {
self.total_contradictions = self.total_contradictions.saturating_add(1);
self.recompute_state();
}
/// Record a task completion. If `rolled_back` is true the rollback counter
/// is also incremented. State is re-evaluated afterward.
pub fn record_task_completion(&mut self, rolled_back: bool) {
self.total_tasks = self.total_tasks.saturating_add(1);
if rolled_back {
self.total_rollbacks = self.total_rollbacks.saturating_add(1);
}
self.recompute_state();
}
/// Current coherence state.
pub fn state(&self) -> CoherenceState {
self.state
}
/// Whether the system may commit world-model deltas. True when
/// [`CoherenceState::Healthy`] or [`CoherenceState::SkillFreeze`].
pub fn can_commit(&self) -> bool {
matches!(self.state, CoherenceState::Healthy | CoherenceState::SkillFreeze)
}
/// Whether new skills may be promoted. True only when
/// [`CoherenceState::Healthy`].
pub fn can_promote_skill(&self) -> bool {
self.state == CoherenceState::Healthy
}
/// Whether the system requires human review. True when
/// [`CoherenceState::Halted`].
pub fn requires_human_review(&self) -> bool {
self.state == CoherenceState::Halted
}
/// Contradiction rate: contradictions per 100 events.
/// Returns `0.0` when there are no events.
pub fn contradiction_rate(&self) -> f32 {
if self.total_events == 0 {
return 0.0;
}
(self.total_contradictions as f32 / self.total_events as f32) * 100.0
}
/// Rollback ratio: rollbacks / total tasks.
/// Returns `0.0` when there are no tasks.
pub fn rollback_ratio(&self) -> f32 {
if self.total_tasks == 0 {
return 0.0;
}
self.total_rollbacks as f32 / self.total_tasks as f32
}
/// Produce a point-in-time snapshot of all metrics.
pub fn report(&self) -> CoherenceReport {
CoherenceReport {
state: self.state,
coherence_score: self.current_coherence,
contradiction_rate: self.contradiction_rate(),
rollback_ratio: self.rollback_ratio(),
total_events: self.total_events,
total_contradictions: self.total_contradictions,
total_tasks: self.total_tasks,
total_rollbacks: self.total_rollbacks,
}
}
// -----------------------------------------------------------------------
// Internal
// -----------------------------------------------------------------------
/// Determine the worst applicable state from the current metrics.
/// Priority (most severe first): Halted > RepairMode > SkillFreeze > Healthy.
fn recompute_state(&mut self) {
if self.rollback_ratio() > self.thresholds.max_rollback_ratio {
self.state = CoherenceState::Halted;
} else if self.current_coherence < self.thresholds.min_coherence_score {
self.state = CoherenceState::RepairMode;
} else if self.contradiction_rate() > self.thresholds.max_contradiction_rate {
self.state = CoherenceState::SkillFreeze;
} else {
self.state = CoherenceState::Healthy;
}
}
}
// ---------------------------------------------------------------------------
// ContainerValidator
// ---------------------------------------------------------------------------
/// Full validation pipeline for an AGI container.
pub struct ContainerValidator {
mode: ExecutionMode,
}
impl ContainerValidator {
/// Create a validator for the given execution mode.
pub fn new(mode: ExecutionMode) -> Self {
Self { mode }
}
/// Validate container segments against mode requirements.
pub fn validate_segments(
&self,
segments: &ContainerSegments,
) -> Result<(), ContainerError> {
segments.validate(self.mode)
}
/// Validate the AGI container header.
///
/// Checks: magic bytes, version (must be 1), and flag consistency --
/// replay-capable containers must not claim Live-only features without
/// the kernel flag.
pub fn validate_header(
&self,
header: &AgiContainerHeader,
) -> Result<(), ContainerError> {
if !header.is_valid_magic() {
return Err(ContainerError::InvalidConfig("bad magic bytes"));
}
if header.version == 0 || header.version > 1 {
return Err(ContainerError::InvalidConfig(
"unsupported header version",
));
}
// Flag consistency: if REPLAY_CAPABLE is set the container should
// also have the witness flag, since replays depend on witness chains.
if header.is_replay_capable()
&& (header.flags & AGI_HAS_WITNESS == 0)
{
return Err(ContainerError::InvalidConfig(
"replay-capable flag requires witness flag",
));
}
Ok(())
}
/// Validate coherence threshold ranges.
pub fn validate_coherence(
&self,
thresholds: &CoherenceThresholds,
) -> Result<(), ContainerError> {
thresholds.validate()
}
/// Run all validations, collecting every error rather than
/// short-circuiting on the first failure.
pub fn validate_full(
&self,
header: &AgiContainerHeader,
segments: &ContainerSegments,
thresholds: &CoherenceThresholds,
) -> Vec<ContainerError> {
let mut errors = Vec::new();
if let Err(e) = self.validate_header(header) {
errors.push(e);
}
if let Err(e) = self.validate_segments(segments) {
errors.push(e);
}
if let Err(e) = self.validate_coherence(thresholds) {
errors.push(e);
}
errors
}
}
// ---------------------------------------------------------------------------
// Tests
// ---------------------------------------------------------------------------
#[cfg(test)]
mod tests {
use super::*;
// -- Helpers --
fn valid_header() -> AgiContainerHeader {
AgiContainerHeader {
magic: AGI_MAGIC,
version: 1,
flags: AGI_HAS_KERNEL | AGI_HAS_WITNESS | AGI_REPLAY_CAPABLE,
container_id: [0x01; 16],
build_id: [0x02; 16],
created_ns: 1_700_000_000_000_000_000,
model_id_hash: [0; 8],
policy_hash: [0; 8],
}
}
fn valid_segments() -> ContainerSegments {
ContainerSegments {
manifest_present: true,
kernel_present: true,
world_model_present: true,
..Default::default()
}
}
// -----------------------------------------------------------------------
// CoherenceMonitor — state transitions
// -----------------------------------------------------------------------
#[test]
fn monitor_starts_healthy() {
let m = CoherenceMonitor::with_defaults();
assert_eq!(m.state(), CoherenceState::Healthy);
}
#[test]
fn monitor_healthy_to_skill_freeze() {
let mut m = CoherenceMonitor::with_defaults();
// Default max_contradiction_rate is 5.0 per 100 events.
// Record 100 events with 6 contradictions -> rate = 6.0 > 5.0.
for _ in 0..100 {
m.record_event();
}
for _ in 0..6 {
m.record_contradiction();
}
assert_eq!(m.state(), CoherenceState::SkillFreeze);
}
#[test]
fn monitor_healthy_to_repair_mode() {
let mut m = CoherenceMonitor::with_defaults();
// Default min_coherence_score is 0.70.
m.update_coherence(0.50);
assert_eq!(m.state(), CoherenceState::RepairMode);
}
#[test]
fn monitor_healthy_to_halted() {
let mut m = CoherenceMonitor::with_defaults();
// Default max_rollback_ratio is 0.20.
// 10 tasks, 3 rolled back -> ratio = 0.30 > 0.20.
for _ in 0..7 {
m.record_task_completion(false);
}
for _ in 0..3 {
m.record_task_completion(true);
}
assert_eq!(m.state(), CoherenceState::Halted);
}
#[test]
fn halted_takes_priority_over_repair_mode() {
let mut m = CoherenceMonitor::with_defaults();
// Both rollback ratio and coherence score are bad.
m.update_coherence(0.50);
for _ in 0..5 {
m.record_task_completion(false);
}
for _ in 0..5 {
m.record_task_completion(true);
}
// Halted (rollback) is highest severity and wins.
assert_eq!(m.state(), CoherenceState::Halted);
}
#[test]
fn repair_mode_takes_priority_over_skill_freeze() {
let mut m = CoherenceMonitor::with_defaults();
// Both coherence and contradiction rate are bad.
m.update_coherence(0.50);
for _ in 0..100 {
m.record_event();
}
for _ in 0..10 {
m.record_contradiction();
}
// RepairMode wins over SkillFreeze.
assert_eq!(m.state(), CoherenceState::RepairMode);
}
#[test]
fn recovery_back_to_healthy() {
let mut m = CoherenceMonitor::with_defaults();
m.update_coherence(0.50);
assert_eq!(m.state(), CoherenceState::RepairMode);
m.update_coherence(0.90);
assert_eq!(m.state(), CoherenceState::Healthy);
}
// -----------------------------------------------------------------------
// CoherenceMonitor — gate queries
// -----------------------------------------------------------------------
#[test]
fn can_commit_when_healthy() {
let m = CoherenceMonitor::with_defaults();
assert!(m.can_commit());
}
#[test]
fn can_commit_when_skill_freeze() {
let mut m = CoherenceMonitor::with_defaults();
for _ in 0..100 {
m.record_event();
}
for _ in 0..6 {
m.record_contradiction();
}
assert_eq!(m.state(), CoherenceState::SkillFreeze);
assert!(m.can_commit());
}
#[test]
fn cannot_commit_when_repair_mode() {
let mut m = CoherenceMonitor::with_defaults();
m.update_coherence(0.50);
assert!(!m.can_commit());
}
#[test]
fn cannot_commit_when_halted() {
let mut m = CoherenceMonitor::with_defaults();
m.record_task_completion(true); // 1 task, 1 rollback -> ratio = 1.0
assert!(!m.can_commit());
}
#[test]
fn can_promote_skill_only_when_healthy() {
let m = CoherenceMonitor::with_defaults();
assert!(m.can_promote_skill());
let mut m2 = CoherenceMonitor::with_defaults();
for _ in 0..100 {
m2.record_event();
}
for _ in 0..6 {
m2.record_contradiction();
}
assert_eq!(m2.state(), CoherenceState::SkillFreeze);
assert!(!m2.can_promote_skill());
}
#[test]
fn requires_human_review_only_when_halted() {
let m = CoherenceMonitor::with_defaults();
assert!(!m.requires_human_review());
let mut m2 = CoherenceMonitor::with_defaults();
m2.record_task_completion(true);
assert_eq!(m2.state(), CoherenceState::Halted);
assert!(m2.requires_human_review());
}
// -----------------------------------------------------------------------
// CoherenceMonitor — rate calculations
// -----------------------------------------------------------------------
#[test]
fn contradiction_rate_calculation() {
let mut m = CoherenceMonitor::with_defaults();
for _ in 0..200 {
m.record_event();
}
for _ in 0..4 {
m.record_contradiction();
}
// 4 contradictions in (200 events + 4 contradiction events via record_contradiction)
// record_contradiction does not call record_event, so total_events = 200.
// Rate = (4 / 200) * 100 = 2.0.
assert!((m.contradiction_rate() - 2.0).abs() < f32::EPSILON);
}
#[test]
fn rollback_ratio_calculation() {
let mut m = CoherenceMonitor::with_defaults();
for _ in 0..8 {
m.record_task_completion(false);
}
for _ in 0..2 {
m.record_task_completion(true);
}
// 2 rollbacks / 10 tasks = 0.20.
assert!((m.rollback_ratio() - 0.20).abs() < f32::EPSILON);
}
// -----------------------------------------------------------------------
// CoherenceMonitor — edge cases
// -----------------------------------------------------------------------
#[test]
fn contradiction_rate_zero_events() {
let m = CoherenceMonitor::with_defaults();
assert!((m.contradiction_rate() - 0.0).abs() < f32::EPSILON);
}
#[test]
fn rollback_ratio_zero_tasks() {
let m = CoherenceMonitor::with_defaults();
assert!((m.rollback_ratio() - 0.0).abs() < f32::EPSILON);
}
#[test]
fn new_with_invalid_thresholds() {
let bad = CoherenceThresholds {
min_coherence_score: 2.0,
..CoherenceThresholds::DEFAULT
};
assert!(CoherenceMonitor::new(bad).is_err());
}
#[test]
fn new_with_valid_thresholds() {
let m = CoherenceMonitor::new(CoherenceThresholds::STRICT).unwrap();
assert_eq!(m.state(), CoherenceState::Healthy);
}
#[test]
fn report_snapshot() {
let mut m = CoherenceMonitor::with_defaults();
m.update_coherence(0.85);
for _ in 0..50 {
m.record_event();
}
m.record_contradiction();
// 9 successful + 1 rollback = ratio 0.10, within the 0.20 threshold.
for _ in 0..9 {
m.record_task_completion(false);
}
m.record_task_completion(true);
let r = m.report();
assert_eq!(r.state, CoherenceState::Healthy);
assert!((r.coherence_score - 0.85).abs() < f32::EPSILON);
assert_eq!(r.total_events, 50);
assert_eq!(r.total_contradictions, 1);
assert_eq!(r.total_tasks, 10);
assert_eq!(r.total_rollbacks, 1);
}
// -----------------------------------------------------------------------
// ContainerValidator — validate_segments
// -----------------------------------------------------------------------
#[test]
fn validator_segments_delegates_ok() {
let v = ContainerValidator::new(ExecutionMode::Live);
let segs = valid_segments();
assert!(v.validate_segments(&segs).is_ok());
}
#[test]
fn validator_segments_delegates_error() {
let v = ContainerValidator::new(ExecutionMode::Replay);
let segs = ContainerSegments {
manifest_present: true,
witness_count: 0,
..Default::default()
};
assert!(v.validate_segments(&segs).is_err());
}
// -----------------------------------------------------------------------
// ContainerValidator — validate_header
// -----------------------------------------------------------------------
#[test]
fn validator_header_ok() {
let v = ContainerValidator::new(ExecutionMode::Live);
assert!(v.validate_header(&valid_header()).is_ok());
}
#[test]
fn validator_header_bad_magic() {
let v = ContainerValidator::new(ExecutionMode::Live);
let mut h = valid_header();
h.magic = 0xDEADBEEF;
assert_eq!(
v.validate_header(&h),
Err(ContainerError::InvalidConfig("bad magic bytes"))
);
}
#[test]
fn validator_header_bad_version_zero() {
let v = ContainerValidator::new(ExecutionMode::Live);
let mut h = valid_header();
h.version = 0;
assert_eq!(
v.validate_header(&h),
Err(ContainerError::InvalidConfig("unsupported header version"))
);
}
#[test]
fn validator_header_bad_version_future() {
let v = ContainerValidator::new(ExecutionMode::Live);
let mut h = valid_header();
h.version = 99;
assert_eq!(
v.validate_header(&h),
Err(ContainerError::InvalidConfig("unsupported header version"))
);
}
#[test]
fn validator_header_replay_without_witness() {
let v = ContainerValidator::new(ExecutionMode::Live);
let h = AgiContainerHeader {
magic: AGI_MAGIC,
version: 1,
flags: AGI_REPLAY_CAPABLE, // missing AGI_HAS_WITNESS
container_id: [0; 16],
build_id: [0; 16],
created_ns: 0,
model_id_hash: [0; 8],
policy_hash: [0; 8],
};
assert_eq!(
v.validate_header(&h),
Err(ContainerError::InvalidConfig(
"replay-capable flag requires witness flag"
))
);
}
// -----------------------------------------------------------------------
// ContainerValidator — validate_full
// -----------------------------------------------------------------------
#[test]
fn validator_full_all_ok() {
let v = ContainerValidator::new(ExecutionMode::Live);
let errs = v.validate_full(
&valid_header(),
&valid_segments(),
&CoherenceThresholds::DEFAULT,
);
assert!(errs.is_empty());
}
#[test]
fn validator_full_collects_multiple_errors() {
let v = ContainerValidator::new(ExecutionMode::Live);
// Bad header (wrong magic).
let mut h = valid_header();
h.magic = 0xBAD0CAFE;
// Bad segments (no kernel, no wasm, no world model for Live).
let segs = ContainerSegments {
manifest_present: true,
..Default::default()
};
// Bad thresholds.
let bad_thresh = CoherenceThresholds {
min_coherence_score: -1.0,
..CoherenceThresholds::DEFAULT
};
let errs = v.validate_full(&h, &segs, &bad_thresh);
// Expect at least 3 errors: header, segments, and thresholds.
assert!(errs.len() >= 3, "expected >= 3 errors, got {}", errs.len());
}
#[test]
fn validator_full_partial_errors() {
let v = ContainerValidator::new(ExecutionMode::Replay);
// Good header, bad segments (replay needs witness), good thresholds.
let segs = ContainerSegments {
manifest_present: true,
witness_count: 0,
..Default::default()
};
let errs = v.validate_full(
&valid_header(),
&segs,
&CoherenceThresholds::DEFAULT,
);
assert_eq!(errs.len(), 1);
}
}

View file

@ -35,6 +35,8 @@ pub mod status;
pub mod store;
pub mod witness;
pub mod write_path;
pub mod agi_authority;
pub mod agi_coherence;
pub mod agi_container;
pub use adversarial::{

View file

@ -0,0 +1,335 @@
//! End-to-end integration tests and benchmarks for the AGI Cognitive Container
//! system (ADR-036). Covers the full build -> serialize -> parse -> validate
//! cycle, signed container tamper detection, execution mode validation matrix,
//! authority levels, resource budgets, coherence thresholds, and perf benchmarks.
use rvf_runtime::agi_container::{AgiContainerBuilder, ParsedAgiManifest};
use rvf_runtime::seed_crypto;
use rvf_types::agi_container::*;
const SIGNING_KEY: &[u8] = b"agi-e2e-test-signing-key-32bytes";
const ORCH_JSON: &[u8] = br#"{"model":"claude-opus-4-6","max_turns":100}"#;
const TASKS_JSON: &[u8] = br#"[{"id":1,"spec":"fix bug"}]"#;
const GRADERS_JSON: &[u8] = br#"[{"type":"test_pass"}]"#;
const TOOLS_JSON: &[u8] = br#"[{"name":"ruvector_query","type":"search"}]"#;
const COHER_JSON: &[u8] = br#"{"min_cut":0.7,"rollback":true}"#;
/// Build a fully-populated container with every optional section.
fn build_full_container() -> (Vec<u8>, AgiContainerHeader) {
AgiContainerBuilder::new([0x01; 16], [0x02; 16])
.with_model_id("claude-opus-4-6")
.with_policy(b"autonomous", [0xAA; 8])
.with_orchestrator(ORCH_JSON)
.with_tool_registry(TOOLS_JSON)
.with_agent_prompts(b"You are a coder agent.")
.with_eval_tasks(TASKS_JSON)
.with_eval_graders(GRADERS_JSON)
.with_skill_library(b"[]")
.with_replay_script(b"#!/bin/sh\nrvf replay $1")
.with_kernel_config(b"console=ttyS0")
.with_network_config(b"{\"port\":8080}")
.with_coherence_config(COHER_JSON)
.with_project_instructions(b"# CLAUDE.md")
.with_dependency_snapshot(b"sha256:abc123")
.with_authority_config(b"{\"level\":\"WriteMemory\"}")
.with_domain_profile(b"coding")
.offline_capable()
.with_segments(ContainerSegments {
kernel_present: true, kernel_size: 5_000_000,
wasm_count: 2, wasm_total_size: 60_000,
vec_segment_count: 4, index_segment_count: 2,
witness_count: 100, crypto_present: false,
manifest_present: true, orchestrator_present: true,
world_model_present: true, total_size: 0,
})
.build()
.unwrap()
}
// -- 1. Full Container Lifecycle --
#[test]
fn full_container_lifecycle() {
let (payload, header) = build_full_container();
assert!(header.is_valid_magic());
assert_eq!(header.version, 1);
assert!(header.has_kernel());
assert!(header.has_orchestrator());
assert!(header.has_world_model());
assert!(header.is_replay_capable());
assert!(header.is_offline_capable());
assert!(header.created_ns > 0, "created_ns should be a real timestamp");
// Header round-trip.
let header_rt = AgiContainerHeader::from_bytes(&header.to_bytes()).unwrap();
assert_eq!(header_rt, header);
// Parse manifest and verify every section.
let p = ParsedAgiManifest::parse(&payload).unwrap();
assert_eq!(p.model_id_str(), Some("claude-opus-4-6"));
assert_eq!(p.orchestrator_config.unwrap(), ORCH_JSON);
assert_eq!(p.tool_registry.unwrap(), TOOLS_JSON);
assert_eq!(p.eval_tasks.unwrap(), TASKS_JSON);
assert_eq!(p.eval_graders.unwrap(), GRADERS_JSON);
assert_eq!(p.coherence_config.unwrap(), COHER_JSON);
assert!(p.policy.is_some());
assert!(p.agent_prompts.is_some());
assert!(p.skill_library.is_some());
assert!(p.replay_script.is_some());
assert!(p.kernel_config.is_some());
assert!(p.network_config.is_some());
assert!(p.project_instructions.is_some());
assert!(p.dependency_snapshot.is_some());
assert!(p.authority_config.is_some());
assert!(p.domain_profile.is_some());
assert!(p.is_autonomous_capable());
// Segment-derived flags should all be present in the header.
let seg_flags = ContainerSegments {
kernel_present: true, wasm_count: 2, witness_count: 100,
orchestrator_present: true, world_model_present: true,
..Default::default()
}.to_flags();
assert_eq!(header.flags & seg_flags, seg_flags);
}
// -- 2. Signed Container Tamper Detection --
#[test]
fn signed_container_tamper_detection() {
let builder = AgiContainerBuilder::new([0x10; 16], [0x20; 16])
.with_model_id("claude-opus-4-6")
.with_orchestrator(ORCH_JSON)
.with_eval_tasks(TASKS_JSON)
.with_eval_graders(GRADERS_JSON)
.with_segments(ContainerSegments {
kernel_present: true, manifest_present: true,
world_model_present: true, ..Default::default()
});
let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();
assert!(header.is_signed());
let unsigned_len = payload.len() - 32;
let sig = &payload[unsigned_len..];
assert!(seed_crypto::verify_seed(SIGNING_KEY, &payload[..unsigned_len], sig));
// Tamper with one byte in the TLV payload area.
let mut tampered = payload.clone();
tampered[AGI_HEADER_SIZE + 10] ^= 0xFF;
assert!(!seed_crypto::verify_seed(SIGNING_KEY, &tampered[..unsigned_len], sig),
"tampered payload must fail verification");
// Tamper with header byte.
let mut tampered_hdr = payload.clone();
tampered_hdr[7] ^= 0x01;
assert!(!seed_crypto::verify_seed(SIGNING_KEY, &tampered_hdr[..unsigned_len], sig),
"tampered header must fail verification");
}
// -- 3. Execution Mode Validation Matrix --
#[test]
fn execution_mode_validation_matrix() {
let m = |mp, kp, wc, wmc, vsc, isc, wnc| ContainerSegments {
manifest_present: mp, kernel_present: kp, wasm_count: wc,
world_model_present: wmc, vec_segment_count: vsc,
index_segment_count: isc, witness_count: wnc,
..Default::default()
};
// Replay + no witness -> fail
assert!(m(true, false, 0, false, 0, 0, 0).validate(ExecutionMode::Replay).is_err());
// Replay + witness -> pass
assert!(m(true, false, 0, false, 0, 0, 10).validate(ExecutionMode::Replay).is_ok());
// Verify + no runtime -> fail
assert!(m(true, false, 0, false, 0, 0, 0).validate(ExecutionMode::Verify).is_err());
// Verify + kernel + world_model -> pass
assert!(m(true, true, 0, true, 0, 0, 0).validate(ExecutionMode::Verify).is_ok());
// Verify + wasm + vec -> pass
assert!(m(true, false, 1, false, 2, 0, 0).validate(ExecutionMode::Verify).is_ok());
// Live + kernel only (no world model) -> fail
assert!(m(true, true, 0, false, 0, 0, 0).validate(ExecutionMode::Live).is_err());
// Live + kernel + world model -> pass
assert!(m(true, true, 0, true, 0, 0, 0).validate(ExecutionMode::Live).is_ok());
}
// -- 4. Authority Level Tests --
#[test]
fn authority_level_defaults_per_mode() {
assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Replay), AuthorityLevel::ReadOnly);
assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Verify), AuthorityLevel::ExecuteTools);
assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Live), AuthorityLevel::WriteMemory);
}
#[test]
fn authority_level_hierarchy() {
// WriteExternal permits all.
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ReadOnly));
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteMemory));
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ExecuteTools));
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteExternal));
// ExecuteTools permits itself and below.
assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ReadOnly));
assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteMemory));
assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ExecuteTools));
assert!(!AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteExternal));
// ReadOnly permits nothing above itself.
assert!(AuthorityLevel::ReadOnly.permits(AuthorityLevel::ReadOnly));
assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteMemory));
assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::ExecuteTools));
assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteExternal));
}
// -- 5. Resource Budget Tests --
#[test]
fn resource_budget_clamping() {
let clamped = ResourceBudget {
max_time_secs: 99999, max_tokens: 99999999,
max_cost_microdollars: 99999999,
max_tool_calls: 65535, max_external_writes: 65535,
}.clamped();
assert_eq!(clamped.max_time_secs, 3600);
assert_eq!(clamped.max_tokens, 1_000_000);
assert_eq!(clamped.max_cost_microdollars, 10_000_000);
assert_eq!(clamped.max_tool_calls, 500);
assert_eq!(clamped.max_external_writes, 50);
}
#[test]
fn resource_budget_within_max_unchanged() {
assert_eq!(ResourceBudget::DEFAULT.clamped(), ResourceBudget::DEFAULT);
}
// -- 6. Coherence Threshold Validation --
#[test]
fn coherence_threshold_validation() {
assert!(CoherenceThresholds::DEFAULT.validate().is_ok());
assert!(CoherenceThresholds::STRICT.validate().is_ok());
// Invalid: score > 1.0
let bad = CoherenceThresholds { min_coherence_score: 1.5, ..CoherenceThresholds::DEFAULT };
assert!(bad.validate().is_err());
// Invalid: negative rate
let bad2 = CoherenceThresholds { max_contradiction_rate: -1.0, ..CoherenceThresholds::DEFAULT };
assert!(bad2.validate().is_err());
// Invalid: rollback ratio > 1.0
let bad3 = CoherenceThresholds { max_rollback_ratio: 2.0, ..CoherenceThresholds::DEFAULT };
assert!(bad3.validate().is_err());
// Edge: zero values are valid
let edge = CoherenceThresholds {
min_coherence_score: 0.0, max_contradiction_rate: 0.0, max_rollback_ratio: 0.0,
};
assert!(edge.validate().is_ok());
}
// -- 7. Container Size Limit --
#[test]
fn container_size_limit_enforced() {
let oversized = ContainerSegments {
manifest_present: true, total_size: AGI_MAX_CONTAINER_SIZE + 1,
..Default::default()
};
assert_eq!(
oversized.validate(ExecutionMode::Replay),
Err(ContainerError::TooLarge { size: AGI_MAX_CONTAINER_SIZE + 1 })
);
}
// -- 8. Performance Benchmarks (using std::time) --
#[test]
fn bench_header_serialize_deserialize() {
use std::time::Instant;
let header = AgiContainerHeader {
magic: AGI_MAGIC, version: 1,
flags: AGI_HAS_KERNEL | AGI_HAS_WASM | AGI_HAS_ORCHESTRATOR | AGI_SIGNED,
container_id: [0x42; 16], build_id: [0x43; 16],
created_ns: 1_700_000_000_000_000_000,
model_id_hash: [0xAA; 8], policy_hash: [0xBB; 8],
};
let n: u128 = 100_000;
let start = Instant::now();
for _ in 0..n { let _ = std::hint::black_box(header.to_bytes()); }
let ser = start.elapsed();
let bytes = header.to_bytes();
let start = Instant::now();
for _ in 0..n { let _ = std::hint::black_box(AgiContainerHeader::from_bytes(&bytes).unwrap()); }
let deser = start.elapsed();
let ser_ns = ser.as_nanos() / n;
let deser_ns = deser.as_nanos() / n;
eprintln!("Header serialize: {ser_ns:>8} ns/op ({n} iterations in {ser:?})");
eprintln!("Header deserialize: {deser_ns:>8} ns/op ({n} iterations in {deser:?})");
assert!(ser_ns < 1000, "serialize too slow: {ser_ns} ns/op");
assert!(deser_ns < 1000, "deserialize too slow: {deser_ns} ns/op");
}
#[test]
fn bench_container_build_parse() {
use std::time::Instant;
let n: u128 = 10_000;
let segs = || ContainerSegments {
kernel_present: true, manifest_present: true,
world_model_present: true, ..Default::default()
};
let start = Instant::now();
for _ in 0..n {
let b = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
.with_model_id("claude-opus-4-6")
.with_policy(b"autonomous", [0xAA; 8])
.with_orchestrator(ORCH_JSON)
.with_eval_tasks(TASKS_JSON)
.with_eval_graders(GRADERS_JSON)
.with_segments(segs());
let _ = std::hint::black_box(b.build().unwrap());
}
let build_elapsed = start.elapsed();
let (payload, _) = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
.with_model_id("claude-opus-4-6")
.with_orchestrator(ORCH_JSON)
.with_eval_tasks(TASKS_JSON)
.with_eval_graders(GRADERS_JSON)
.with_segments(segs())
.build().unwrap();
let start = Instant::now();
for _ in 0..n { let _ = std::hint::black_box(ParsedAgiManifest::parse(&payload).unwrap()); }
let parse_elapsed = start.elapsed();
let build_ns = build_elapsed.as_nanos() / n;
let parse_ns = parse_elapsed.as_nanos() / n;
eprintln!("Container build: {build_ns:>8} ns/op ({n} iterations in {build_elapsed:?})");
eprintln!("Container parse: {parse_ns:>8} ns/op ({n} iterations in {parse_elapsed:?})");
assert!(build_ns < 10_000, "build too slow: {build_ns} ns/op");
assert!(parse_ns < 5_000, "parse too slow: {parse_ns} ns/op");
}
#[test]
fn bench_flags_computation() {
use std::time::Instant;
let n: u128 = 1_000_000;
let segs = ContainerSegments {
kernel_present: true, wasm_count: 2, witness_count: 100,
crypto_present: true, orchestrator_present: true,
world_model_present: true, vec_segment_count: 4,
index_segment_count: 2, ..Default::default()
};
let start = Instant::now();
for _ in 0..n { let _ = std::hint::black_box(segs.to_flags()); }
let elapsed = start.elapsed();
let ns = elapsed.as_nanos() / n;
eprintln!("Flags computation: {ns:>8} ns/op ({n} iterations in {elapsed:?})");
assert!(ns < 100, "flags computation too slow: {ns} ns/op");
}