mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-23 04:27:11 +00:00
feat(agi-runtime): authority guard, coherence monitor, benchmarks
Add three new modules to rvf-runtime implementing the ADR-036 runtime: - agi_authority.rs: AuthorityGuard (per-mode + per-action-class enforcement), BudgetTracker (resource consumption tracking with hard caps), ActionClass enum (10 action categories) - agi_coherence.rs: CoherenceMonitor (real-time state machine with Healthy/SkillFreeze/RepairMode/Halted transitions), ContainerValidator (full validation pipeline) - tests/agi_e2e.rs: end-to-end integration tests and performance benchmarks (header serialize/deserialize, container build/parse, flags computation) All 219 rvf-runtime lib tests pass. https://claude.ai/code/session_01RnwD4x5cbpB7FPvoyYQz8G
This commit is contained in:
parent
4f9107fdf1
commit
ffbf72fb2f
4 changed files with 1464 additions and 0 deletions
438
crates/rvf/rvf-runtime/src/agi_authority.rs
Normal file
438
crates/rvf/rvf-runtime/src/agi_authority.rs
Normal file
|
|
@ -0,0 +1,438 @@
|
|||
//! Authority and resource budget enforcement runtime for AGI containers (ADR-036).
|
||||
//!
|
||||
//! - [`AuthorityGuard`]: enforces per-execution authority levels and per-action-class
|
||||
//! overrides, ensuring container actions never exceed their granted privileges.
|
||||
//! - [`BudgetTracker`]: tracks resource consumption against a [`ResourceBudget`] and
|
||||
//! returns `BudgetExhausted` errors when any resource is at its limit.
|
||||
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
const ACTION_CLASS_COUNT: usize = 10;
|
||||
|
||||
/// Classification of actions that a container execution may perform.
|
||||
///
|
||||
/// Each class can be independently granted a different [`AuthorityLevel`]
|
||||
/// via [`AuthorityGuard::grant_action_class`].
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
|
||||
#[repr(u8)]
|
||||
pub enum ActionClass {
|
||||
ReadMemory = 0,
|
||||
WriteMemory = 1,
|
||||
ReadFile = 2,
|
||||
WriteFile = 3,
|
||||
RunTest = 4,
|
||||
RunCommand = 5,
|
||||
GitPush = 6,
|
||||
CreatePR = 7,
|
||||
SendMessage = 8,
|
||||
ModifyInfra = 9,
|
||||
}
|
||||
|
||||
impl ActionClass {
|
||||
/// All variants in discriminant order.
|
||||
pub const ALL: [ActionClass; ACTION_CLASS_COUNT] = [
|
||||
Self::ReadMemory, Self::WriteMemory, Self::ReadFile, Self::WriteFile,
|
||||
Self::RunTest, Self::RunCommand, Self::GitPush, Self::CreatePR,
|
||||
Self::SendMessage, Self::ModifyInfra,
|
||||
];
|
||||
}
|
||||
|
||||
/// Runtime guard enforcing authority levels for container execution.
|
||||
///
|
||||
/// Holds a global maximum authority (from execution mode or explicit) and an
|
||||
/// optional per-action-class override table as a fixed-size array.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AuthorityGuard {
|
||||
max_authority: AuthorityLevel,
|
||||
mode: ExecutionMode,
|
||||
class_overrides: [Option<AuthorityLevel>; ACTION_CLASS_COUNT],
|
||||
}
|
||||
|
||||
impl AuthorityGuard {
|
||||
/// Create a guard using the default authority for the given execution mode.
|
||||
pub fn new(mode: ExecutionMode) -> Self {
|
||||
Self {
|
||||
max_authority: AuthorityLevel::default_for_mode(mode),
|
||||
mode,
|
||||
class_overrides: [None; ACTION_CLASS_COUNT],
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a guard with an explicit maximum authority level.
|
||||
pub fn with_max_authority(mode: ExecutionMode, max: AuthorityLevel) -> Self {
|
||||
Self { max_authority: max, mode, class_overrides: [None; ACTION_CLASS_COUNT] }
|
||||
}
|
||||
|
||||
/// The execution mode this guard was created for.
|
||||
pub fn mode(&self) -> ExecutionMode { self.mode }
|
||||
|
||||
/// The global maximum authority level.
|
||||
pub fn max_authority(&self) -> AuthorityLevel { self.max_authority }
|
||||
|
||||
/// Check whether the guard permits the `required` level.
|
||||
pub fn check(&self, required: AuthorityLevel) -> Result<(), ContainerError> {
|
||||
if self.max_authority.permits(required) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ContainerError::InsufficientAuthority {
|
||||
required: required as u8,
|
||||
granted: self.max_authority as u8,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Check authority for a specific action class.
|
||||
///
|
||||
/// Per-class overrides are capped by the global maximum to prevent escalation.
|
||||
pub fn check_action_class(
|
||||
&self, class: ActionClass, required: AuthorityLevel,
|
||||
) -> Result<(), ContainerError> {
|
||||
let effective = match self.class_overrides[class as usize] {
|
||||
Some(o) if (o as u8) <= (self.max_authority as u8) => o,
|
||||
Some(_) => self.max_authority,
|
||||
None => self.max_authority,
|
||||
};
|
||||
if effective.permits(required) {
|
||||
Ok(())
|
||||
} else {
|
||||
Err(ContainerError::InsufficientAuthority {
|
||||
required: required as u8,
|
||||
granted: effective as u8,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Grant authority for a specific action class (capped by global max at check time).
|
||||
pub fn grant_action_class(&mut self, class: ActionClass, level: AuthorityLevel) {
|
||||
self.class_overrides[class as usize] = Some(level);
|
||||
}
|
||||
}
|
||||
|
||||
/// Percentage utilization for each resource dimension (0.0..=100.0).
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub struct BudgetUtilization {
|
||||
pub time_pct: f32,
|
||||
pub tokens_pct: f32,
|
||||
pub cost_pct: f32,
|
||||
pub tool_calls_pct: f32,
|
||||
pub external_writes_pct: f32,
|
||||
}
|
||||
|
||||
/// Point-in-time snapshot of budget state.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub struct BudgetSnapshot {
|
||||
pub budget: ResourceBudget,
|
||||
pub used_time_secs: u32,
|
||||
pub used_tokens: u32,
|
||||
pub used_cost_microdollars: u32,
|
||||
pub used_tool_calls: u16,
|
||||
pub used_external_writes: u16,
|
||||
}
|
||||
|
||||
/// Tracks resource consumption against a [`ResourceBudget`].
|
||||
///
|
||||
/// Each `charge_*` method adds to the running total and returns
|
||||
/// [`ContainerError::BudgetExhausted`] if the total would exceed the budget.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct BudgetTracker {
|
||||
budget: ResourceBudget,
|
||||
used_time_secs: u32,
|
||||
used_tokens: u32,
|
||||
used_cost_microdollars: u32,
|
||||
used_tool_calls: u16,
|
||||
used_external_writes: u16,
|
||||
}
|
||||
|
||||
impl BudgetTracker {
|
||||
/// Create a tracker with the given budget (clamped to hard maximums).
|
||||
pub fn new(budget: ResourceBudget) -> Self {
|
||||
Self {
|
||||
budget: budget.clamped(),
|
||||
used_time_secs: 0, used_tokens: 0, used_cost_microdollars: 0,
|
||||
used_tool_calls: 0, used_external_writes: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// The clamped budget this tracker enforces.
|
||||
pub fn budget(&self) -> &ResourceBudget { &self.budget }
|
||||
|
||||
/// Charge token usage.
|
||||
pub fn charge_tokens(&mut self, tokens: u32) -> Result<(), ContainerError> {
|
||||
let t = self.used_tokens.saturating_add(tokens);
|
||||
if t > self.budget.max_tokens { return Err(ContainerError::BudgetExhausted("tokens")); }
|
||||
self.used_tokens = t;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Charge cost in microdollars.
|
||||
pub fn charge_cost(&mut self, microdollars: u32) -> Result<(), ContainerError> {
|
||||
let t = self.used_cost_microdollars.saturating_add(microdollars);
|
||||
if t > self.budget.max_cost_microdollars { return Err(ContainerError::BudgetExhausted("cost")); }
|
||||
self.used_cost_microdollars = t;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Charge one tool call.
|
||||
pub fn charge_tool_call(&mut self) -> Result<(), ContainerError> {
|
||||
let t = self.used_tool_calls.saturating_add(1);
|
||||
if t > self.budget.max_tool_calls { return Err(ContainerError::BudgetExhausted("tool_calls")); }
|
||||
self.used_tool_calls = t;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Charge one external write.
|
||||
pub fn charge_external_write(&mut self) -> Result<(), ContainerError> {
|
||||
let t = self.used_external_writes.saturating_add(1);
|
||||
if t > self.budget.max_external_writes { return Err(ContainerError::BudgetExhausted("external_writes")); }
|
||||
self.used_external_writes = t;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Charge wall-clock time in seconds.
|
||||
pub fn charge_time(&mut self, secs: u32) -> Result<(), ContainerError> {
|
||||
let t = self.used_time_secs.saturating_add(secs);
|
||||
if t > self.budget.max_time_secs { return Err(ContainerError::BudgetExhausted("time")); }
|
||||
self.used_time_secs = t;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Remaining tokens before exhaustion.
|
||||
pub fn remaining_tokens(&self) -> u32 { self.budget.max_tokens.saturating_sub(self.used_tokens) }
|
||||
|
||||
/// Remaining cost budget in microdollars.
|
||||
pub fn remaining_cost(&self) -> u32 {
|
||||
self.budget.max_cost_microdollars.saturating_sub(self.used_cost_microdollars)
|
||||
}
|
||||
|
||||
/// Remaining wall-clock time in seconds.
|
||||
pub fn remaining_time(&self) -> u32 { self.budget.max_time_secs.saturating_sub(self.used_time_secs) }
|
||||
|
||||
/// Compute utilization percentages for each resource dimension.
|
||||
pub fn utilization(&self) -> BudgetUtilization {
|
||||
BudgetUtilization {
|
||||
time_pct: pct(self.used_time_secs as f32, self.budget.max_time_secs as f32),
|
||||
tokens_pct: pct(self.used_tokens as f32, self.budget.max_tokens as f32),
|
||||
cost_pct: pct(self.used_cost_microdollars as f32, self.budget.max_cost_microdollars as f32),
|
||||
tool_calls_pct: pct(self.used_tool_calls as f32, self.budget.max_tool_calls as f32),
|
||||
external_writes_pct: pct(self.used_external_writes as f32, self.budget.max_external_writes as f32),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if ANY resource dimension with a non-zero budget has reached
|
||||
/// its limit. A zero-max dimension is disabled, not exhausted.
|
||||
pub fn is_exhausted(&self) -> bool {
|
||||
(self.budget.max_time_secs > 0 && self.used_time_secs >= self.budget.max_time_secs)
|
||||
|| (self.budget.max_tokens > 0 && self.used_tokens >= self.budget.max_tokens)
|
||||
|| (self.budget.max_cost_microdollars > 0 && self.used_cost_microdollars >= self.budget.max_cost_microdollars)
|
||||
|| (self.budget.max_tool_calls > 0 && self.used_tool_calls >= self.budget.max_tool_calls)
|
||||
|| (self.budget.max_external_writes > 0 && self.used_external_writes >= self.budget.max_external_writes)
|
||||
}
|
||||
|
||||
/// Capture a point-in-time snapshot of the tracker state.
|
||||
pub fn snapshot(&self) -> BudgetSnapshot {
|
||||
BudgetSnapshot {
|
||||
budget: self.budget,
|
||||
used_time_secs: self.used_time_secs,
|
||||
used_tokens: self.used_tokens,
|
||||
used_cost_microdollars: self.used_cost_microdollars,
|
||||
used_tool_calls: self.used_tool_calls,
|
||||
used_external_writes: self.used_external_writes,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn pct(used: f32, max: f32) -> f32 {
|
||||
if max == 0.0 { if used > 0.0 { 100.0 } else { 0.0 } }
|
||||
else { (used / max * 100.0).min(100.0) }
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn default_authority_per_mode() {
|
||||
let r = AuthorityGuard::new(ExecutionMode::Replay);
|
||||
assert_eq!(r.max_authority(), AuthorityLevel::ReadOnly);
|
||||
assert_eq!(r.mode(), ExecutionMode::Replay);
|
||||
assert_eq!(AuthorityGuard::new(ExecutionMode::Verify).max_authority(), AuthorityLevel::ExecuteTools);
|
||||
assert_eq!(AuthorityGuard::new(ExecutionMode::Live).max_authority(), AuthorityLevel::WriteMemory);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn check_pass_and_fail() {
|
||||
let g = AuthorityGuard::new(ExecutionMode::Verify);
|
||||
assert!(g.check(AuthorityLevel::ReadOnly).is_ok());
|
||||
assert!(g.check(AuthorityLevel::ExecuteTools).is_ok());
|
||||
assert_eq!(g.check(AuthorityLevel::WriteExternal).unwrap_err(),
|
||||
ContainerError::InsufficientAuthority { required: 3, granted: 2 });
|
||||
let ro = AuthorityGuard::new(ExecutionMode::Replay);
|
||||
assert_eq!(ro.check(AuthorityLevel::WriteMemory).unwrap_err(),
|
||||
ContainerError::InsufficientAuthority { required: 1, granted: 0 });
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn with_max_authority_overrides_default() {
|
||||
let g = AuthorityGuard::with_max_authority(ExecutionMode::Replay, AuthorityLevel::WriteExternal);
|
||||
assert_eq!(g.mode(), ExecutionMode::Replay);
|
||||
assert!(g.check(AuthorityLevel::WriteExternal).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn action_class_grant_restrict_and_inherit() {
|
||||
let mut g = AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::WriteExternal);
|
||||
assert!(g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteExternal).is_ok());
|
||||
g.grant_action_class(ActionClass::GitPush, AuthorityLevel::ReadOnly);
|
||||
assert_eq!(g.check_action_class(ActionClass::GitPush, AuthorityLevel::WriteMemory).unwrap_err(),
|
||||
ContainerError::InsufficientAuthority { required: 1, granted: 0 });
|
||||
assert!(g.check_action_class(ActionClass::ReadMemory, AuthorityLevel::WriteExternal).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn action_class_override_capped_by_global() {
|
||||
let mut g = AuthorityGuard::new(ExecutionMode::Replay);
|
||||
g.grant_action_class(ActionClass::RunCommand, AuthorityLevel::WriteExternal);
|
||||
assert!(g.check_action_class(ActionClass::RunCommand, AuthorityLevel::WriteMemory).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn action_class_override_within_global() {
|
||||
let mut g = AuthorityGuard::with_max_authority(ExecutionMode::Live, AuthorityLevel::ExecuteTools);
|
||||
g.grant_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory);
|
||||
assert!(g.check_action_class(ActionClass::WriteFile, AuthorityLevel::WriteMemory).is_ok());
|
||||
assert!(g.check_action_class(ActionClass::WriteFile, AuthorityLevel::ExecuteTools).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn action_class_all_variants() {
|
||||
assert_eq!(ActionClass::ALL.len(), ACTION_CLASS_COUNT);
|
||||
for (i, c) in ActionClass::ALL.iter().enumerate() { assert_eq!(*c as usize, i); }
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tracker_zero_usage() {
|
||||
let t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
assert_eq!(t.remaining_tokens(), 200_000);
|
||||
assert_eq!(t.remaining_cost(), 1_000_000);
|
||||
assert_eq!(t.remaining_time(), 300);
|
||||
assert!(!t.is_exhausted());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn charge_and_exhaust_each_resource() {
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
assert!(t.charge_tokens(200_000).is_ok());
|
||||
assert_eq!(t.charge_tokens(1), Err(ContainerError::BudgetExhausted("tokens")));
|
||||
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
assert!(t.charge_cost(1_000_000).is_ok());
|
||||
assert_eq!(t.charge_cost(1), Err(ContainerError::BudgetExhausted("cost")));
|
||||
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
for _ in 0..50 { t.charge_tool_call().unwrap(); }
|
||||
assert!(t.charge_tool_call().is_err());
|
||||
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
assert!(t.charge_time(300).is_ok());
|
||||
assert!(t.charge_time(1).is_err());
|
||||
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
assert!(t.charge_external_write().is_err()); // zero budget
|
||||
|
||||
let mut t = BudgetTracker::new(ResourceBudget::EXTENDED);
|
||||
for _ in 0..10 { t.charge_external_write().unwrap(); }
|
||||
assert!(t.charge_external_write().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn is_exhausted_semantics() {
|
||||
assert!(!BudgetTracker::new(ResourceBudget::DEFAULT).is_exhausted());
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
t.charge_tokens(200_000).unwrap();
|
||||
assert!(t.is_exhausted());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn utilization_calculation() {
|
||||
let mut t = BudgetTracker::new(ResourceBudget::DEFAULT);
|
||||
t.charge_tokens(100_000).unwrap();
|
||||
t.charge_time(150).unwrap();
|
||||
t.charge_cost(500_000).unwrap();
|
||||
let u = t.utilization();
|
||||
assert!((u.tokens_pct - 50.0).abs() < 0.01);
|
||||
assert!((u.time_pct - 50.0).abs() < 0.01);
|
||||
assert!((u.cost_pct - 50.0).abs() < 0.01);
|
||||
|
||||
t.charge_tokens(100_000).unwrap();
|
||||
let u2 = t.utilization();
|
||||
assert!((u2.tokens_pct - 100.0).abs() < 0.01);
|
||||
|
||||
let z = BudgetTracker::new(ResourceBudget {
|
||||
max_time_secs: 0, max_tokens: 0, max_cost_microdollars: 0,
|
||||
max_tool_calls: 0, max_external_writes: 0,
|
||||
});
|
||||
assert!((z.utilization().time_pct).abs() < 0.01);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn snapshot_captures_state() {
|
||||
let mut t = BudgetTracker::new(ResourceBudget::EXTENDED);
|
||||
t.charge_tokens(5_000).unwrap();
|
||||
t.charge_time(60).unwrap();
|
||||
t.charge_cost(100_000).unwrap();
|
||||
t.charge_tool_call().unwrap();
|
||||
t.charge_external_write().unwrap();
|
||||
let s = t.snapshot();
|
||||
assert_eq!(s.budget, ResourceBudget::EXTENDED);
|
||||
assert_eq!(s.used_tokens, 5_000);
|
||||
assert_eq!(s.used_time_secs, 60);
|
||||
assert_eq!(s.used_cost_microdollars, 100_000);
|
||||
assert_eq!(s.used_tool_calls, 1);
|
||||
assert_eq!(s.used_external_writes, 1);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn budget_clamped_on_creation() {
|
||||
let t = BudgetTracker::new(ResourceBudget {
|
||||
max_time_secs: 999_999, max_tokens: 999_999_999,
|
||||
max_cost_microdollars: 999_999_999, max_tool_calls: 60_000, max_external_writes: 60_000,
|
||||
});
|
||||
let b = t.budget();
|
||||
assert_eq!(b.max_time_secs, ResourceBudget::MAX.max_time_secs);
|
||||
assert_eq!(b.max_tokens, ResourceBudget::MAX.max_tokens);
|
||||
assert_eq!(b.max_external_writes, ResourceBudget::MAX.max_external_writes);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn charge_exactly_at_limit() {
|
||||
let mut t = BudgetTracker::new(ResourceBudget {
|
||||
max_time_secs: 10, max_tokens: 100, max_cost_microdollars: 500,
|
||||
max_tool_calls: 3, max_external_writes: 2,
|
||||
});
|
||||
assert!(t.charge_tokens(100).is_ok());
|
||||
assert!(t.charge_time(10).is_ok());
|
||||
assert!(t.charge_cost(500).is_ok());
|
||||
for _ in 0..3 { t.charge_tool_call().unwrap(); }
|
||||
for _ in 0..2 { t.charge_external_write().unwrap(); }
|
||||
assert_eq!(t.remaining_tokens(), 0);
|
||||
assert_eq!(t.remaining_cost(), 0);
|
||||
assert_eq!(t.remaining_time(), 0);
|
||||
assert!(t.is_exhausted());
|
||||
assert!(t.charge_tokens(1).is_err());
|
||||
assert!(t.charge_time(1).is_err());
|
||||
assert!(t.charge_cost(1).is_err());
|
||||
assert!(t.charge_tool_call().is_err());
|
||||
assert!(t.charge_external_write().is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_budget_allows_high_usage() {
|
||||
let mut t = BudgetTracker::new(ResourceBudget::MAX);
|
||||
assert!(t.charge_tokens(1_000_000).is_ok());
|
||||
assert!(t.charge_time(3600).is_ok());
|
||||
assert!(t.charge_cost(10_000_000).is_ok());
|
||||
for _ in 0..500 { t.charge_tool_call().unwrap(); }
|
||||
for _ in 0..50 { t.charge_external_write().unwrap(); }
|
||||
assert!(t.is_exhausted());
|
||||
}
|
||||
}
|
||||
689
crates/rvf/rvf-runtime/src/agi_coherence.rs
Normal file
689
crates/rvf/rvf-runtime/src/agi_coherence.rs
Normal file
|
|
@ -0,0 +1,689 @@
|
|||
//! Coherence monitoring and container validation pipeline (ADR-036).
|
||||
//!
|
||||
//! This module provides two main components:
|
||||
//!
|
||||
//! - [`CoherenceMonitor`]: tracks real-time coherence state across events,
|
||||
//! contradictions, and task rollbacks, transitioning through [`CoherenceState`]
|
||||
//! to gate commits, skill promotion, and autonomous execution.
|
||||
//!
|
||||
//! - [`ContainerValidator`]: runs the full validation pipeline over an AGI
|
||||
//! container's header, segments, and coherence thresholds, collecting all
|
||||
//! errors rather than short-circuiting.
|
||||
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CoherenceState
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Runtime coherence state derived from threshold checks.
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
|
||||
pub enum CoherenceState {
|
||||
/// All signals within bounds.
|
||||
Healthy,
|
||||
/// Contradiction rate exceeded -- skill promotion frozen.
|
||||
SkillFreeze,
|
||||
/// Coherence score below minimum -- commits blocked, repair mode.
|
||||
RepairMode,
|
||||
/// Rollback ratio exceeded -- execution halted, human review required.
|
||||
Halted,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CoherenceReport
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Point-in-time snapshot of all coherence metrics.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CoherenceReport {
|
||||
pub state: CoherenceState,
|
||||
pub coherence_score: f32,
|
||||
pub contradiction_rate: f32,
|
||||
pub rollback_ratio: f32,
|
||||
pub total_events: u64,
|
||||
pub total_contradictions: u64,
|
||||
pub total_tasks: u64,
|
||||
pub total_rollbacks: u64,
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CoherenceMonitor
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Tracks real-time coherence state and gates system actions.
|
||||
pub struct CoherenceMonitor {
|
||||
thresholds: CoherenceThresholds,
|
||||
current_coherence: f32,
|
||||
total_events: u64,
|
||||
total_contradictions: u64,
|
||||
total_tasks: u64,
|
||||
total_rollbacks: u64,
|
||||
state: CoherenceState,
|
||||
}
|
||||
|
||||
impl CoherenceMonitor {
|
||||
/// Create a new monitor with the given thresholds. Returns an error if the
|
||||
/// thresholds are out of valid ranges.
|
||||
pub fn new(thresholds: CoherenceThresholds) -> Result<Self, ContainerError> {
|
||||
thresholds.validate()?;
|
||||
Ok(Self {
|
||||
thresholds,
|
||||
current_coherence: 1.0,
|
||||
total_events: 0,
|
||||
total_contradictions: 0,
|
||||
total_tasks: 0,
|
||||
total_rollbacks: 0,
|
||||
state: CoherenceState::Healthy,
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a monitor with [`CoherenceThresholds::DEFAULT`].
|
||||
pub fn with_defaults() -> Self {
|
||||
Self {
|
||||
thresholds: CoherenceThresholds::DEFAULT,
|
||||
current_coherence: 1.0,
|
||||
total_events: 0,
|
||||
total_contradictions: 0,
|
||||
total_tasks: 0,
|
||||
total_rollbacks: 0,
|
||||
state: CoherenceState::Healthy,
|
||||
}
|
||||
}
|
||||
|
||||
/// Update the current coherence score and re-evaluate state.
|
||||
pub fn update_coherence(&mut self, score: f32) {
|
||||
self.current_coherence = score;
|
||||
self.recompute_state();
|
||||
}
|
||||
|
||||
/// Record a generic event (increments event counter).
|
||||
pub fn record_event(&mut self) {
|
||||
self.total_events = self.total_events.saturating_add(1);
|
||||
}
|
||||
|
||||
/// Record a contradiction event and re-evaluate state.
|
||||
pub fn record_contradiction(&mut self) {
|
||||
self.total_contradictions = self.total_contradictions.saturating_add(1);
|
||||
self.recompute_state();
|
||||
}
|
||||
|
||||
/// Record a task completion. If `rolled_back` is true the rollback counter
|
||||
/// is also incremented. State is re-evaluated afterward.
|
||||
pub fn record_task_completion(&mut self, rolled_back: bool) {
|
||||
self.total_tasks = self.total_tasks.saturating_add(1);
|
||||
if rolled_back {
|
||||
self.total_rollbacks = self.total_rollbacks.saturating_add(1);
|
||||
}
|
||||
self.recompute_state();
|
||||
}
|
||||
|
||||
/// Current coherence state.
|
||||
pub fn state(&self) -> CoherenceState {
|
||||
self.state
|
||||
}
|
||||
|
||||
/// Whether the system may commit world-model deltas. True when
|
||||
/// [`CoherenceState::Healthy`] or [`CoherenceState::SkillFreeze`].
|
||||
pub fn can_commit(&self) -> bool {
|
||||
matches!(self.state, CoherenceState::Healthy | CoherenceState::SkillFreeze)
|
||||
}
|
||||
|
||||
/// Whether new skills may be promoted. True only when
|
||||
/// [`CoherenceState::Healthy`].
|
||||
pub fn can_promote_skill(&self) -> bool {
|
||||
self.state == CoherenceState::Healthy
|
||||
}
|
||||
|
||||
/// Whether the system requires human review. True when
|
||||
/// [`CoherenceState::Halted`].
|
||||
pub fn requires_human_review(&self) -> bool {
|
||||
self.state == CoherenceState::Halted
|
||||
}
|
||||
|
||||
/// Contradiction rate: contradictions per 100 events.
|
||||
/// Returns `0.0` when there are no events.
|
||||
pub fn contradiction_rate(&self) -> f32 {
|
||||
if self.total_events == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
(self.total_contradictions as f32 / self.total_events as f32) * 100.0
|
||||
}
|
||||
|
||||
/// Rollback ratio: rollbacks / total tasks.
|
||||
/// Returns `0.0` when there are no tasks.
|
||||
pub fn rollback_ratio(&self) -> f32 {
|
||||
if self.total_tasks == 0 {
|
||||
return 0.0;
|
||||
}
|
||||
self.total_rollbacks as f32 / self.total_tasks as f32
|
||||
}
|
||||
|
||||
/// Produce a point-in-time snapshot of all metrics.
|
||||
pub fn report(&self) -> CoherenceReport {
|
||||
CoherenceReport {
|
||||
state: self.state,
|
||||
coherence_score: self.current_coherence,
|
||||
contradiction_rate: self.contradiction_rate(),
|
||||
rollback_ratio: self.rollback_ratio(),
|
||||
total_events: self.total_events,
|
||||
total_contradictions: self.total_contradictions,
|
||||
total_tasks: self.total_tasks,
|
||||
total_rollbacks: self.total_rollbacks,
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Internal
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
/// Determine the worst applicable state from the current metrics.
|
||||
/// Priority (most severe first): Halted > RepairMode > SkillFreeze > Healthy.
|
||||
fn recompute_state(&mut self) {
|
||||
if self.rollback_ratio() > self.thresholds.max_rollback_ratio {
|
||||
self.state = CoherenceState::Halted;
|
||||
} else if self.current_coherence < self.thresholds.min_coherence_score {
|
||||
self.state = CoherenceState::RepairMode;
|
||||
} else if self.contradiction_rate() > self.thresholds.max_contradiction_rate {
|
||||
self.state = CoherenceState::SkillFreeze;
|
||||
} else {
|
||||
self.state = CoherenceState::Healthy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// ContainerValidator
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/// Full validation pipeline for an AGI container.
|
||||
pub struct ContainerValidator {
|
||||
mode: ExecutionMode,
|
||||
}
|
||||
|
||||
impl ContainerValidator {
|
||||
/// Create a validator for the given execution mode.
|
||||
pub fn new(mode: ExecutionMode) -> Self {
|
||||
Self { mode }
|
||||
}
|
||||
|
||||
/// Validate container segments against mode requirements.
|
||||
pub fn validate_segments(
|
||||
&self,
|
||||
segments: &ContainerSegments,
|
||||
) -> Result<(), ContainerError> {
|
||||
segments.validate(self.mode)
|
||||
}
|
||||
|
||||
/// Validate the AGI container header.
|
||||
///
|
||||
/// Checks: magic bytes, version (must be 1), and flag consistency --
|
||||
/// replay-capable containers must not claim Live-only features without
|
||||
/// the kernel flag.
|
||||
pub fn validate_header(
|
||||
&self,
|
||||
header: &AgiContainerHeader,
|
||||
) -> Result<(), ContainerError> {
|
||||
if !header.is_valid_magic() {
|
||||
return Err(ContainerError::InvalidConfig("bad magic bytes"));
|
||||
}
|
||||
if header.version == 0 || header.version > 1 {
|
||||
return Err(ContainerError::InvalidConfig(
|
||||
"unsupported header version",
|
||||
));
|
||||
}
|
||||
// Flag consistency: if REPLAY_CAPABLE is set the container should
|
||||
// also have the witness flag, since replays depend on witness chains.
|
||||
if header.is_replay_capable()
|
||||
&& (header.flags & AGI_HAS_WITNESS == 0)
|
||||
{
|
||||
return Err(ContainerError::InvalidConfig(
|
||||
"replay-capable flag requires witness flag",
|
||||
));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Validate coherence threshold ranges.
|
||||
pub fn validate_coherence(
|
||||
&self,
|
||||
thresholds: &CoherenceThresholds,
|
||||
) -> Result<(), ContainerError> {
|
||||
thresholds.validate()
|
||||
}
|
||||
|
||||
/// Run all validations, collecting every error rather than
|
||||
/// short-circuiting on the first failure.
|
||||
pub fn validate_full(
|
||||
&self,
|
||||
header: &AgiContainerHeader,
|
||||
segments: &ContainerSegments,
|
||||
thresholds: &CoherenceThresholds,
|
||||
) -> Vec<ContainerError> {
|
||||
let mut errors = Vec::new();
|
||||
|
||||
if let Err(e) = self.validate_header(header) {
|
||||
errors.push(e);
|
||||
}
|
||||
if let Err(e) = self.validate_segments(segments) {
|
||||
errors.push(e);
|
||||
}
|
||||
if let Err(e) = self.validate_coherence(thresholds) {
|
||||
errors.push(e);
|
||||
}
|
||||
|
||||
errors
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Tests
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
// -- Helpers --
|
||||
|
||||
fn valid_header() -> AgiContainerHeader {
|
||||
AgiContainerHeader {
|
||||
magic: AGI_MAGIC,
|
||||
version: 1,
|
||||
flags: AGI_HAS_KERNEL | AGI_HAS_WITNESS | AGI_REPLAY_CAPABLE,
|
||||
container_id: [0x01; 16],
|
||||
build_id: [0x02; 16],
|
||||
created_ns: 1_700_000_000_000_000_000,
|
||||
model_id_hash: [0; 8],
|
||||
policy_hash: [0; 8],
|
||||
}
|
||||
}
|
||||
|
||||
fn valid_segments() -> ContainerSegments {
|
||||
ContainerSegments {
|
||||
manifest_present: true,
|
||||
kernel_present: true,
|
||||
world_model_present: true,
|
||||
..Default::default()
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// CoherenceMonitor — state transitions
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn monitor_starts_healthy() {
|
||||
let m = CoherenceMonitor::with_defaults();
|
||||
assert_eq!(m.state(), CoherenceState::Healthy);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn monitor_healthy_to_skill_freeze() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
// Default max_contradiction_rate is 5.0 per 100 events.
|
||||
// Record 100 events with 6 contradictions -> rate = 6.0 > 5.0.
|
||||
for _ in 0..100 {
|
||||
m.record_event();
|
||||
}
|
||||
for _ in 0..6 {
|
||||
m.record_contradiction();
|
||||
}
|
||||
assert_eq!(m.state(), CoherenceState::SkillFreeze);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn monitor_healthy_to_repair_mode() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
// Default min_coherence_score is 0.70.
|
||||
m.update_coherence(0.50);
|
||||
assert_eq!(m.state(), CoherenceState::RepairMode);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn monitor_healthy_to_halted() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
// Default max_rollback_ratio is 0.20.
|
||||
// 10 tasks, 3 rolled back -> ratio = 0.30 > 0.20.
|
||||
for _ in 0..7 {
|
||||
m.record_task_completion(false);
|
||||
}
|
||||
for _ in 0..3 {
|
||||
m.record_task_completion(true);
|
||||
}
|
||||
assert_eq!(m.state(), CoherenceState::Halted);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn halted_takes_priority_over_repair_mode() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
// Both rollback ratio and coherence score are bad.
|
||||
m.update_coherence(0.50);
|
||||
for _ in 0..5 {
|
||||
m.record_task_completion(false);
|
||||
}
|
||||
for _ in 0..5 {
|
||||
m.record_task_completion(true);
|
||||
}
|
||||
// Halted (rollback) is highest severity and wins.
|
||||
assert_eq!(m.state(), CoherenceState::Halted);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn repair_mode_takes_priority_over_skill_freeze() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
// Both coherence and contradiction rate are bad.
|
||||
m.update_coherence(0.50);
|
||||
for _ in 0..100 {
|
||||
m.record_event();
|
||||
}
|
||||
for _ in 0..10 {
|
||||
m.record_contradiction();
|
||||
}
|
||||
// RepairMode wins over SkillFreeze.
|
||||
assert_eq!(m.state(), CoherenceState::RepairMode);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn recovery_back_to_healthy() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
m.update_coherence(0.50);
|
||||
assert_eq!(m.state(), CoherenceState::RepairMode);
|
||||
m.update_coherence(0.90);
|
||||
assert_eq!(m.state(), CoherenceState::Healthy);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// CoherenceMonitor — gate queries
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn can_commit_when_healthy() {
|
||||
let m = CoherenceMonitor::with_defaults();
|
||||
assert!(m.can_commit());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_commit_when_skill_freeze() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
for _ in 0..100 {
|
||||
m.record_event();
|
||||
}
|
||||
for _ in 0..6 {
|
||||
m.record_contradiction();
|
||||
}
|
||||
assert_eq!(m.state(), CoherenceState::SkillFreeze);
|
||||
assert!(m.can_commit());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cannot_commit_when_repair_mode() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
m.update_coherence(0.50);
|
||||
assert!(!m.can_commit());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cannot_commit_when_halted() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
m.record_task_completion(true); // 1 task, 1 rollback -> ratio = 1.0
|
||||
assert!(!m.can_commit());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn can_promote_skill_only_when_healthy() {
|
||||
let m = CoherenceMonitor::with_defaults();
|
||||
assert!(m.can_promote_skill());
|
||||
|
||||
let mut m2 = CoherenceMonitor::with_defaults();
|
||||
for _ in 0..100 {
|
||||
m2.record_event();
|
||||
}
|
||||
for _ in 0..6 {
|
||||
m2.record_contradiction();
|
||||
}
|
||||
assert_eq!(m2.state(), CoherenceState::SkillFreeze);
|
||||
assert!(!m2.can_promote_skill());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn requires_human_review_only_when_halted() {
|
||||
let m = CoherenceMonitor::with_defaults();
|
||||
assert!(!m.requires_human_review());
|
||||
|
||||
let mut m2 = CoherenceMonitor::with_defaults();
|
||||
m2.record_task_completion(true);
|
||||
assert_eq!(m2.state(), CoherenceState::Halted);
|
||||
assert!(m2.requires_human_review());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// CoherenceMonitor — rate calculations
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn contradiction_rate_calculation() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
for _ in 0..200 {
|
||||
m.record_event();
|
||||
}
|
||||
for _ in 0..4 {
|
||||
m.record_contradiction();
|
||||
}
|
||||
// 4 contradictions in (200 events + 4 contradiction events via record_contradiction)
|
||||
// record_contradiction does not call record_event, so total_events = 200.
|
||||
// Rate = (4 / 200) * 100 = 2.0.
|
||||
assert!((m.contradiction_rate() - 2.0).abs() < f32::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollback_ratio_calculation() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
for _ in 0..8 {
|
||||
m.record_task_completion(false);
|
||||
}
|
||||
for _ in 0..2 {
|
||||
m.record_task_completion(true);
|
||||
}
|
||||
// 2 rollbacks / 10 tasks = 0.20.
|
||||
assert!((m.rollback_ratio() - 0.20).abs() < f32::EPSILON);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// CoherenceMonitor — edge cases
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn contradiction_rate_zero_events() {
|
||||
let m = CoherenceMonitor::with_defaults();
|
||||
assert!((m.contradiction_rate() - 0.0).abs() < f32::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rollback_ratio_zero_tasks() {
|
||||
let m = CoherenceMonitor::with_defaults();
|
||||
assert!((m.rollback_ratio() - 0.0).abs() < f32::EPSILON);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_with_invalid_thresholds() {
|
||||
let bad = CoherenceThresholds {
|
||||
min_coherence_score: 2.0,
|
||||
..CoherenceThresholds::DEFAULT
|
||||
};
|
||||
assert!(CoherenceMonitor::new(bad).is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn new_with_valid_thresholds() {
|
||||
let m = CoherenceMonitor::new(CoherenceThresholds::STRICT).unwrap();
|
||||
assert_eq!(m.state(), CoherenceState::Healthy);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn report_snapshot() {
|
||||
let mut m = CoherenceMonitor::with_defaults();
|
||||
m.update_coherence(0.85);
|
||||
for _ in 0..50 {
|
||||
m.record_event();
|
||||
}
|
||||
m.record_contradiction();
|
||||
// 9 successful + 1 rollback = ratio 0.10, within the 0.20 threshold.
|
||||
for _ in 0..9 {
|
||||
m.record_task_completion(false);
|
||||
}
|
||||
m.record_task_completion(true);
|
||||
|
||||
let r = m.report();
|
||||
assert_eq!(r.state, CoherenceState::Healthy);
|
||||
assert!((r.coherence_score - 0.85).abs() < f32::EPSILON);
|
||||
assert_eq!(r.total_events, 50);
|
||||
assert_eq!(r.total_contradictions, 1);
|
||||
assert_eq!(r.total_tasks, 10);
|
||||
assert_eq!(r.total_rollbacks, 1);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ContainerValidator — validate_segments
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn validator_segments_delegates_ok() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
let segs = valid_segments();
|
||||
assert!(v.validate_segments(&segs).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_segments_delegates_error() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Replay);
|
||||
let segs = ContainerSegments {
|
||||
manifest_present: true,
|
||||
witness_count: 0,
|
||||
..Default::default()
|
||||
};
|
||||
assert!(v.validate_segments(&segs).is_err());
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ContainerValidator — validate_header
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn validator_header_ok() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
assert!(v.validate_header(&valid_header()).is_ok());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_header_bad_magic() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
let mut h = valid_header();
|
||||
h.magic = 0xDEADBEEF;
|
||||
assert_eq!(
|
||||
v.validate_header(&h),
|
||||
Err(ContainerError::InvalidConfig("bad magic bytes"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_header_bad_version_zero() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
let mut h = valid_header();
|
||||
h.version = 0;
|
||||
assert_eq!(
|
||||
v.validate_header(&h),
|
||||
Err(ContainerError::InvalidConfig("unsupported header version"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_header_bad_version_future() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
let mut h = valid_header();
|
||||
h.version = 99;
|
||||
assert_eq!(
|
||||
v.validate_header(&h),
|
||||
Err(ContainerError::InvalidConfig("unsupported header version"))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_header_replay_without_witness() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
let h = AgiContainerHeader {
|
||||
magic: AGI_MAGIC,
|
||||
version: 1,
|
||||
flags: AGI_REPLAY_CAPABLE, // missing AGI_HAS_WITNESS
|
||||
container_id: [0; 16],
|
||||
build_id: [0; 16],
|
||||
created_ns: 0,
|
||||
model_id_hash: [0; 8],
|
||||
policy_hash: [0; 8],
|
||||
};
|
||||
assert_eq!(
|
||||
v.validate_header(&h),
|
||||
Err(ContainerError::InvalidConfig(
|
||||
"replay-capable flag requires witness flag"
|
||||
))
|
||||
);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// ContainerValidator — validate_full
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
#[test]
|
||||
fn validator_full_all_ok() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
let errs = v.validate_full(
|
||||
&valid_header(),
|
||||
&valid_segments(),
|
||||
&CoherenceThresholds::DEFAULT,
|
||||
);
|
||||
assert!(errs.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_full_collects_multiple_errors() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Live);
|
||||
|
||||
// Bad header (wrong magic).
|
||||
let mut h = valid_header();
|
||||
h.magic = 0xBAD0CAFE;
|
||||
|
||||
// Bad segments (no kernel, no wasm, no world model for Live).
|
||||
let segs = ContainerSegments {
|
||||
manifest_present: true,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Bad thresholds.
|
||||
let bad_thresh = CoherenceThresholds {
|
||||
min_coherence_score: -1.0,
|
||||
..CoherenceThresholds::DEFAULT
|
||||
};
|
||||
|
||||
let errs = v.validate_full(&h, &segs, &bad_thresh);
|
||||
// Expect at least 3 errors: header, segments, and thresholds.
|
||||
assert!(errs.len() >= 3, "expected >= 3 errors, got {}", errs.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn validator_full_partial_errors() {
|
||||
let v = ContainerValidator::new(ExecutionMode::Replay);
|
||||
|
||||
// Good header, bad segments (replay needs witness), good thresholds.
|
||||
let segs = ContainerSegments {
|
||||
manifest_present: true,
|
||||
witness_count: 0,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
let errs = v.validate_full(
|
||||
&valid_header(),
|
||||
&segs,
|
||||
&CoherenceThresholds::DEFAULT,
|
||||
);
|
||||
assert_eq!(errs.len(), 1);
|
||||
}
|
||||
}
|
||||
|
|
@ -35,6 +35,8 @@ pub mod status;
|
|||
pub mod store;
|
||||
pub mod witness;
|
||||
pub mod write_path;
|
||||
pub mod agi_authority;
|
||||
pub mod agi_coherence;
|
||||
pub mod agi_container;
|
||||
|
||||
pub use adversarial::{
|
||||
|
|
|
|||
335
crates/rvf/rvf-runtime/tests/agi_e2e.rs
Normal file
335
crates/rvf/rvf-runtime/tests/agi_e2e.rs
Normal file
|
|
@ -0,0 +1,335 @@
|
|||
//! End-to-end integration tests and benchmarks for the AGI Cognitive Container
|
||||
//! system (ADR-036). Covers the full build -> serialize -> parse -> validate
|
||||
//! cycle, signed container tamper detection, execution mode validation matrix,
|
||||
//! authority levels, resource budgets, coherence thresholds, and perf benchmarks.
|
||||
|
||||
use rvf_runtime::agi_container::{AgiContainerBuilder, ParsedAgiManifest};
|
||||
use rvf_runtime::seed_crypto;
|
||||
use rvf_types::agi_container::*;
|
||||
|
||||
const SIGNING_KEY: &[u8] = b"agi-e2e-test-signing-key-32bytes";
|
||||
const ORCH_JSON: &[u8] = br#"{"model":"claude-opus-4-6","max_turns":100}"#;
|
||||
const TASKS_JSON: &[u8] = br#"[{"id":1,"spec":"fix bug"}]"#;
|
||||
const GRADERS_JSON: &[u8] = br#"[{"type":"test_pass"}]"#;
|
||||
const TOOLS_JSON: &[u8] = br#"[{"name":"ruvector_query","type":"search"}]"#;
|
||||
const COHER_JSON: &[u8] = br#"{"min_cut":0.7,"rollback":true}"#;
|
||||
|
||||
/// Build a fully-populated container with every optional section.
|
||||
fn build_full_container() -> (Vec<u8>, AgiContainerHeader) {
|
||||
AgiContainerBuilder::new([0x01; 16], [0x02; 16])
|
||||
.with_model_id("claude-opus-4-6")
|
||||
.with_policy(b"autonomous", [0xAA; 8])
|
||||
.with_orchestrator(ORCH_JSON)
|
||||
.with_tool_registry(TOOLS_JSON)
|
||||
.with_agent_prompts(b"You are a coder agent.")
|
||||
.with_eval_tasks(TASKS_JSON)
|
||||
.with_eval_graders(GRADERS_JSON)
|
||||
.with_skill_library(b"[]")
|
||||
.with_replay_script(b"#!/bin/sh\nrvf replay $1")
|
||||
.with_kernel_config(b"console=ttyS0")
|
||||
.with_network_config(b"{\"port\":8080}")
|
||||
.with_coherence_config(COHER_JSON)
|
||||
.with_project_instructions(b"# CLAUDE.md")
|
||||
.with_dependency_snapshot(b"sha256:abc123")
|
||||
.with_authority_config(b"{\"level\":\"WriteMemory\"}")
|
||||
.with_domain_profile(b"coding")
|
||||
.offline_capable()
|
||||
.with_segments(ContainerSegments {
|
||||
kernel_present: true, kernel_size: 5_000_000,
|
||||
wasm_count: 2, wasm_total_size: 60_000,
|
||||
vec_segment_count: 4, index_segment_count: 2,
|
||||
witness_count: 100, crypto_present: false,
|
||||
manifest_present: true, orchestrator_present: true,
|
||||
world_model_present: true, total_size: 0,
|
||||
})
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
// -- 1. Full Container Lifecycle --
|
||||
|
||||
#[test]
|
||||
fn full_container_lifecycle() {
|
||||
let (payload, header) = build_full_container();
|
||||
|
||||
assert!(header.is_valid_magic());
|
||||
assert_eq!(header.version, 1);
|
||||
assert!(header.has_kernel());
|
||||
assert!(header.has_orchestrator());
|
||||
assert!(header.has_world_model());
|
||||
assert!(header.is_replay_capable());
|
||||
assert!(header.is_offline_capable());
|
||||
assert!(header.created_ns > 0, "created_ns should be a real timestamp");
|
||||
|
||||
// Header round-trip.
|
||||
let header_rt = AgiContainerHeader::from_bytes(&header.to_bytes()).unwrap();
|
||||
assert_eq!(header_rt, header);
|
||||
|
||||
// Parse manifest and verify every section.
|
||||
let p = ParsedAgiManifest::parse(&payload).unwrap();
|
||||
assert_eq!(p.model_id_str(), Some("claude-opus-4-6"));
|
||||
assert_eq!(p.orchestrator_config.unwrap(), ORCH_JSON);
|
||||
assert_eq!(p.tool_registry.unwrap(), TOOLS_JSON);
|
||||
assert_eq!(p.eval_tasks.unwrap(), TASKS_JSON);
|
||||
assert_eq!(p.eval_graders.unwrap(), GRADERS_JSON);
|
||||
assert_eq!(p.coherence_config.unwrap(), COHER_JSON);
|
||||
assert!(p.policy.is_some());
|
||||
assert!(p.agent_prompts.is_some());
|
||||
assert!(p.skill_library.is_some());
|
||||
assert!(p.replay_script.is_some());
|
||||
assert!(p.kernel_config.is_some());
|
||||
assert!(p.network_config.is_some());
|
||||
assert!(p.project_instructions.is_some());
|
||||
assert!(p.dependency_snapshot.is_some());
|
||||
assert!(p.authority_config.is_some());
|
||||
assert!(p.domain_profile.is_some());
|
||||
assert!(p.is_autonomous_capable());
|
||||
|
||||
// Segment-derived flags should all be present in the header.
|
||||
let seg_flags = ContainerSegments {
|
||||
kernel_present: true, wasm_count: 2, witness_count: 100,
|
||||
orchestrator_present: true, world_model_present: true,
|
||||
..Default::default()
|
||||
}.to_flags();
|
||||
assert_eq!(header.flags & seg_flags, seg_flags);
|
||||
}
|
||||
|
||||
// -- 2. Signed Container Tamper Detection --
|
||||
|
||||
#[test]
|
||||
fn signed_container_tamper_detection() {
|
||||
let builder = AgiContainerBuilder::new([0x10; 16], [0x20; 16])
|
||||
.with_model_id("claude-opus-4-6")
|
||||
.with_orchestrator(ORCH_JSON)
|
||||
.with_eval_tasks(TASKS_JSON)
|
||||
.with_eval_graders(GRADERS_JSON)
|
||||
.with_segments(ContainerSegments {
|
||||
kernel_present: true, manifest_present: true,
|
||||
world_model_present: true, ..Default::default()
|
||||
});
|
||||
|
||||
let (payload, header) = builder.build_and_sign(SIGNING_KEY).unwrap();
|
||||
assert!(header.is_signed());
|
||||
|
||||
let unsigned_len = payload.len() - 32;
|
||||
let sig = &payload[unsigned_len..];
|
||||
assert!(seed_crypto::verify_seed(SIGNING_KEY, &payload[..unsigned_len], sig));
|
||||
|
||||
// Tamper with one byte in the TLV payload area.
|
||||
let mut tampered = payload.clone();
|
||||
tampered[AGI_HEADER_SIZE + 10] ^= 0xFF;
|
||||
assert!(!seed_crypto::verify_seed(SIGNING_KEY, &tampered[..unsigned_len], sig),
|
||||
"tampered payload must fail verification");
|
||||
|
||||
// Tamper with header byte.
|
||||
let mut tampered_hdr = payload.clone();
|
||||
tampered_hdr[7] ^= 0x01;
|
||||
assert!(!seed_crypto::verify_seed(SIGNING_KEY, &tampered_hdr[..unsigned_len], sig),
|
||||
"tampered header must fail verification");
|
||||
}
|
||||
|
||||
// -- 3. Execution Mode Validation Matrix --
|
||||
|
||||
#[test]
|
||||
fn execution_mode_validation_matrix() {
|
||||
let m = |mp, kp, wc, wmc, vsc, isc, wnc| ContainerSegments {
|
||||
manifest_present: mp, kernel_present: kp, wasm_count: wc,
|
||||
world_model_present: wmc, vec_segment_count: vsc,
|
||||
index_segment_count: isc, witness_count: wnc,
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
// Replay + no witness -> fail
|
||||
assert!(m(true, false, 0, false, 0, 0, 0).validate(ExecutionMode::Replay).is_err());
|
||||
// Replay + witness -> pass
|
||||
assert!(m(true, false, 0, false, 0, 0, 10).validate(ExecutionMode::Replay).is_ok());
|
||||
// Verify + no runtime -> fail
|
||||
assert!(m(true, false, 0, false, 0, 0, 0).validate(ExecutionMode::Verify).is_err());
|
||||
// Verify + kernel + world_model -> pass
|
||||
assert!(m(true, true, 0, true, 0, 0, 0).validate(ExecutionMode::Verify).is_ok());
|
||||
// Verify + wasm + vec -> pass
|
||||
assert!(m(true, false, 1, false, 2, 0, 0).validate(ExecutionMode::Verify).is_ok());
|
||||
// Live + kernel only (no world model) -> fail
|
||||
assert!(m(true, true, 0, false, 0, 0, 0).validate(ExecutionMode::Live).is_err());
|
||||
// Live + kernel + world model -> pass
|
||||
assert!(m(true, true, 0, true, 0, 0, 0).validate(ExecutionMode::Live).is_ok());
|
||||
}
|
||||
|
||||
// -- 4. Authority Level Tests --
|
||||
|
||||
#[test]
|
||||
fn authority_level_defaults_per_mode() {
|
||||
assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Replay), AuthorityLevel::ReadOnly);
|
||||
assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Verify), AuthorityLevel::ExecuteTools);
|
||||
assert_eq!(AuthorityLevel::default_for_mode(ExecutionMode::Live), AuthorityLevel::WriteMemory);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn authority_level_hierarchy() {
|
||||
// WriteExternal permits all.
|
||||
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ReadOnly));
|
||||
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteMemory));
|
||||
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::ExecuteTools));
|
||||
assert!(AuthorityLevel::WriteExternal.permits(AuthorityLevel::WriteExternal));
|
||||
// ExecuteTools permits itself and below.
|
||||
assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ReadOnly));
|
||||
assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteMemory));
|
||||
assert!(AuthorityLevel::ExecuteTools.permits(AuthorityLevel::ExecuteTools));
|
||||
assert!(!AuthorityLevel::ExecuteTools.permits(AuthorityLevel::WriteExternal));
|
||||
// ReadOnly permits nothing above itself.
|
||||
assert!(AuthorityLevel::ReadOnly.permits(AuthorityLevel::ReadOnly));
|
||||
assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteMemory));
|
||||
assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::ExecuteTools));
|
||||
assert!(!AuthorityLevel::ReadOnly.permits(AuthorityLevel::WriteExternal));
|
||||
}
|
||||
|
||||
// -- 5. Resource Budget Tests --
|
||||
|
||||
#[test]
|
||||
fn resource_budget_clamping() {
|
||||
let clamped = ResourceBudget {
|
||||
max_time_secs: 99999, max_tokens: 99999999,
|
||||
max_cost_microdollars: 99999999,
|
||||
max_tool_calls: 65535, max_external_writes: 65535,
|
||||
}.clamped();
|
||||
assert_eq!(clamped.max_time_secs, 3600);
|
||||
assert_eq!(clamped.max_tokens, 1_000_000);
|
||||
assert_eq!(clamped.max_cost_microdollars, 10_000_000);
|
||||
assert_eq!(clamped.max_tool_calls, 500);
|
||||
assert_eq!(clamped.max_external_writes, 50);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resource_budget_within_max_unchanged() {
|
||||
assert_eq!(ResourceBudget::DEFAULT.clamped(), ResourceBudget::DEFAULT);
|
||||
}
|
||||
|
||||
// -- 6. Coherence Threshold Validation --
|
||||
|
||||
#[test]
|
||||
fn coherence_threshold_validation() {
|
||||
assert!(CoherenceThresholds::DEFAULT.validate().is_ok());
|
||||
assert!(CoherenceThresholds::STRICT.validate().is_ok());
|
||||
|
||||
// Invalid: score > 1.0
|
||||
let bad = CoherenceThresholds { min_coherence_score: 1.5, ..CoherenceThresholds::DEFAULT };
|
||||
assert!(bad.validate().is_err());
|
||||
// Invalid: negative rate
|
||||
let bad2 = CoherenceThresholds { max_contradiction_rate: -1.0, ..CoherenceThresholds::DEFAULT };
|
||||
assert!(bad2.validate().is_err());
|
||||
// Invalid: rollback ratio > 1.0
|
||||
let bad3 = CoherenceThresholds { max_rollback_ratio: 2.0, ..CoherenceThresholds::DEFAULT };
|
||||
assert!(bad3.validate().is_err());
|
||||
// Edge: zero values are valid
|
||||
let edge = CoherenceThresholds {
|
||||
min_coherence_score: 0.0, max_contradiction_rate: 0.0, max_rollback_ratio: 0.0,
|
||||
};
|
||||
assert!(edge.validate().is_ok());
|
||||
}
|
||||
|
||||
// -- 7. Container Size Limit --
|
||||
|
||||
#[test]
|
||||
fn container_size_limit_enforced() {
|
||||
let oversized = ContainerSegments {
|
||||
manifest_present: true, total_size: AGI_MAX_CONTAINER_SIZE + 1,
|
||||
..Default::default()
|
||||
};
|
||||
assert_eq!(
|
||||
oversized.validate(ExecutionMode::Replay),
|
||||
Err(ContainerError::TooLarge { size: AGI_MAX_CONTAINER_SIZE + 1 })
|
||||
);
|
||||
}
|
||||
|
||||
// -- 8. Performance Benchmarks (using std::time) --
|
||||
|
||||
#[test]
|
||||
fn bench_header_serialize_deserialize() {
|
||||
use std::time::Instant;
|
||||
let header = AgiContainerHeader {
|
||||
magic: AGI_MAGIC, version: 1,
|
||||
flags: AGI_HAS_KERNEL | AGI_HAS_WASM | AGI_HAS_ORCHESTRATOR | AGI_SIGNED,
|
||||
container_id: [0x42; 16], build_id: [0x43; 16],
|
||||
created_ns: 1_700_000_000_000_000_000,
|
||||
model_id_hash: [0xAA; 8], policy_hash: [0xBB; 8],
|
||||
};
|
||||
let n: u128 = 100_000;
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..n { let _ = std::hint::black_box(header.to_bytes()); }
|
||||
let ser = start.elapsed();
|
||||
|
||||
let bytes = header.to_bytes();
|
||||
let start = Instant::now();
|
||||
for _ in 0..n { let _ = std::hint::black_box(AgiContainerHeader::from_bytes(&bytes).unwrap()); }
|
||||
let deser = start.elapsed();
|
||||
|
||||
let ser_ns = ser.as_nanos() / n;
|
||||
let deser_ns = deser.as_nanos() / n;
|
||||
eprintln!("Header serialize: {ser_ns:>8} ns/op ({n} iterations in {ser:?})");
|
||||
eprintln!("Header deserialize: {deser_ns:>8} ns/op ({n} iterations in {deser:?})");
|
||||
assert!(ser_ns < 1000, "serialize too slow: {ser_ns} ns/op");
|
||||
assert!(deser_ns < 1000, "deserialize too slow: {deser_ns} ns/op");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bench_container_build_parse() {
|
||||
use std::time::Instant;
|
||||
let n: u128 = 10_000;
|
||||
let segs = || ContainerSegments {
|
||||
kernel_present: true, manifest_present: true,
|
||||
world_model_present: true, ..Default::default()
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..n {
|
||||
let b = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
|
||||
.with_model_id("claude-opus-4-6")
|
||||
.with_policy(b"autonomous", [0xAA; 8])
|
||||
.with_orchestrator(ORCH_JSON)
|
||||
.with_eval_tasks(TASKS_JSON)
|
||||
.with_eval_graders(GRADERS_JSON)
|
||||
.with_segments(segs());
|
||||
let _ = std::hint::black_box(b.build().unwrap());
|
||||
}
|
||||
let build_elapsed = start.elapsed();
|
||||
|
||||
let (payload, _) = AgiContainerBuilder::new([0x01; 16], [0x02; 16])
|
||||
.with_model_id("claude-opus-4-6")
|
||||
.with_orchestrator(ORCH_JSON)
|
||||
.with_eval_tasks(TASKS_JSON)
|
||||
.with_eval_graders(GRADERS_JSON)
|
||||
.with_segments(segs())
|
||||
.build().unwrap();
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..n { let _ = std::hint::black_box(ParsedAgiManifest::parse(&payload).unwrap()); }
|
||||
let parse_elapsed = start.elapsed();
|
||||
|
||||
let build_ns = build_elapsed.as_nanos() / n;
|
||||
let parse_ns = parse_elapsed.as_nanos() / n;
|
||||
eprintln!("Container build: {build_ns:>8} ns/op ({n} iterations in {build_elapsed:?})");
|
||||
eprintln!("Container parse: {parse_ns:>8} ns/op ({n} iterations in {parse_elapsed:?})");
|
||||
assert!(build_ns < 10_000, "build too slow: {build_ns} ns/op");
|
||||
assert!(parse_ns < 5_000, "parse too slow: {parse_ns} ns/op");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bench_flags_computation() {
|
||||
use std::time::Instant;
|
||||
let n: u128 = 1_000_000;
|
||||
let segs = ContainerSegments {
|
||||
kernel_present: true, wasm_count: 2, witness_count: 100,
|
||||
crypto_present: true, orchestrator_present: true,
|
||||
world_model_present: true, vec_segment_count: 4,
|
||||
index_segment_count: 2, ..Default::default()
|
||||
};
|
||||
|
||||
let start = Instant::now();
|
||||
for _ in 0..n { let _ = std::hint::black_box(segs.to_flags()); }
|
||||
let elapsed = start.elapsed();
|
||||
|
||||
let ns = elapsed.as_nanos() / n;
|
||||
eprintln!("Flags computation: {ns:>8} ns/op ({n} iterations in {elapsed:?})");
|
||||
assert!(ns < 100, "flags computation too slow: {ns} ns/op");
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue