fix(ruvllm): apply security and performance optimizations to MoE routing

HIGH severity security fixes:
- router: Change new() from panic to Result<Self, &'static str>
- router: Change with_default_affinity() to return Result
- precision_allocator: Change new() to return Result, add new_unchecked()
- sram_mapper: Change assign_tier() from assert! to returning bool

MEDIUM severity security fixes:
- router: Add NaN/Inf validation in apply_cache_bonus_inplace()
- router: Handle NaN in select_top_k(), treat as NEG_INFINITY
- affinity: Add NaN handling in top_k_by_affinity() with deterministic tie-breaking
- affinity: Add NaN handling in least_affinity() for eviction decisions
- sram_mapper: Fix division by zero in priority_score() when last_access=0

P0 performance optimizations:
- router: Add apply_cache_bonus_inplace() to avoid allocation in hot path
- router: Use select_nth_unstable_by for partial sort when k << n (O(n) vs O(n log n))

All 103 tests pass (84 unit + 19 integration).

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
Reuven 2026-03-12 15:25:36 -04:00
parent 830fa5c4ed
commit bcecd1d904
4 changed files with 145 additions and 68 deletions

View file

@ -238,11 +238,19 @@ impl ExpertAffinity {
/// Get experts sorted by affinity score (highest first)
///
/// Useful for prefetching decisions.
/// Useful for prefetching decisions. NaN values are treated as lowest priority.
pub fn top_k_by_affinity(&self, k: usize) -> Vec<ExpertId> {
let mut indexed: Vec<(ExpertId, f32)> =
self.scores.iter().copied().enumerate().collect();
indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
let mut indexed: Vec<(ExpertId, f32)> = self
.scores
.iter()
.enumerate()
.map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
.collect();
indexed.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0)) // Deterministic tie-breaking by ID
});
indexed.into_iter().take(k).map(|(id, _)| id).collect()
}
@ -258,7 +266,7 @@ impl ExpertAffinity {
/// Get the least-affinity expert from a set of candidates
///
/// Useful for eviction decisions.
/// Useful for eviction decisions. NaN values are treated as lowest (evict first).
pub fn least_affinity(&self, candidates: &[ExpertId]) -> Option<ExpertId> {
candidates
.iter()
@ -266,7 +274,12 @@ impl ExpertAffinity {
.min_by(|&a, &b| {
let score_a = self.score(a);
let score_b = self.score(b);
score_a.partial_cmp(&score_b).unwrap_or(std::cmp::Ordering::Equal)
// NaN handling: treat NaN as NEG_INFINITY for eviction priority
let sa = if score_a.is_finite() { score_a } else { f32::NEG_INFINITY };
let sb = if score_b.is_finite() { score_b } else { f32::NEG_INFINITY };
sa.partial_cmp(&sb)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.cmp(&b)) // Deterministic tie-breaking
})
}

View file

@ -23,7 +23,7 @@
//! use ruvllm::gguf::GgufQuantType;
//!
//! let config = PrecisionConfig::default();
//! let mut allocator = PrecisionAllocator::new(8, config);
//! let mut allocator = PrecisionAllocator::new(8, config).unwrap();
//!
//! // Record activations as experts are used
//! allocator.record_activation(2);
@ -201,7 +201,7 @@ impl PrecisionConfig {
/// use ruvllm::moe::precision_allocator::{PrecisionAllocator, PrecisionConfig};
///
/// let config = PrecisionConfig::default();
/// let mut allocator = PrecisionAllocator::new(8, config);
/// let mut allocator = PrecisionAllocator::new(8, config).unwrap();
///
/// // Simulate expert activations
/// for _ in 0..100 { allocator.record_activation(0); } // Hot
@ -243,21 +243,28 @@ impl PrecisionAllocator {
/// * `num_experts` - Total number of experts to track.
/// * `config` - Configuration for precision allocation.
///
/// # Panics
/// # Returns
///
/// Panics if the configuration is invalid.
pub fn new(num_experts: usize, config: PrecisionConfig) -> Self {
config
.validate()
.expect("PrecisionConfig validation failed");
/// Returns `Err` if the configuration is invalid.
pub fn new(num_experts: usize, config: PrecisionConfig) -> Result<Self, &'static str> {
config.validate()?;
Self {
Ok(Self {
num_experts,
counts: vec![0; num_experts],
config,
hot_threshold: 0,
cold_threshold: 0,
}
})
}
/// Create a new precision allocator, panicking on invalid config.
///
/// # Panics
///
/// Panics if the configuration is invalid.
pub fn new_unchecked(num_experts: usize, config: PrecisionConfig) -> Self {
Self::new(num_experts, config).expect("PrecisionConfig validation failed")
}
/// Record an activation for the given expert.
@ -513,7 +520,7 @@ mod tests {
#[test]
fn test_allocator_creation() {
let config = PrecisionConfig::default();
let allocator = PrecisionAllocator::new(8, config);
let allocator = PrecisionAllocator::new(8, config).unwrap();
assert_eq!(allocator.num_experts(), 8);
assert_eq!(allocator.total_activations(), 0);
@ -537,7 +544,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(8, config);
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
// Expert 0 gets 100 activations (max)
for _ in 0..100 {
@ -569,7 +576,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(8, config);
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
// Expert 0 gets 100 activations (max)
for _ in 0..100 {
@ -599,7 +606,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(8, config);
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
// Expert 0 gets 100 activations (max)
for _ in 0..100 {
@ -629,7 +636,7 @@ mod tests {
cold_percentile: 0.2,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(4, config);
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
// Set up activation counts: 100, 75, 25, 5
for _ in 0..100 {
@ -677,7 +684,7 @@ mod tests {
#[test]
fn test_activation_recording() {
let config = PrecisionConfig::default();
let mut allocator = PrecisionAllocator::new(4, config);
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
// Record individual activations
allocator.record_activation(0);
@ -715,7 +722,7 @@ mod tests {
warm_format: GgufQuantType::Q4_K,
cold_format: GgufQuantType::Q3_K,
};
let mut allocator = PrecisionAllocator::new(3, config);
let mut allocator = PrecisionAllocator::new(3, config).unwrap();
// Set up: 100 (hot), 50 (warm), 10 (cold)
for _ in 0..100 {
@ -746,7 +753,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(4, config);
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
// Initially all zeros
allocator.recompute_thresholds();
@ -787,7 +794,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(4, config);
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
for _ in 0..100 {
allocator.record_activation(0);
@ -823,7 +830,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(8, config);
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
// 2 hot, 3 warm, 3 cold
for _ in 0..100 {
@ -865,7 +872,7 @@ mod tests {
#[test]
fn test_reset() {
let config = PrecisionConfig::default();
let mut allocator = PrecisionAllocator::new(4, config);
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
// Add activations
for _ in 0..100 {
@ -898,7 +905,7 @@ mod tests {
cold_percentile: 0.3,
..Default::default()
};
let mut allocator = PrecisionAllocator::new(6, config);
let mut allocator = PrecisionAllocator::new(6, config).unwrap();
// Set up known distribution
for _ in 0..100 {
@ -937,7 +944,7 @@ mod tests {
#[test]
fn test_compute_percentile() {
let config = PrecisionConfig::default();
let mut allocator = PrecisionAllocator::new(4, config);
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
// No activations -> 0.0 percentile
assert_eq!(allocator.compute_percentile(0), 0.0);
@ -1020,7 +1027,7 @@ mod tests {
#[test]
fn test_out_of_bounds_expert_id() {
let config = PrecisionConfig::default();
let allocator = PrecisionAllocator::new(4, config);
let allocator = PrecisionAllocator::new(4, config).unwrap();
// Out-of-bounds should return Cold
assert_eq!(allocator.allocate(100), ExpertPrecision::Cold);
@ -1068,7 +1075,7 @@ mod tests {
#[test]
fn test_saturating_add_for_counts() {
let config = PrecisionConfig::default();
let mut allocator = PrecisionAllocator::new(1, config);
let mut allocator = PrecisionAllocator::new(1, config).unwrap();
// Set count close to max
allocator.counts[0] = u64::MAX - 1;

View file

@ -216,22 +216,26 @@ impl MemoryAwareRouter {
/// * `config` - Router configuration
/// * `affinity` - Expert affinity tracker (can be shared)
///
/// # Panics
/// # Returns
///
/// Panics if the configuration is invalid.
pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Self {
config.validate().expect("RouterConfig validation failed");
/// Returns `Err` if the configuration is invalid.
pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Result<Self, &'static str> {
config.validate()?;
Self {
Ok(Self {
cache_resident: vec![false; config.num_experts],
config,
affinity,
metrics: MoeMetrics::new(),
}
})
}
/// Create router with default affinity tracker
pub fn with_default_affinity(config: RouterConfig) -> Self {
///
/// # Returns
///
/// Returns `Err` if the configuration is invalid.
pub fn with_default_affinity(config: RouterConfig) -> Result<Self, &'static str> {
let affinity = ExpertAffinity::new(
super::AffinityConfig::with_num_experts(config.num_experts)
);
@ -294,45 +298,88 @@ impl MemoryAwareRouter {
(selected, paging_requests)
}
/// Apply cache residency bonus to scores
/// Apply cache residency bonus to scores (in-place mutation for P0 optimization)
///
/// For each expert currently in cache, adds `cache_bonus` to its score.
/// This biases the selection toward cached experts without completely
/// overriding the gate network's decisions.
///
/// # Arguments
///
/// * `scores` - Mutable slice of scores to modify in-place
pub fn apply_cache_bonus_inplace(&self, scores: &mut [f32]) {
for (id, score) in scores.iter_mut().enumerate() {
// Validate score is not NaN/Inf before processing
if !score.is_finite() {
*score = 0.0;
continue;
}
if self.cache_resident.get(id).copied().unwrap_or(false) {
*score += self.config.cache_bonus;
}
}
}
/// Apply cache residency bonus to scores (allocating version for API compatibility)
///
/// For each expert currently in cache, adds `cache_bonus` to its score.
/// This biases the selection toward cached experts without completely
/// overriding the gate network's decisions.
pub fn apply_cache_bonus(&self, scores: &[f32]) -> Vec<f32> {
scores
.iter()
.enumerate()
.map(|(id, &score)| {
let bonus = if self.cache_resident.get(id).copied().unwrap_or(false) {
self.config.cache_bonus
} else {
0.0
};
score + bonus
})
.collect()
let mut result = scores.to_vec();
self.apply_cache_bonus_inplace(&mut result);
result
}
/// Select top-K experts by score
///
/// Returns expert IDs sorted by descending score.
/// Ties are broken by expert ID (lower ID wins) for determinism.
///
/// Uses partial sort (P0 optimization) for better performance when
/// top_k << num_experts.
pub fn select_top_k(&self, scores: &[f32]) -> Vec<ExpertId> {
// Create indexed scores
let mut indexed: Vec<(ExpertId, f32)> = scores.iter().copied().enumerate().collect();
let n = scores.len();
let k = self.config.top_k.min(n);
// Sort by score descending, then by ID ascending (for determinism)
indexed.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
if k == 0 || n == 0 {
return Vec::new();
}
// Create indexed scores, handling NaN/Inf values
let mut indexed: Vec<(ExpertId, f32)> = scores
.iter()
.enumerate()
.map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
.collect();
// Use partial sort for better performance when k << n
if k < n / 2 {
// Partition to get top-k elements (unordered)
indexed.select_nth_unstable_by(k - 1, |a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
// Sort only the top-k portion
indexed[..k].sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
} else {
// Full sort when k is close to n
indexed.sort_by(|a, b| {
b.1.partial_cmp(&a.1)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| a.0.cmp(&b.0))
});
}
// Take top-K
indexed
.into_iter()
.take(self.config.top_k)
.take(k)
.map(|(id, _)| id)
.collect()
}
@ -457,7 +504,7 @@ mod tests {
fn make_router(num_experts: usize, top_k: usize, cache_bonus: f32) -> MemoryAwareRouter {
let config = RouterConfig::new(num_experts, top_k).with_cache_bonus(cache_bonus);
MemoryAwareRouter::with_default_affinity(config)
MemoryAwareRouter::with_default_affinity(config).expect("test config should be valid")
}
// ---------------------------------------------------------------
@ -688,7 +735,7 @@ mod tests {
#[test]
fn test_memory_aware_disabled() {
let config = RouterConfig::new(4, 2).with_memory_aware(false).with_cache_bonus(0.5);
let mut router = MemoryAwareRouter::with_default_affinity(config);
let mut router = MemoryAwareRouter::with_default_affinity(config).unwrap();
// Even with high cache bonus, should not apply it when disabled
router.update_cache_state(&[3]); // Expert 3 resident
@ -734,7 +781,7 @@ mod tests {
let config = RouterConfig::new(4, 2).with_cache_bonus(0.0);
let affinity_config = AffinityConfig::with_num_experts(4).with_decay(1.0);
let affinity = ExpertAffinity::new(affinity_config);
let mut router = MemoryAwareRouter::new(config, affinity);
let mut router = MemoryAwareRouter::new(config, affinity).unwrap();
// Build affinity
let gate_logits = vec![0.4, 0.3, 0.5, 0.2];

View file

@ -156,7 +156,14 @@ impl SramExpertAffinity {
// - Recency is important for temporal locality
// - Router weight indicates model preference
let freq_factor = (self.access_count as f32 + 1.0).ln();
let recency_factor = 1.0 / (1.0 + (self.last_access as f32).recip() * 0.001);
// Guard against division by zero when last_access is 0
let recency_factor = if self.last_access == 0 {
0.0
} else {
1.0 / (1.0 + 0.001 / self.last_access as f32)
};
let weight_factor = self.avg_router_weight * 2.0;
freq_factor + recency_factor + weight_factor
@ -466,11 +473,13 @@ impl SramMapper {
/// * `expert_id` - Expert to assign
/// * `tier` - Target memory tier
///
/// # Panics
/// # Returns
///
/// Panics if `expert_id >= num_experts`.
pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) {
assert!(expert_id < self.num_experts, "Expert ID out of range");
/// Returns `false` if `expert_id >= num_experts`, `true` otherwise.
pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) -> bool {
if expert_id >= self.num_experts {
return false;
}
let old_tier = self.tier_map[expert_id];
@ -497,6 +506,7 @@ impl SramMapper {
}
self.tier_map[expert_id] = tier;
true
}
/// Get the current memory tier for an expert.