fix(ruvllm): apply security and performance optimizations to MoE routing

HIGH severity security fixes: - router: Change new() from panic to Result<Self, &'static str> - router: Change with_default_affinity() to return Result - precision_allocator: Change new() to return Result, add new_unchecked() - sram_mapper: Change assign_tier() from assert! to returning bool MEDIUM severity security fixes: - router: Add NaN/Inf validation in apply_cache_bonus_inplace() - router: Handle NaN in select_top_k(), treat as NEG_INFINITY - affinity: Add NaN handling in top_k_by_affinity() with deterministic tie-breaking - affinity: Add NaN handling in least_affinity() for eviction decisions - sram_mapper: Fix division by zero in priority_score() when last_access=0 P0 performance optimizations: - router: Add apply_cache_bonus_inplace() to avoid allocation in hot path - router: Use select_nth_unstable_by for partial sort when k << n (O(n) vs O(n log n)) All 103 tests pass (84 unit + 19 integration). Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-24 05:43:58 +00:00 · 2026-03-12 15:25:36 -04:00 · 2026-03-12 15:25:36 -04:00 · bcecd1d904
commit bcecd1d904
parent 830fa5c4ed
4 changed files with 145 additions and 68 deletions
--- a/crates/ruvllm/src/moe/affinity.rs
+++ b/crates/ruvllm/src/moe/affinity.rs
@ -238,11 +238,19 @@ impl ExpertAffinity {

    /// Get experts sorted by affinity score (highest first)
    ///
-    /// Useful for prefetching decisions.
+    /// Useful for prefetching decisions. NaN values are treated as lowest priority.
    pub fn top_k_by_affinity(&self, k: usize) -> Vec<ExpertId> {
-        let mut indexed: Vec<(ExpertId, f32)> =
-            self.scores.iter().copied().enumerate().collect();
-        indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+        let mut indexed: Vec<(ExpertId, f32)> = self
+            .scores
+            .iter()
+            .enumerate()
+            .map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
+            .collect();
+        indexed.sort_by(|a, b| {
+            b.1.partial_cmp(&a.1)
+                .unwrap_or(std::cmp::Ordering::Equal)
+                .then_with(|| a.0.cmp(&b.0)) // Deterministic tie-breaking by ID
+        });
        indexed.into_iter().take(k).map(|(id, _)| id).collect()
    }

@ -258,7 +266,7 @@ impl ExpertAffinity {

    /// Get the least-affinity expert from a set of candidates
    ///
-    /// Useful for eviction decisions.
+    /// Useful for eviction decisions. NaN values are treated as lowest (evict first).
    pub fn least_affinity(&self, candidates: &[ExpertId]) -> Option<ExpertId> {
        candidates
            .iter()
@ -266,7 +274,12 @@ impl ExpertAffinity {
            .min_by(|&a, &b| {
                let score_a = self.score(a);
                let score_b = self.score(b);
-                score_a.partial_cmp(&score_b).unwrap_or(std::cmp::Ordering::Equal)
+                // NaN handling: treat NaN as NEG_INFINITY for eviction priority
+                let sa = if score_a.is_finite() { score_a } else { f32::NEG_INFINITY };
+                let sb = if score_b.is_finite() { score_b } else { f32::NEG_INFINITY };
+                sa.partial_cmp(&sb)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.cmp(&b)) // Deterministic tie-breaking
            })
    }

--- a/crates/ruvllm/src/moe/precision_allocator.rs
+++ b/crates/ruvllm/src/moe/precision_allocator.rs
@ -23,7 +23,7 @@
 //! use ruvllm::gguf::GgufQuantType;
 //!
 //! let config = PrecisionConfig::default();
-//! let mut allocator = PrecisionAllocator::new(8, config);
+//! let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 //!
 //! // Record activations as experts are used
 //! allocator.record_activation(2);
@ -201,7 +201,7 @@ impl PrecisionConfig {
 /// use ruvllm::moe::precision_allocator::{PrecisionAllocator, PrecisionConfig};
 ///
 /// let config = PrecisionConfig::default();
-/// let mut allocator = PrecisionAllocator::new(8, config);
+/// let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 ///
 /// // Simulate expert activations
 /// for _ in 0..100 { allocator.record_activation(0); } // Hot
@ -243,21 +243,28 @@ impl PrecisionAllocator {
    /// * `num_experts` - Total number of experts to track.
    /// * `config` - Configuration for precision allocation.
    ///
-    /// # Panics
+    /// # Returns
    ///
-    /// Panics if the configuration is invalid.
-    pub fn new(num_experts: usize, config: PrecisionConfig) -> Self {
-        config
-            .validate()
-            .expect("PrecisionConfig validation failed");
+    /// Returns `Err` if the configuration is invalid.
+    pub fn new(num_experts: usize, config: PrecisionConfig) -> Result<Self, &'static str> {
+        config.validate()?;

-        Self {
+        Ok(Self {
            num_experts,
            counts: vec![0; num_experts],
            config,
            hot_threshold: 0,
            cold_threshold: 0,
-        }
+        })
+    }
+
+    /// Create a new precision allocator, panicking on invalid config.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the configuration is invalid.
+    pub fn new_unchecked(num_experts: usize, config: PrecisionConfig) -> Self {
+        Self::new(num_experts, config).expect("PrecisionConfig validation failed")
    }

    /// Record an activation for the given expert.
@ -513,7 +520,7 @@ mod tests {
    #[test]
    fn test_allocator_creation() {
        let config = PrecisionConfig::default();
-        let allocator = PrecisionAllocator::new(8, config);
+        let allocator = PrecisionAllocator::new(8, config).unwrap();

        assert_eq!(allocator.num_experts(), 8);
        assert_eq!(allocator.total_activations(), 0);
@ -537,7 +544,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();

        // Expert 0 gets 100 activations (max)
        for _ in 0..100 {
@ -569,7 +576,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();

        // Expert 0 gets 100 activations (max)
        for _ in 0..100 {
@ -599,7 +606,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();

        // Expert 0 gets 100 activations (max)
        for _ in 0..100 {
@ -629,7 +636,7 @@ mod tests {
            cold_percentile: 0.2,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();

        // Set up activation counts: 100, 75, 25, 5
        for _ in 0..100 {
@ -677,7 +684,7 @@ mod tests {
    #[test]
    fn test_activation_recording() {
        let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();

        // Record individual activations
        allocator.record_activation(0);
@ -715,7 +722,7 @@ mod tests {
            warm_format: GgufQuantType::Q4_K,
            cold_format: GgufQuantType::Q3_K,
        };
-        let mut allocator = PrecisionAllocator::new(3, config);
+        let mut allocator = PrecisionAllocator::new(3, config).unwrap();

        // Set up: 100 (hot), 50 (warm), 10 (cold)
        for _ in 0..100 {
@ -746,7 +753,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();

        // Initially all zeros
        allocator.recompute_thresholds();
@ -787,7 +794,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();

        for _ in 0..100 {
            allocator.record_activation(0);
@ -823,7 +830,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();

        // 2 hot, 3 warm, 3 cold
        for _ in 0..100 {
@ -865,7 +872,7 @@ mod tests {
    #[test]
    fn test_reset() {
        let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();

        // Add activations
        for _ in 0..100 {
@ -898,7 +905,7 @@ mod tests {
            cold_percentile: 0.3,
            ..Default::default()
        };
-        let mut allocator = PrecisionAllocator::new(6, config);
+        let mut allocator = PrecisionAllocator::new(6, config).unwrap();

        // Set up known distribution
        for _ in 0..100 {
@ -937,7 +944,7 @@ mod tests {
    #[test]
    fn test_compute_percentile() {
        let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();

        // No activations -> 0.0 percentile
        assert_eq!(allocator.compute_percentile(0), 0.0);
@ -1020,7 +1027,7 @@ mod tests {
    #[test]
    fn test_out_of_bounds_expert_id() {
        let config = PrecisionConfig::default();
-        let allocator = PrecisionAllocator::new(4, config);
+        let allocator = PrecisionAllocator::new(4, config).unwrap();

        // Out-of-bounds should return Cold
        assert_eq!(allocator.allocate(100), ExpertPrecision::Cold);
@ -1068,7 +1075,7 @@ mod tests {
    #[test]
    fn test_saturating_add_for_counts() {
        let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(1, config);
+        let mut allocator = PrecisionAllocator::new(1, config).unwrap();

        // Set count close to max
        allocator.counts[0] = u64::MAX - 1;
--- a/crates/ruvllm/src/moe/router.rs
+++ b/crates/ruvllm/src/moe/router.rs
@ -216,22 +216,26 @@ impl MemoryAwareRouter {
    /// * `config` - Router configuration
    /// * `affinity` - Expert affinity tracker (can be shared)
    ///
-    /// # Panics
+    /// # Returns
    ///
-    /// Panics if the configuration is invalid.
-    pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Self {
-        config.validate().expect("RouterConfig validation failed");
+    /// Returns `Err` if the configuration is invalid.
+    pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Result<Self, &'static str> {
+        config.validate()?;

-        Self {
+        Ok(Self {
            cache_resident: vec![false; config.num_experts],
            config,
            affinity,
            metrics: MoeMetrics::new(),
-        }
+        })
    }

    /// Create router with default affinity tracker
-    pub fn with_default_affinity(config: RouterConfig) -> Self {
+    ///
+    /// # Returns
+    ///
+    /// Returns `Err` if the configuration is invalid.
+    pub fn with_default_affinity(config: RouterConfig) -> Result<Self, &'static str> {
        let affinity = ExpertAffinity::new(
            super::AffinityConfig::with_num_experts(config.num_experts)
        );
@ -294,45 +298,88 @@ impl MemoryAwareRouter {
        (selected, paging_requests)
    }

-    /// Apply cache residency bonus to scores
+    /// Apply cache residency bonus to scores (in-place mutation for P0 optimization)
+    ///
+    /// For each expert currently in cache, adds `cache_bonus` to its score.
+    /// This biases the selection toward cached experts without completely
+    /// overriding the gate network's decisions.
+    ///
+    /// # Arguments
+    ///
+    /// * `scores` - Mutable slice of scores to modify in-place
+    pub fn apply_cache_bonus_inplace(&self, scores: &mut [f32]) {
+        for (id, score) in scores.iter_mut().enumerate() {
+            // Validate score is not NaN/Inf before processing
+            if !score.is_finite() {
+                *score = 0.0;
+                continue;
+            }
+            if self.cache_resident.get(id).copied().unwrap_or(false) {
+                *score += self.config.cache_bonus;
+            }
+        }
+    }
+
+    /// Apply cache residency bonus to scores (allocating version for API compatibility)
    ///
    /// For each expert currently in cache, adds `cache_bonus` to its score.
    /// This biases the selection toward cached experts without completely
    /// overriding the gate network's decisions.
    pub fn apply_cache_bonus(&self, scores: &[f32]) -> Vec<f32> {
-        scores
-            .iter()
-            .enumerate()
-            .map(|(id, &score)| {
-                let bonus = if self.cache_resident.get(id).copied().unwrap_or(false) {
-                    self.config.cache_bonus
-                } else {
-                    0.0
-                };
-                score + bonus
-            })
-            .collect()
+        let mut result = scores.to_vec();
+        self.apply_cache_bonus_inplace(&mut result);
+        result
    }

    /// Select top-K experts by score
    ///
    /// Returns expert IDs sorted by descending score.
    /// Ties are broken by expert ID (lower ID wins) for determinism.
+    ///
+    /// Uses partial sort (P0 optimization) for better performance when
+    /// top_k << num_experts.
    pub fn select_top_k(&self, scores: &[f32]) -> Vec<ExpertId> {
-        // Create indexed scores
-        let mut indexed: Vec<(ExpertId, f32)> = scores.iter().copied().enumerate().collect();
+        let n = scores.len();
+        let k = self.config.top_k.min(n);

-        // Sort by score descending, then by ID ascending (for determinism)
-        indexed.sort_by(|a, b| {
-            b.1.partial_cmp(&a.1)
-                .unwrap_or(std::cmp::Ordering::Equal)
-                .then_with(|| a.0.cmp(&b.0))
-        });
+        if k == 0 || n == 0 {
+            return Vec::new();
+        }
+
+        // Create indexed scores, handling NaN/Inf values
+        let mut indexed: Vec<(ExpertId, f32)> = scores
+            .iter()
+            .enumerate()
+            .map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
+            .collect();
+
+        // Use partial sort for better performance when k << n
+        if k < n / 2 {
+            // Partition to get top-k elements (unordered)
+            indexed.select_nth_unstable_by(k - 1, |a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+            // Sort only the top-k portion
+            indexed[..k].sort_by(|a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+        } else {
+            // Full sort when k is close to n
+            indexed.sort_by(|a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+        }

        // Take top-K
        indexed
            .into_iter()
-            .take(self.config.top_k)
+            .take(k)
            .map(|(id, _)| id)
            .collect()
    }
@ -457,7 +504,7 @@ mod tests {

    fn make_router(num_experts: usize, top_k: usize, cache_bonus: f32) -> MemoryAwareRouter {
        let config = RouterConfig::new(num_experts, top_k).with_cache_bonus(cache_bonus);
-        MemoryAwareRouter::with_default_affinity(config)
+        MemoryAwareRouter::with_default_affinity(config).expect("test config should be valid")
    }

    // ---------------------------------------------------------------
@ -688,7 +735,7 @@ mod tests {
    #[test]
    fn test_memory_aware_disabled() {
        let config = RouterConfig::new(4, 2).with_memory_aware(false).with_cache_bonus(0.5);
-        let mut router = MemoryAwareRouter::with_default_affinity(config);
+        let mut router = MemoryAwareRouter::with_default_affinity(config).unwrap();

        // Even with high cache bonus, should not apply it when disabled
        router.update_cache_state(&[3]); // Expert 3 resident
@ -734,7 +781,7 @@ mod tests {
        let config = RouterConfig::new(4, 2).with_cache_bonus(0.0);
        let affinity_config = AffinityConfig::with_num_experts(4).with_decay(1.0);
        let affinity = ExpertAffinity::new(affinity_config);
-        let mut router = MemoryAwareRouter::new(config, affinity);
+        let mut router = MemoryAwareRouter::new(config, affinity).unwrap();

        // Build affinity
        let gate_logits = vec![0.4, 0.3, 0.5, 0.2];
--- a/crates/ruvllm/src/moe/sram_mapper.rs
+++ b/crates/ruvllm/src/moe/sram_mapper.rs
@ -156,7 +156,14 @@ impl SramExpertAffinity {
        // - Recency is important for temporal locality
        // - Router weight indicates model preference
        let freq_factor = (self.access_count as f32 + 1.0).ln();
-        let recency_factor = 1.0 / (1.0 + (self.last_access as f32).recip() * 0.001);
+
+        // Guard against division by zero when last_access is 0
+        let recency_factor = if self.last_access == 0 {
+            0.0
+        } else {
+            1.0 / (1.0 + 0.001 / self.last_access as f32)
+        };
+
        let weight_factor = self.avg_router_weight * 2.0;

        freq_factor + recency_factor + weight_factor
@ -466,11 +473,13 @@ impl SramMapper {
    /// * `expert_id` - Expert to assign
    /// * `tier` - Target memory tier
    ///
-    /// # Panics
+    /// # Returns
    ///
-    /// Panics if `expert_id >= num_experts`.
-    pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) {
-        assert!(expert_id < self.num_experts, "Expert ID out of range");
+    /// Returns `false` if `expert_id >= num_experts`, `true` otherwise.
+    pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) -> bool {
+        if expert_id >= self.num_experts {
+            return false;
+        }

        let old_tier = self.tier_map[expert_id];

@ -497,6 +506,7 @@ impl SramMapper {
        }

        self.tier_map[expert_id] = tier;
+        true
    }

    /// Get the current memory tier for an expert.