diff --git a/crates/ruvllm/src/moe/affinity.rs b/crates/ruvllm/src/moe/affinity.rs
index 81440766..f8d24758 100644
--- a/crates/ruvllm/src/moe/affinity.rs
+++ b/crates/ruvllm/src/moe/affinity.rs
@@ -238,11 +238,19 @@ impl ExpertAffinity {
 
     /// Get experts sorted by affinity score (highest first)
     ///
-    /// Useful for prefetching decisions.
+    /// Useful for prefetching decisions. Non-finite scores (NaN, +/-Inf) rank last.
     pub fn top_k_by_affinity(&self, k: usize) -> Vec<ExpertId> {
-        let mut indexed: Vec<(ExpertId, f32)> =
-            self.scores.iter().copied().enumerate().collect();
-        indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+        let mut indexed: Vec<(ExpertId, f32)> = self
+            .scores
+            .iter()
+            .enumerate()
+            .map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
+            .collect();
+        indexed.sort_by(|a, b| {
+            b.1.partial_cmp(&a.1)
+                .unwrap_or(std::cmp::Ordering::Equal)
+                .then_with(|| a.0.cmp(&b.0)) // Ties (incl. non-finite) go to the lower ID
+        });
         indexed.into_iter().take(k).map(|(id, _)| id).collect()
     }
 
@@ -258,7 +266,7 @@ impl ExpertAffinity {
 
     /// Get the least-affinity expert from a set of candidates
     ///
-    /// Useful for eviction decisions.
+    /// Useful for eviction decisions. Non-finite scores (NaN, +/-Inf) rank lowest (evict first).
     pub fn least_affinity(&self, candidates: &[ExpertId]) -> Option<ExpertId> {
         candidates
             .iter()
@@ -266,7 +274,12 @@ impl ExpertAffinity {
             .min_by(|&a, &b| {
                 let score_a = self.score(a);
                 let score_b = self.score(b);
-                score_a.partial_cmp(&score_b).unwrap_or(std::cmp::Ordering::Equal)
+                // Non-finite scores (NaN, +/-Inf) compare as NEG_INFINITY
+                let sa = if score_a.is_finite() { score_a } else { f32::NEG_INFINITY };
+                let sb = if score_b.is_finite() { score_b } else { f32::NEG_INFINITY };
+                sa.partial_cmp(&sb)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.cmp(&b)) // Equal scores: the lower ID is the minimum
             })
     }
 
diff --git a/crates/ruvllm/src/moe/precision_allocator.rs b/crates/ruvllm/src/moe/precision_allocator.rs
index 629bec0b..d2d681a1 100644
--- a/crates/ruvllm/src/moe/precision_allocator.rs
+++ b/crates/ruvllm/src/moe/precision_allocator.rs
@@ -23,7 +23,7 @@
 //! use ruvllm::gguf::GgufQuantType;
 //!
 //! let config = PrecisionConfig::default();
-//! let mut allocator = PrecisionAllocator::new(8, config);
+//! let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 //!
 //! // Record activations as experts are used
 //! allocator.record_activation(2);
@@ -201,7 +201,7 @@ impl PrecisionConfig {
 /// use ruvllm::moe::precision_allocator::{PrecisionAllocator, PrecisionConfig};
 ///
 /// let config = PrecisionConfig::default();
-/// let mut allocator = PrecisionAllocator::new(8, config);
+/// let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 ///
 /// // Simulate expert activations
 /// for _ in 0..100 { allocator.record_activation(0); } // Hot
@@ -243,21 +243,28 @@ impl PrecisionAllocator {
     /// * `num_experts` - Total number of experts to track.
     /// * `config` - Configuration for precision allocation.
     ///
-    /// # Panics
+    /// # Errors
     ///
-    /// Panics if the configuration is invalid.
-    pub fn new(num_experts: usize, config: PrecisionConfig) -> Self {
-        config
-            .validate()
-            .expect("PrecisionConfig validation failed");
+    /// Returns the `Err` produced by `config.validate()` when the configuration is invalid.
+    pub fn new(num_experts: usize, config: PrecisionConfig) -> Result<Self, &'static str> {
+        config.validate()?;
 
-        Self {
+        Ok(Self {
             num_experts,
             counts: vec![0; num_experts],
             config,
             hot_threshold: 0,
             cold_threshold: 0,
-        }
+        })
+    }
+
+    /// Create a new precision allocator via [`Self::new`], panicking on invalid config.
+    ///
+    /// # Panics
+    ///
+    /// Panics if validation fails; despite the `_unchecked` name, the config is still validated.
+    pub fn new_unchecked(num_experts: usize, config: PrecisionConfig) -> Self {
+        Self::new(num_experts, config).expect("PrecisionConfig validation failed")
     }
 
     /// Record an activation for the given expert.
@@ -513,7 +520,7 @@ mod tests {
     #[test]
     fn test_allocator_creation() {
         let config = PrecisionConfig::default();
-        let allocator = PrecisionAllocator::new(8, config);
+        let allocator = PrecisionAllocator::new(8, config).unwrap();
 
         assert_eq!(allocator.num_experts(), 8);
         assert_eq!(allocator.total_activations(), 0);
@@ -537,7 +544,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 
         // Expert 0 gets 100 activations (max)
         for _ in 0..100 {
@@ -569,7 +576,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 
         // Expert 0 gets 100 activations (max)
         for _ in 0..100 {
@@ -599,7 +606,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 
         // Expert 0 gets 100 activations (max)
         for _ in 0..100 {
@@ -629,7 +636,7 @@ mod tests {
             cold_percentile: 0.2,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();
 
         // Set up activation counts: 100, 75, 25, 5
         for _ in 0..100 {
@@ -677,7 +684,7 @@ mod tests {
     #[test]
     fn test_activation_recording() {
         let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();
 
         // Record individual activations
         allocator.record_activation(0);
@@ -715,7 +722,7 @@ mod tests {
             warm_format: GgufQuantType::Q4_K,
             cold_format: GgufQuantType::Q3_K,
         };
-        let mut allocator = PrecisionAllocator::new(3, config);
+        let mut allocator = PrecisionAllocator::new(3, config).unwrap();
 
         // Set up: 100 (hot), 50 (warm), 10 (cold)
         for _ in 0..100 {
@@ -746,7 +753,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();
 
         // Initially all zeros
         allocator.recompute_thresholds();
@@ -787,7 +794,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();
 
         for _ in 0..100 {
             allocator.record_activation(0);
@@ -823,7 +830,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(8, config);
+        let mut allocator = PrecisionAllocator::new(8, config).unwrap();
 
         // 2 hot, 3 warm, 3 cold
         for _ in 0..100 {
@@ -865,7 +872,7 @@ mod tests {
     #[test]
     fn test_reset() {
         let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();
 
         // Add activations
         for _ in 0..100 {
@@ -898,7 +905,7 @@ mod tests {
             cold_percentile: 0.3,
             ..Default::default()
         };
-        let mut allocator = PrecisionAllocator::new(6, config);
+        let mut allocator = PrecisionAllocator::new(6, config).unwrap();
 
         // Set up known distribution
         for _ in 0..100 {
@@ -937,7 +944,7 @@ mod tests {
     #[test]
     fn test_compute_percentile() {
         let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(4, config);
+        let mut allocator = PrecisionAllocator::new(4, config).unwrap();
 
         // No activations -> 0.0 percentile
         assert_eq!(allocator.compute_percentile(0), 0.0);
@@ -1020,7 +1027,7 @@ mod tests {
     #[test]
     fn test_out_of_bounds_expert_id() {
         let config = PrecisionConfig::default();
-        let allocator = PrecisionAllocator::new(4, config);
+        let allocator = PrecisionAllocator::new(4, config).unwrap();
 
         // Out-of-bounds should return Cold
         assert_eq!(allocator.allocate(100), ExpertPrecision::Cold);
@@ -1068,7 +1075,7 @@ mod tests {
     #[test]
     fn test_saturating_add_for_counts() {
         let config = PrecisionConfig::default();
-        let mut allocator = PrecisionAllocator::new(1, config);
+        let mut allocator = PrecisionAllocator::new(1, config).unwrap();
 
         // Set count close to max
         allocator.counts[0] = u64::MAX - 1;
diff --git a/crates/ruvllm/src/moe/router.rs b/crates/ruvllm/src/moe/router.rs
index 1aaba4b7..557aac0c 100644
--- a/crates/ruvllm/src/moe/router.rs
+++ b/crates/ruvllm/src/moe/router.rs
@@ -216,22 +216,26 @@ impl MemoryAwareRouter {
     /// * `config` - Router configuration
     /// * `affinity` - Expert affinity tracker (can be shared)
     ///
-    /// # Panics
+    /// # Errors
     ///
-    /// Panics if the configuration is invalid.
-    pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Self {
-        config.validate().expect("RouterConfig validation failed");
+    /// Returns the `Err` produced by `config.validate()` when the configuration is invalid.
+    pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Result<Self, &'static str> {
+        config.validate()?;
 
-        Self {
+        Ok(Self {
             cache_resident: vec![false; config.num_experts],
             config,
             affinity,
             metrics: MoeMetrics::new(),
-        }
+        })
     }
 
     /// Create router with default affinity tracker
-    pub fn with_default_affinity(config: RouterConfig) -> Self {
+    ///
+    /// # Errors
+    ///
+    /// Returns `Err` if the configuration is invalid.
+    pub fn with_default_affinity(config: RouterConfig) -> Result<Self, &'static str> {
         let affinity = ExpertAffinity::new(
             super::AffinityConfig::with_num_experts(config.num_experts)
         );
@@ -294,45 +298,88 @@ impl MemoryAwareRouter {
         (selected, paging_requests)
     }
 
-    /// Apply cache residency bonus to scores
+    /// Apply cache residency bonus to scores (in-place mutation for P0 optimization)
+    ///
+    /// Adds `cache_bonus` to the score of every cache-resident expert, biasing
+    /// selection toward cached experts without overriding the gate network.
+    /// Non-finite scores (NaN, +/-Inf) become `f32::NEG_INFINITY` and get no bonus.
+    ///
+    /// # Arguments
+    ///
+    /// * `scores` - Mutable slice of scores to modify in-place
+    pub fn apply_cache_bonus_inplace(&self, scores: &mut [f32]) {
+        for (id, score) in scores.iter_mut().enumerate() {
+            // Demote invalid scores to the lowest rank; 0.0 would outrank valid negative logits
+            if !score.is_finite() {
+                *score = f32::NEG_INFINITY;
+                continue;
+            }
+            if self.cache_resident.get(id).copied().unwrap_or(false) {
+                *score += self.config.cache_bonus;
+            }
+        }
+    }
+
+    /// Apply cache residency bonus to a copy of `scores` (allocating, for API compatibility)
     ///
     /// For each expert currently in cache, adds `cache_bonus` to its score.
     /// This biases the selection toward cached experts without completely
     /// overriding the gate network's decisions.
     pub fn apply_cache_bonus(&self, scores: &[f32]) -> Vec<f32> {
-        scores
-            .iter()
-            .enumerate()
-            .map(|(id, &score)| {
-                let bonus = if self.cache_resident.get(id).copied().unwrap_or(false) {
-                    self.config.cache_bonus
-                } else {
-                    0.0
-                };
-                score + bonus
-            })
-            .collect()
+        let mut result = scores.to_vec(); // copy so the caller's slice stays untouched
+        self.apply_cache_bonus_inplace(&mut result); // inherits its non-finite handling
+        result
     }
 
     /// Select top-K experts by score
     ///
     /// Returns expert IDs sorted by descending score.
     /// Ties are broken by expert ID (lower ID wins) for determinism.
+    ///
+    /// Non-finite scores (NaN, +/-Inf) rank last. When `top_k` is under half of
+    /// `scores.len()`, only the leading `top_k` entries are selected and sorted.
     pub fn select_top_k(&self, scores: &[f32]) -> Vec<ExpertId> {
-        // Create indexed scores
-        let mut indexed: Vec<(ExpertId, f32)> = scores.iter().copied().enumerate().collect();
+        let n = scores.len();
+        let k = self.config.top_k.min(n); // clamp so k never exceeds the input length
 
-        // Sort by score descending, then by ID ascending (for determinism)
-        indexed.sort_by(|a, b| {
-            b.1.partial_cmp(&a.1)
-                .unwrap_or(std::cmp::Ordering::Equal)
-                .then_with(|| a.0.cmp(&b.0))
-        });
+        if k == 0 || n == 0 {
+            return Vec::new();
+        }
+
+        // Index the scores; non-finite values (NaN, +/-Inf) become NEG_INFINITY so they rank last
+        let mut indexed: Vec<(ExpertId, f32)> = scores
+            .iter()
+            .enumerate()
+            .map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
+            .collect();
+
+        // Partial selection path: taken only when k < n / 2
+        if k < n / 2 {
+            // Move the k best entries into indexed[..k] (in arbitrary order)
+            indexed.select_nth_unstable_by(k - 1, |a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+            // Order just those k entries; the rest are dropped by `take(k)` below
+            indexed[..k].sort_by(|a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+        } else {
+            // Otherwise sort everything
+            indexed.sort_by(|a, b| {
+                b.1.partial_cmp(&a.1)
+                    .unwrap_or(std::cmp::Ordering::Equal)
+                    .then_with(|| a.0.cmp(&b.0))
+            });
+        }
 
         // Take top-K
         indexed
             .into_iter()
-            .take(self.config.top_k)
+            .take(k)
             .map(|(id, _)| id)
             .collect()
     }
@@ -457,7 +504,7 @@ mod tests {
 
     fn make_router(num_experts: usize, top_k: usize, cache_bonus: f32) -> MemoryAwareRouter {
         let config = RouterConfig::new(num_experts, top_k).with_cache_bonus(cache_bonus);
-        MemoryAwareRouter::with_default_affinity(config)
+        MemoryAwareRouter::with_default_affinity(config).expect("test config should be valid")
     }
 
     // ---------------------------------------------------------------
@@ -688,7 +735,7 @@ mod tests {
     #[test]
     fn test_memory_aware_disabled() {
         let config = RouterConfig::new(4, 2).with_memory_aware(false).with_cache_bonus(0.5);
-        let mut router = MemoryAwareRouter::with_default_affinity(config);
+        let mut router = MemoryAwareRouter::with_default_affinity(config).unwrap();
 
         // Even with high cache bonus, should not apply it when disabled
         router.update_cache_state(&[3]); // Expert 3 resident
@@ -734,7 +781,7 @@ mod tests {
         let config = RouterConfig::new(4, 2).with_cache_bonus(0.0);
         let affinity_config = AffinityConfig::with_num_experts(4).with_decay(1.0);
         let affinity = ExpertAffinity::new(affinity_config);
-        let mut router = MemoryAwareRouter::new(config, affinity);
+        let mut router = MemoryAwareRouter::new(config, affinity).unwrap();
 
         // Build affinity
         let gate_logits = vec![0.4, 0.3, 0.5, 0.2];
diff --git a/crates/ruvllm/src/moe/sram_mapper.rs b/crates/ruvllm/src/moe/sram_mapper.rs
index 9a00cbcc..678969fb 100644
--- a/crates/ruvllm/src/moe/sram_mapper.rs
+++ b/crates/ruvllm/src/moe/sram_mapper.rs
@@ -156,7 +156,14 @@ impl SramExpertAffinity {
         // - Recency is important for temporal locality
         // - Router weight indicates model preference
         let freq_factor = (self.access_count as f32 + 1.0).ln();
-        let recency_factor = 1.0 / (1.0 + (self.last_access as f32).recip() * 0.001);
+
+        // Explicit zero case: 0.001 / 0.0 is +inf in f32 (no panic) and would also yield 0.0
+        let recency_factor = if self.last_access == 0 {
+            0.0
+        } else {
+            1.0 / (1.0 + 0.001 / self.last_access as f32)
+        };
+
         let weight_factor = self.avg_router_weight * 2.0;
 
         freq_factor + recency_factor + weight_factor
@@ -466,11 +473,13 @@ impl SramMapper {
     /// * `expert_id` - Expert to assign
     /// * `tier` - Target memory tier
     ///
-    /// # Panics
+    /// # Returns
     ///
-    /// Panics if `expert_id >= num_experts`.
-    pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) {
-        assert!(expert_id < self.num_experts, "Expert ID out of range");
+    /// Returns `false` if `expert_id >= num_experts`, `true` otherwise.
+    pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) -> bool {
+        if expert_id >= self.num_experts {
+            return false;
+        }
 
         let old_tier = self.tier_map[expert_id];
 
@@ -497,6 +506,7 @@ impl SramMapper {
         }
 
         self.tier_map[expert_id] = tier;
+        true
     }
 
     /// Get the current memory tier for an expert.