mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-05-24 05:43:58 +00:00
fix(ruvllm): apply security and performance optimizations to MoE routing
HIGH severity security fixes: - router: Change new() from panic to Result<Self, &'static str> - router: Change with_default_affinity() to return Result - precision_allocator: Change new() to return Result, add new_unchecked() - sram_mapper: Change assign_tier() from assert! to returning bool MEDIUM severity security fixes: - router: Add NaN/Inf validation in apply_cache_bonus_inplace() - router: Handle NaN in select_top_k(), treat as NEG_INFINITY - affinity: Add NaN handling in top_k_by_affinity() with deterministic tie-breaking - affinity: Add NaN handling in least_affinity() for eviction decisions - sram_mapper: Fix division by zero in priority_score() when last_access=0 P0 performance optimizations: - router: Add apply_cache_bonus_inplace() to avoid allocation in hot path - router: Use select_nth_unstable_by for partial sort when k << n (O(n) vs O(n log n)) All 103 tests pass (84 unit + 19 integration). Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
830fa5c4ed
commit
bcecd1d904
4 changed files with 145 additions and 68 deletions
|
|
@ -238,11 +238,19 @@ impl ExpertAffinity {
|
|||
|
||||
/// Get experts sorted by affinity score (highest first)
|
||||
///
|
||||
/// Useful for prefetching decisions.
|
||||
/// Useful for prefetching decisions. NaN values are treated as lowest priority.
|
||||
pub fn top_k_by_affinity(&self, k: usize) -> Vec<ExpertId> {
|
||||
let mut indexed: Vec<(ExpertId, f32)> =
|
||||
self.scores.iter().copied().enumerate().collect();
|
||||
indexed.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
|
||||
let mut indexed: Vec<(ExpertId, f32)> = self
|
||||
.scores
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
|
||||
.collect();
|
||||
indexed.sort_by(|a, b| {
|
||||
b.1.partial_cmp(&a.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| a.0.cmp(&b.0)) // Deterministic tie-breaking by ID
|
||||
});
|
||||
indexed.into_iter().take(k).map(|(id, _)| id).collect()
|
||||
}
|
||||
|
||||
|
|
@ -258,7 +266,7 @@ impl ExpertAffinity {
|
|||
|
||||
/// Get the least-affinity expert from a set of candidates
|
||||
///
|
||||
/// Useful for eviction decisions.
|
||||
/// Useful for eviction decisions. NaN values are treated as lowest (evict first).
|
||||
pub fn least_affinity(&self, candidates: &[ExpertId]) -> Option<ExpertId> {
|
||||
candidates
|
||||
.iter()
|
||||
|
|
@ -266,7 +274,12 @@ impl ExpertAffinity {
|
|||
.min_by(|&a, &b| {
|
||||
let score_a = self.score(a);
|
||||
let score_b = self.score(b);
|
||||
score_a.partial_cmp(&score_b).unwrap_or(std::cmp::Ordering::Equal)
|
||||
// NaN handling: treat NaN as NEG_INFINITY for eviction priority
|
||||
let sa = if score_a.is_finite() { score_a } else { f32::NEG_INFINITY };
|
||||
let sb = if score_b.is_finite() { score_b } else { f32::NEG_INFINITY };
|
||||
sa.partial_cmp(&sb)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| a.cmp(&b)) // Deterministic tie-breaking
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@
|
|||
//! use ruvllm::gguf::GgufQuantType;
|
||||
//!
|
||||
//! let config = PrecisionConfig::default();
|
||||
//! let mut allocator = PrecisionAllocator::new(8, config);
|
||||
//! let mut allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
//!
|
||||
//! // Record activations as experts are used
|
||||
//! allocator.record_activation(2);
|
||||
|
|
@ -201,7 +201,7 @@ impl PrecisionConfig {
|
|||
/// use ruvllm::moe::precision_allocator::{PrecisionAllocator, PrecisionConfig};
|
||||
///
|
||||
/// let config = PrecisionConfig::default();
|
||||
/// let mut allocator = PrecisionAllocator::new(8, config);
|
||||
/// let mut allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
///
|
||||
/// // Simulate expert activations
|
||||
/// for _ in 0..100 { allocator.record_activation(0); } // Hot
|
||||
|
|
@ -243,21 +243,28 @@ impl PrecisionAllocator {
|
|||
/// * `num_experts` - Total number of experts to track.
|
||||
/// * `config` - Configuration for precision allocation.
|
||||
///
|
||||
/// # Panics
|
||||
/// # Returns
|
||||
///
|
||||
/// Panics if the configuration is invalid.
|
||||
pub fn new(num_experts: usize, config: PrecisionConfig) -> Self {
|
||||
config
|
||||
.validate()
|
||||
.expect("PrecisionConfig validation failed");
|
||||
/// Returns `Err` if the configuration is invalid.
|
||||
pub fn new(num_experts: usize, config: PrecisionConfig) -> Result<Self, &'static str> {
|
||||
config.validate()?;
|
||||
|
||||
Self {
|
||||
Ok(Self {
|
||||
num_experts,
|
||||
counts: vec![0; num_experts],
|
||||
config,
|
||||
hot_threshold: 0,
|
||||
cold_threshold: 0,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Create a new precision allocator, panicking on invalid config.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// Panics if the configuration is invalid.
|
||||
pub fn new_unchecked(num_experts: usize, config: PrecisionConfig) -> Self {
|
||||
Self::new(num_experts, config).expect("PrecisionConfig validation failed")
|
||||
}
|
||||
|
||||
/// Record an activation for the given expert.
|
||||
|
|
@ -513,7 +520,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_allocator_creation() {
|
||||
let config = PrecisionConfig::default();
|
||||
let allocator = PrecisionAllocator::new(8, config);
|
||||
let allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
|
||||
assert_eq!(allocator.num_experts(), 8);
|
||||
assert_eq!(allocator.total_activations(), 0);
|
||||
|
|
@ -537,7 +544,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(8, config);
|
||||
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
|
||||
// Expert 0 gets 100 activations (max)
|
||||
for _ in 0..100 {
|
||||
|
|
@ -569,7 +576,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(8, config);
|
||||
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
|
||||
// Expert 0 gets 100 activations (max)
|
||||
for _ in 0..100 {
|
||||
|
|
@ -599,7 +606,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(8, config);
|
||||
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
|
||||
// Expert 0 gets 100 activations (max)
|
||||
for _ in 0..100 {
|
||||
|
|
@ -629,7 +636,7 @@ mod tests {
|
|||
cold_percentile: 0.2,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(4, config);
|
||||
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
// Set up activation counts: 100, 75, 25, 5
|
||||
for _ in 0..100 {
|
||||
|
|
@ -677,7 +684,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_activation_recording() {
|
||||
let config = PrecisionConfig::default();
|
||||
let mut allocator = PrecisionAllocator::new(4, config);
|
||||
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
// Record individual activations
|
||||
allocator.record_activation(0);
|
||||
|
|
@ -715,7 +722,7 @@ mod tests {
|
|||
warm_format: GgufQuantType::Q4_K,
|
||||
cold_format: GgufQuantType::Q3_K,
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(3, config);
|
||||
let mut allocator = PrecisionAllocator::new(3, config).unwrap();
|
||||
|
||||
// Set up: 100 (hot), 50 (warm), 10 (cold)
|
||||
for _ in 0..100 {
|
||||
|
|
@ -746,7 +753,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(4, config);
|
||||
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
// Initially all zeros
|
||||
allocator.recompute_thresholds();
|
||||
|
|
@ -787,7 +794,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(4, config);
|
||||
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
for _ in 0..100 {
|
||||
allocator.record_activation(0);
|
||||
|
|
@ -823,7 +830,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(8, config);
|
||||
let mut allocator = PrecisionAllocator::new(8, config).unwrap();
|
||||
|
||||
// 2 hot, 3 warm, 3 cold
|
||||
for _ in 0..100 {
|
||||
|
|
@ -865,7 +872,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_reset() {
|
||||
let config = PrecisionConfig::default();
|
||||
let mut allocator = PrecisionAllocator::new(4, config);
|
||||
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
// Add activations
|
||||
for _ in 0..100 {
|
||||
|
|
@ -898,7 +905,7 @@ mod tests {
|
|||
cold_percentile: 0.3,
|
||||
..Default::default()
|
||||
};
|
||||
let mut allocator = PrecisionAllocator::new(6, config);
|
||||
let mut allocator = PrecisionAllocator::new(6, config).unwrap();
|
||||
|
||||
// Set up known distribution
|
||||
for _ in 0..100 {
|
||||
|
|
@ -937,7 +944,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_compute_percentile() {
|
||||
let config = PrecisionConfig::default();
|
||||
let mut allocator = PrecisionAllocator::new(4, config);
|
||||
let mut allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
// No activations -> 0.0 percentile
|
||||
assert_eq!(allocator.compute_percentile(0), 0.0);
|
||||
|
|
@ -1020,7 +1027,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_out_of_bounds_expert_id() {
|
||||
let config = PrecisionConfig::default();
|
||||
let allocator = PrecisionAllocator::new(4, config);
|
||||
let allocator = PrecisionAllocator::new(4, config).unwrap();
|
||||
|
||||
// Out-of-bounds should return Cold
|
||||
assert_eq!(allocator.allocate(100), ExpertPrecision::Cold);
|
||||
|
|
@ -1068,7 +1075,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_saturating_add_for_counts() {
|
||||
let config = PrecisionConfig::default();
|
||||
let mut allocator = PrecisionAllocator::new(1, config);
|
||||
let mut allocator = PrecisionAllocator::new(1, config).unwrap();
|
||||
|
||||
// Set count close to max
|
||||
allocator.counts[0] = u64::MAX - 1;
|
||||
|
|
|
|||
|
|
@ -216,22 +216,26 @@ impl MemoryAwareRouter {
|
|||
/// * `config` - Router configuration
|
||||
/// * `affinity` - Expert affinity tracker (can be shared)
|
||||
///
|
||||
/// # Panics
|
||||
/// # Returns
|
||||
///
|
||||
/// Panics if the configuration is invalid.
|
||||
pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Self {
|
||||
config.validate().expect("RouterConfig validation failed");
|
||||
/// Returns `Err` if the configuration is invalid.
|
||||
pub fn new(config: RouterConfig, affinity: ExpertAffinity) -> Result<Self, &'static str> {
|
||||
config.validate()?;
|
||||
|
||||
Self {
|
||||
Ok(Self {
|
||||
cache_resident: vec![false; config.num_experts],
|
||||
config,
|
||||
affinity,
|
||||
metrics: MoeMetrics::new(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Create router with default affinity tracker
|
||||
pub fn with_default_affinity(config: RouterConfig) -> Self {
|
||||
///
|
||||
/// # Returns
|
||||
///
|
||||
/// Returns `Err` if the configuration is invalid.
|
||||
pub fn with_default_affinity(config: RouterConfig) -> Result<Self, &'static str> {
|
||||
let affinity = ExpertAffinity::new(
|
||||
super::AffinityConfig::with_num_experts(config.num_experts)
|
||||
);
|
||||
|
|
@ -294,45 +298,88 @@ impl MemoryAwareRouter {
|
|||
(selected, paging_requests)
|
||||
}
|
||||
|
||||
/// Apply cache residency bonus to scores
|
||||
/// Apply cache residency bonus to scores (in-place mutation for P0 optimization)
|
||||
///
|
||||
/// For each expert currently in cache, adds `cache_bonus` to its score.
|
||||
/// This biases the selection toward cached experts without completely
|
||||
/// overriding the gate network's decisions.
|
||||
///
|
||||
/// # Arguments
|
||||
///
|
||||
/// * `scores` - Mutable slice of scores to modify in-place
|
||||
pub fn apply_cache_bonus_inplace(&self, scores: &mut [f32]) {
|
||||
for (id, score) in scores.iter_mut().enumerate() {
|
||||
// Validate score is not NaN/Inf before processing
|
||||
if !score.is_finite() {
|
||||
*score = 0.0;
|
||||
continue;
|
||||
}
|
||||
if self.cache_resident.get(id).copied().unwrap_or(false) {
|
||||
*score += self.config.cache_bonus;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply cache residency bonus to scores (allocating version for API compatibility)
|
||||
///
|
||||
/// For each expert currently in cache, adds `cache_bonus` to its score.
|
||||
/// This biases the selection toward cached experts without completely
|
||||
/// overriding the gate network's decisions.
|
||||
pub fn apply_cache_bonus(&self, scores: &[f32]) -> Vec<f32> {
|
||||
scores
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(id, &score)| {
|
||||
let bonus = if self.cache_resident.get(id).copied().unwrap_or(false) {
|
||||
self.config.cache_bonus
|
||||
} else {
|
||||
0.0
|
||||
};
|
||||
score + bonus
|
||||
})
|
||||
.collect()
|
||||
let mut result = scores.to_vec();
|
||||
self.apply_cache_bonus_inplace(&mut result);
|
||||
result
|
||||
}
|
||||
|
||||
/// Select top-K experts by score
|
||||
///
|
||||
/// Returns expert IDs sorted by descending score.
|
||||
/// Ties are broken by expert ID (lower ID wins) for determinism.
|
||||
///
|
||||
/// Uses partial sort (P0 optimization) for better performance when
|
||||
/// top_k << num_experts.
|
||||
pub fn select_top_k(&self, scores: &[f32]) -> Vec<ExpertId> {
|
||||
// Create indexed scores
|
||||
let mut indexed: Vec<(ExpertId, f32)> = scores.iter().copied().enumerate().collect();
|
||||
let n = scores.len();
|
||||
let k = self.config.top_k.min(n);
|
||||
|
||||
// Sort by score descending, then by ID ascending (for determinism)
|
||||
indexed.sort_by(|a, b| {
|
||||
b.1.partial_cmp(&a.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| a.0.cmp(&b.0))
|
||||
});
|
||||
if k == 0 || n == 0 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
// Create indexed scores, handling NaN/Inf values
|
||||
let mut indexed: Vec<(ExpertId, f32)> = scores
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(id, &s)| (id, if s.is_finite() { s } else { f32::NEG_INFINITY }))
|
||||
.collect();
|
||||
|
||||
// Use partial sort for better performance when k << n
|
||||
if k < n / 2 {
|
||||
// Partition to get top-k elements (unordered)
|
||||
indexed.select_nth_unstable_by(k - 1, |a, b| {
|
||||
b.1.partial_cmp(&a.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| a.0.cmp(&b.0))
|
||||
});
|
||||
// Sort only the top-k portion
|
||||
indexed[..k].sort_by(|a, b| {
|
||||
b.1.partial_cmp(&a.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| a.0.cmp(&b.0))
|
||||
});
|
||||
} else {
|
||||
// Full sort when k is close to n
|
||||
indexed.sort_by(|a, b| {
|
||||
b.1.partial_cmp(&a.1)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| a.0.cmp(&b.0))
|
||||
});
|
||||
}
|
||||
|
||||
// Take top-K
|
||||
indexed
|
||||
.into_iter()
|
||||
.take(self.config.top_k)
|
||||
.take(k)
|
||||
.map(|(id, _)| id)
|
||||
.collect()
|
||||
}
|
||||
|
|
@ -457,7 +504,7 @@ mod tests {
|
|||
|
||||
fn make_router(num_experts: usize, top_k: usize, cache_bonus: f32) -> MemoryAwareRouter {
|
||||
let config = RouterConfig::new(num_experts, top_k).with_cache_bonus(cache_bonus);
|
||||
MemoryAwareRouter::with_default_affinity(config)
|
||||
MemoryAwareRouter::with_default_affinity(config).expect("test config should be valid")
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------
|
||||
|
|
@ -688,7 +735,7 @@ mod tests {
|
|||
#[test]
|
||||
fn test_memory_aware_disabled() {
|
||||
let config = RouterConfig::new(4, 2).with_memory_aware(false).with_cache_bonus(0.5);
|
||||
let mut router = MemoryAwareRouter::with_default_affinity(config);
|
||||
let mut router = MemoryAwareRouter::with_default_affinity(config).unwrap();
|
||||
|
||||
// Even with high cache bonus, should not apply it when disabled
|
||||
router.update_cache_state(&[3]); // Expert 3 resident
|
||||
|
|
@ -734,7 +781,7 @@ mod tests {
|
|||
let config = RouterConfig::new(4, 2).with_cache_bonus(0.0);
|
||||
let affinity_config = AffinityConfig::with_num_experts(4).with_decay(1.0);
|
||||
let affinity = ExpertAffinity::new(affinity_config);
|
||||
let mut router = MemoryAwareRouter::new(config, affinity);
|
||||
let mut router = MemoryAwareRouter::new(config, affinity).unwrap();
|
||||
|
||||
// Build affinity
|
||||
let gate_logits = vec![0.4, 0.3, 0.5, 0.2];
|
||||
|
|
|
|||
|
|
@ -156,7 +156,14 @@ impl SramExpertAffinity {
|
|||
// - Recency is important for temporal locality
|
||||
// - Router weight indicates model preference
|
||||
let freq_factor = (self.access_count as f32 + 1.0).ln();
|
||||
let recency_factor = 1.0 / (1.0 + (self.last_access as f32).recip() * 0.001);
|
||||
|
||||
// Guard against division by zero when last_access is 0
|
||||
let recency_factor = if self.last_access == 0 {
|
||||
0.0
|
||||
} else {
|
||||
1.0 / (1.0 + 0.001 / self.last_access as f32)
|
||||
};
|
||||
|
||||
let weight_factor = self.avg_router_weight * 2.0;
|
||||
|
||||
freq_factor + recency_factor + weight_factor
|
||||
|
|
@ -466,11 +473,13 @@ impl SramMapper {
|
|||
/// * `expert_id` - Expert to assign
|
||||
/// * `tier` - Target memory tier
|
||||
///
|
||||
/// # Panics
|
||||
/// # Returns
|
||||
///
|
||||
/// Panics if `expert_id >= num_experts`.
|
||||
pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) {
|
||||
assert!(expert_id < self.num_experts, "Expert ID out of range");
|
||||
/// Returns `false` if `expert_id >= num_experts`, `true` otherwise.
|
||||
pub fn assign_tier(&mut self, expert_id: ExpertId, tier: MemoryTier) -> bool {
|
||||
if expert_id >= self.num_experts {
|
||||
return false;
|
||||
}
|
||||
|
||||
let old_tier = self.tier_map[expert_id];
|
||||
|
||||
|
|
@ -497,6 +506,7 @@ impl SramMapper {
|
|||
}
|
||||
|
||||
self.tier_map[expert_id] = tier;
|
||||
true
|
||||
}
|
||||
|
||||
/// Get the current memory tier for an expert.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue