test(hailo): lock in iter-200 check_n behavior (iter 201)

iter-200 added `RateLimiter::check_n(peer, n)` to debit the streaming-batch length against the per-peer rate limiter, then wired it into `embed_stream`. Both code paths shipped without direct test coverage. Add five focused unit tests covering the contract:

- check_n_zero_is_a_noop: n=0 must not consume tokens (the embed_stream caller passes n-1 after the interceptor's 1, so for batch=1 the call is n=0). Repeated zero-calls don't burn the bucket; a normal check still succeeds afterwards.
- check_n_within_burst_consumes_n_tokens: 1 rps / burst 5: check_n(3) leaves 2 tokens; two more singleton checks pass; the third fails. Locks in the "actually consumes n tokens" property.
- check_n_exceeding_burst_is_denied: 1 rps / burst 4: check_n(8) returns Err (governor's InsufficientCapacity collapsed to RateLimitDenied). The bucket is unchanged — the failed attempt does NOT burn any tokens, so 4 singleton checks still pass after.
- check_n_partial_capacity_denied_without_consuming: Burn 2 of 4, then check_n(3) — tokens-needed (2 + 3 = 5) > 4 so denied. The 2 already-burned tokens stay burned; the failed check_n doesn't roll them back. Verifies the failure mode is "deny + don't side-effect."
- check_n_separate_peers_have_independent_buckets: A streaming-batch debit on peer-a must not bleed into peer-b's quota — proves the per-peer keying still holds for check_n.

Validated:
- rate_limit lib tests: 7 → 12 (+5 iter 201)
- full lib: 103 → 108
- full integration sweep: 181 → 186 tests, 0 failures
- all flaky tests still green (iter-196/197 fixes hold)

Pi worker untouched; pure test-side addition.

Co-Authored-By: claude-flow <ruv@ruv.net>
2026-05-30 03:53:34 +00:00 · 2026-05-03 20:09:37 -04:00 · 2026-05-03 20:09:37 -04:00 · 1d8d64b26f
commit 1d8d64b26f
parent 0ffff492bf
1 changed files with 72 additions and 0 deletions
--- a/crates/ruvector-hailo-cluster/src/rate_limit.rs
+++ b/crates/ruvector-hailo-cluster/src/rate_limit.rs
@@ -223,6 +223,78 @@ mod tests {
        assert!(RateLimiter::new(0, 0).is_none());
    }

+    // ---- check_n tests (iter 200 API, locked in iter 201) ----
+
+    #[test]
+    fn check_n_zero_is_a_noop() {
+        // n=0 must neither consume tokens nor error: the embed_stream
+        // caller passes n-1 because the interceptor has already
+        // debited 1, so for batch=1 the call is n=0.
+        let r = RateLimiter::new(1, 1).expect("non-zero quota");
+        for _ in 0..10 {
+            assert!(r.check_n("peer-a", 0).is_ok());
+        }
+        // Burst is 1, so this passes only if the zero-calls burned nothing.
+        assert!(r.check("peer-a").is_ok());
+    }
+
+    #[test]
+    fn check_n_within_burst_consumes_n_tokens() {
+        // 1 rps, burst 5. check_n(3) consumes 3, leaving 2: two more
+        // checks succeed (4th and 5th tokens); the next one fails.
+        let r = RateLimiter::new(1, 5).expect("non-zero quota");
+        assert!(r.check_n("peer-a", 3).is_ok());
+        assert!(r.check("peer-a").is_ok(), "4th token should still fit");
+        assert!(r.check("peer-a").is_ok(), "5th token should still fit");
+        assert!(r.check("peer-a").is_err(), "6th must be rate-limited");
+    }
+
+    #[test]
+    fn check_n_exceeding_burst_is_denied() {
+        // 1 rps, burst 4. check_n(8) is bigger than the bucket can
+        // ever hold → governor returns InsufficientCapacity, which
+        // we collapse to RateLimitDenied. The bucket itself is
+        // unchanged (still has all 4 tokens available).
+        let r = RateLimiter::new(1, 4).expect("non-zero quota");
+        assert!(r.check_n("peer-a", 8).is_err());
+        // Verify no tokens were burned by the failed attempt: the
+        // full burst of 4 singleton checks should still pass.
+        for _ in 0..4 {
+            assert!(r.check("peer-a").is_ok());
+        }
+    }
+
+    #[test]
+    fn check_n_partial_capacity_denied_without_consuming() {
+        // 1 rps, burst 4. Burn 2 with check, then check_n(3) — that's
+        // 2 + 3 = 5 > 4 → denied. The 2 already-burned tokens stay
+        // burned; check_n's denial does NOT roll back.
+        let r = RateLimiter::new(1, 4).expect("non-zero quota");
+        assert!(r.check("peer-a").is_ok());
+        assert!(r.check("peer-a").is_ok());
+        assert!(
+            r.check_n("peer-a", 3).is_err(),
+            "3 tokens beyond the remaining 2 must be denied"
+        );
+        // 2 tokens remaining: 2 singleton checks pass, the 3rd is denied.
+        assert!(r.check("peer-a").is_ok());
+        assert!(r.check("peer-a").is_ok());
+        assert!(r.check("peer-a").is_err());
+    }
+
+    #[test]
+    fn check_n_separate_peers_have_independent_buckets() {
+        // Streaming-batch debits on one peer must not bleed into
+        // another peer's quota.
+        let r = RateLimiter::new(1, 4).expect("non-zero quota");
+        assert!(r.check_n("peer-a", 4).is_ok());
+        assert!(r.check("peer-a").is_err(), "peer-a fully consumed");
+        // peer-b's bucket is untouched: its full burst of 4 still fits.
+        assert!(r.check_n("peer-b", 4).is_ok());
+        assert!(r.check("peer-b").is_err());
+        assert_eq!(r.tracked_peers(), 2);
+    }
+
    // Iter 197 — both tests below mutate the same process-global env
    // vars (`RUVECTOR_RATE_LIMIT_RPS` / `_BURST`). Cargo runs tests in
    // parallel by default, so without serialization the wipe in