From 1d8d64b26f50f973500a672b62057f5a0dae99b3 Mon Sep 17 00:00:00 2001
From: ruvnet <ruvnet@gmail.com>
Date: Sun, 3 May 2026 20:09:37 -0400
Subject: [PATCH] test(hailo): lock in iter-200 check_n behavior (iter 201)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

iter-200 added `RateLimiter::check_n(peer, n)` to debit the
streaming-batch length against the per-peer rate limiter, then
wired it into `embed_stream`. Both code paths shipped without
direct test coverage. Add five focused unit tests covering the
contract:

  check_n_zero_is_a_noop
    n=0 must not consume tokens (the embed_stream caller passes
    n-1 because the interceptor has already debited 1, so for
    batch=1 the call is n=0). Repeated zero-calls don't burn the
    bucket; a normal check still succeeds afterwards.

  check_n_within_burst_consumes_n_tokens
    1 rps / burst 5: check_n(3) leaves 2 tokens; two more singleton
    checks pass; the third fails. Locks in the "actually consumes
    n tokens" property.

  check_n_exceeding_burst_is_denied
    1 rps / burst 4: check_n(8) returns Err (governor's
    InsufficientCapacity collapsed to RateLimitDenied). The bucket
    is unchanged — the failed attempt does NOT burn any tokens, so
    4 singleton checks still pass after.

  check_n_partial_capacity_denied_without_consuming
    Burn 2 of 4, then check_n(3) — tokens-needed (2 + 3 = 5) > 4 so
    denied. The 2 already-burned tokens stay burned; the failed
    check_n doesn't roll them back. Verifies the failure mode is
    "deny + don't side-effect."

  check_n_separate_peers_have_independent_buckets
    A streaming-batch debit on peer-a must not bleed into peer-b's
    quota — proves the per-peer keying still holds for check_n.

Validated:
  - rate_limit lib tests: 7 → 12 (+5 iter 201)
  - full lib                : 103 → 108
  - full integration sweep  : 181 → 186 tests, 0 failures
  - all flaky tests still green (iter-196/197 fixes hold)

Pi worker untouched; pure test-side addition.

Co-Authored-By: claude-flow <ruv@ruv.net>
---
 .../ruvector-hailo-cluster/src/rate_limit.rs  | 72 +++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/crates/ruvector-hailo-cluster/src/rate_limit.rs b/crates/ruvector-hailo-cluster/src/rate_limit.rs
index 08305ba2..43329ab8 100644
--- a/crates/ruvector-hailo-cluster/src/rate_limit.rs
+++ b/crates/ruvector-hailo-cluster/src/rate_limit.rs
@@ -223,6 +223,78 @@ mod tests {
         assert!(RateLimiter::new(0, 0).is_none());
     }
 
+    // ---- check_n tests (iter 200 API, locked in iter 201) ----
+
+    #[test]
+    fn check_n_zero_is_a_noop() {
+        // n=0 must not consume tokens and must not error — the
+        // embed_stream caller passes n-1 because the interceptor has
+        // already debited 1, so for batch=1 the call is n=0.
+        let r = RateLimiter::new(1, 1).expect("non-zero quota");
+        for _ in 0..10 {
+            assert!(r.check_n("peer-a", 0).is_ok());
+        }
+        // Bucket untouched: the one burst token is still available.
+        assert!(r.check("peer-a").is_ok());
+    }
+
+    #[test]
+    fn check_n_within_burst_consumes_n_tokens() {
+        // 1 rps, burst 5. check_n(3) consumes 3; two more checks
+        // succeed (4th and 5th tokens); the next one fails.
+        let r = RateLimiter::new(1, 5).expect("non-zero quota");
+        assert!(r.check_n("peer-a", 3).is_ok());
+        assert!(r.check("peer-a").is_ok(), "4th token should still fit");
+        assert!(r.check("peer-a").is_ok(), "5th token should still fit");
+        assert!(r.check("peer-a").is_err(), "6th must be rate-limited");
+    }
+
+    #[test]
+    fn check_n_exceeding_burst_is_denied() {
+        // 1 rps, burst 4. check_n(8) asks for more than the bucket can
+        // ever hold → governor returns InsufficientCapacity, which
+        // we collapse to RateLimitDenied. The bucket itself is
+        // unchanged (still has all 4 tokens available).
+        let r = RateLimiter::new(1, 4).expect("non-zero quota");
+        assert!(r.check_n("peer-a", 8).is_err());
+        // Verify no tokens were burned by the failed attempt: all 4
+        // singleton checks should still pass.
+        for _ in 0..4 {
+            assert!(r.check("peer-a").is_ok());
+        }
+    }
+
+    #[test]
+    fn check_n_partial_capacity_denied_without_consuming() {
+        // 1 rps, burst 4. Burn 2 with check, then check_n(3) — that's
+        // 2 + 3 = 5 > 4 → denied. The denial neither consumes more
+        // tokens nor refunds the 2 already burned.
+        let r = RateLimiter::new(1, 4).expect("non-zero quota");
+        assert!(r.check("peer-a").is_ok());
+        assert!(r.check("peer-a").is_ok());
+        assert!(
+            r.check_n("peer-a", 3).is_err(),
+            "3 tokens beyond the remaining 2 must be denied"
+        );
+        // Exactly 2 tokens remain: 2 singleton checks pass, the 3rd fails.
+        assert!(r.check("peer-a").is_ok());
+        assert!(r.check("peer-a").is_ok());
+        assert!(r.check("peer-a").is_err());
+    }
+
+    #[test]
+    fn check_n_separate_peers_have_independent_buckets() {
+        // A streaming-batch debit on one peer must not bleed into
+        // another peer's quota: drain peer-a, then drain peer-b.
+        let r = RateLimiter::new(1, 4).expect("non-zero quota");
+        assert!(r.check_n("peer-a", 4).is_ok());
+        assert!(r.check("peer-a").is_err(), "peer-a fully consumed");
+        // peer-b's bucket is untouched: its full burst of 4 still fits.
+        assert!(r.check_n("peer-b", 4).is_ok());
+        assert!(r.check("peer-b").is_err());
+        assert_eq!(r.tracked_peers(), 2);
+    }
+
     // Iter 197 — both tests below mutate the same process-global env
     // vars (`RUVECTOR_RATE_LIMIT_RPS` / `_BURST`). Cargo runs tests in
     // parallel by default, so without serialization the wipe in