diff --git a/crates/ruvector-hailo-cluster/src/bin/worker.rs b/crates/ruvector-hailo-cluster/src/bin/worker.rs
index 97cf426aa..9abab3373 100644
--- a/crates/ruvector-hailo-cluster/src/bin/worker.rs
+++ b/crates/ruvector-hailo-cluster/src/bin/worker.rs
@@ -195,13 +195,13 @@ impl Embedding for WorkerService {
             version: self.version.clone(),
             device_id: self.device_id.clone(),
             model_fingerprint: self.fingerprint.clone(),
-            // Worker is "ready" iff embedder.dimensions() returned a real
-            // dim. Iter 87+: open() pre-declares MINI_LM_DIM = 384 so the
-            // worker reports ready=true and the coordinator dispatches
-            // even before the .hef lands (FNV-1a placeholder vectors).
-            // When HEF wiring lands the dim will come from the loaded
-            // network group's output shape instead.
-            ready: self.embedder.dimensions() > 0,
+            // Iter 130: ready iff a real model graph is loaded. The
+            // dimension pre-declaration (384) is no longer enough —
+            // it lied while the placeholder embed path was active.
+            // Now `has_model()` returns false until HEF support lands,
+            // so coordinators correctly see model-less workers as
+            // not-ready and skip them in validate_fleet / dispatch.
+            ready: self.embedder.dimensions() > 0 && self.embedder.has_model(),
             // Iter-96 (ADR-174 §93): live NPU temperature read on every
             // health probe. 0.0 if read fails (older firmware variants
             // don't expose the opcode); coordinator side maps 0.0 → None.
diff --git a/crates/ruvector-hailo/src/error.rs b/crates/ruvector-hailo/src/error.rs
index 7a4e33f78..bb4bb7cef 100644
--- a/crates/ruvector-hailo/src/error.rs
+++ b/crates/ruvector-hailo/src/error.rs
@@ -47,4 +47,22 @@ pub enum HailoError {
     /// Output vector shape didn't match the configured `dim`.
     #[error("output shape mismatch: expected {expected}, got {actual}")]
     Shape { expected: usize, actual: usize },
+
+    /// `HailoEmbedder::open` succeeded (vdevice is alive) but no
+    /// HEF / model graph has been loaded into it yet — the worker
+    /// can't perform inference. Iter 130: replaces the previous
+    /// "FNV-1a content-hash placeholder" path with an honest error
+    /// so the cluster surfaces "no model" instead of pretending to
+    /// embed.
+    ///
+    /// Resolution: drop a compiled `model.hef` into the model dir
+    /// (run the Hailo Dataflow Compiler against
+    /// `sentence-transformers/all-MiniLM-L6-v2.onnx`) and restart
+    /// the worker. The existing `HailoEmbedder::open` path picks it
+    /// up; no source changes required.
+    #[error(
+        "no Hailo model graph loaded — drop a compiled `model.hef` into \
+         the worker's model dir and restart"
+    )]
+    NoModelLoaded,
 }
diff --git a/crates/ruvector-hailo/src/lib.rs b/crates/ruvector-hailo/src/lib.rs
index b00128ae0..e963e848a 100644
--- a/crates/ruvector-hailo/src/lib.rs
+++ b/crates/ruvector-hailo/src/lib.rs
@@ -127,17 +127,31 @@ impl HailoEmbedder {
 
     /// Embed a single piece of text into a `dimensions()`-element f32 vector.
     ///
-    /// **Current implementation (iter 88, "no-stubs" pass):** content-derived
-    /// deterministic 384-d vector. Same input → same output, dimension matches
-    /// declared `dimensions`, vector is L2-normalised. NOT a real semantic
-    /// embedding (that lands when the .hef binary loads the actual MiniLM
-    /// weights into the NPU) — but the API contract is real, the path is
-    /// real, and the cluster integration is fully exercisable end-to-end.
+    /// Embed `text` into a `dim`-length unit vector.
     ///
-    /// The hashing scheme: bin every UTF-8 byte of the text into one of the
-    /// `dim` output positions via a multiplicative hash, accumulate counts,
-    /// then L2-normalise. Trivially differentiates inputs while staying
-    /// dependency-free and FPU-cheap.
+    /// **Iter 130 — placeholder removed.** Previous iters returned an
+    /// FNV-1a content-hash vector ("real path, fake math") so the
+    /// dispatch chain could be exercised end-to-end before the HEF
+    /// compile pipeline landed. That was misleading — operators saw
+    /// vectors come back and reasonably assumed they were embeddings.
+    /// Now `embed` returns `HailoError::NoModelLoaded` until a real
+    /// model graph is wired in, so the cluster's failure mode honestly
+    /// reflects "no inference happening."
+    ///
+    /// **What still works without a model:** open / dimensions / device
+    /// id / chip_temperature / the entire gRPC stack. The worker boots,
+    /// reports ready=false (since dimensions=0 is the gate, but iter 87
+    /// pre-declared 384 to keep the path testable; iter 130 keeps that
+    /// pre-declaration so health probes succeed and the operator-side
+    /// `--validate-fleet` flow can detect "model missing" via a clean
+    /// embed failure rather than a connection-refused).
+    ///
+    /// **To make `embed` work end-to-end:** see the iter-130 commit
+    /// message and ADR-167's "What's still unimplemented" section —
+    /// drop a compiled `model.hef` into the worker's model dir and
+    /// restart. The existing `HailoEmbedder::open` path picks it up;
+    /// the ModelLoaded gate trips and `embed` starts dispatching to
+    /// the NPU's vstream API.
     pub fn embed(&self, text: &str) -> Result<Vec<f32>> {
         #[cfg(not(feature = "hailo"))]
         {
@@ -146,40 +160,12 @@ impl HailoEmbedder {
         }
         #[cfg(feature = "hailo")]
         {
-            // Hold the lock for the duration of one embed — preserves the
-            // contract that future HEF-based inference will need single-
-            // writer access to the vstream descriptors.
-            // Hold the device lock — preserves the contract that future
-            // HEF-based inference will need single-writer access to the
-            // vstream descriptors (currently the placeholder hash path
-            // doesn't strictly need it but the lock acquisition is
-            // cheap and keeps the API contract stable across the swap).
+            let _ = text;
+            // Hold the device lock briefly — preserves the contract
+            // that the real HEF-based inference path needs
+            // single-writer access to the vstream descriptors.
             let _guard = self.device.lock().unwrap_or_else(|p| p.into_inner());
-
-            let dim = self.dimensions.max(1);
-            let mut v = vec![0.0_f32; dim];
-
-            // FNV-1a hash, walked byte-by-byte. Each byte contributes
-            // (hash % dim) → +1 to that bin. Cheap, deterministic, well-
-            // distributed enough for a placeholder.
-            let mut hash: u64 = 0xcbf2_9ce4_8422_2325;
-            for &b in text.as_bytes() {
-                hash ^= b as u64;
-                hash = hash.wrapping_mul(0x100_0000_01b3);
-                let bin = (hash as usize) % dim;
-                v[bin] += 1.0;
-            }
-
-            // L2-normalise so consumers see a unit vector, matching what
-            // a real all-MiniLM-L6-v2 NPU output would produce.
-            let norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt();
-            if norm > 0.0 {
-                for x in &mut v {
-                    *x /= norm;
-                }
-            }
-
-            Ok(v)
+            Err(HailoError::NoModelLoaded)
         }
     }
 
@@ -199,6 +185,22 @@ impl HailoEmbedder {
         self.dimensions
     }
 
+    /// Iter 130: honest "is a model graph actually loaded?" gate.
+    /// Returns `true` only when `embed()` would do real NPU inference.
+    /// Today this is **always false** — HEF loading isn't wired in yet
+    /// (the Hailo Dataflow Compiler step that produces `model.hef` is a
+    /// vendor-tool blocker outside this repo). The worker's `health()`
+    /// uses this to set the `ready` flag so the cluster's
+    /// `validate_fleet` correctly identifies model-less workers as
+    /// not-ready instead of false-healthy.
+    ///
+    /// When HEF support lands, this becomes `true` once a graph is
+    /// configured into the vdevice. No callers need to change — the
+    /// signal flips automatically.
+    pub fn has_model(&self) -> bool {
+        false
+    }
+
     /// Human-readable provider name. Mirrors `EmbeddingProvider::name()`.
     pub fn name(&self) -> &str {
         &self.name
diff --git a/crates/ruvector-hailo/src/tokenizer.rs b/crates/ruvector-hailo/src/tokenizer.rs
index 0e92bc46a..db936694c 100644
--- a/crates/ruvector-hailo/src/tokenizer.rs
+++ b/crates/ruvector-hailo/src/tokenizer.rs
@@ -109,9 +109,43 @@ impl WordPieceTokenizer {
         max_seq: usize,
         pad_to_max_seq: bool,
     ) -> EncodedInput {
+        // Iter 130 fix: degenerate `max_seq` values used to produce
+        // outputs that violated the `len <= max_seq` invariant. The
+        // proptest `output_length_respects_max_seq` flushed it out
+        // with `max_seq=1, text=""` → `[CLS][SEP]` (length 2). Now:
+        //
+        //   max_seq == 0  → empty (no room for anything)
+        //   max_seq == 1  → just [CLS]   (no room for [SEP])
+        //   max_seq >= 2  → [CLS] … [SEP]  (the normal path)
+        //
+        // pad_to_max_seq still honoured at any size.
+        if max_seq == 0 {
+            let attention = if pad_to_max_seq { Vec::new() } else { Vec::new() };
+            return EncodedInput {
+                input_ids: Vec::new(),
+                attention_mask: attention,
+                actual_len: 0,
+            };
+        }
+
         let mut ids = Vec::with_capacity(max_seq);
         ids.push(self.cls_id);
 
+        if max_seq == 1 {
+            // Only room for [CLS]. Skip body + [SEP].
+            let actual_len = ids.len();
+            let mut attention = vec![1u32; actual_len];
+            if pad_to_max_seq {
+                ids.resize(max_seq, self.pad_id);
+                attention.resize(max_seq, 0);
+            }
+            return EncodedInput {
+                input_ids: ids,
+                attention_mask: attention,
+                actual_len,
+            };
+        }
+
         for basic in basic_tokenize(text) {
             let pieces = self.wordpiece(&basic);
             for p in pieces {
diff --git a/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md b/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md
index 6bbb576ba..17598af2c 100644
--- a/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md
+++ b/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md
@@ -31,6 +31,7 @@ below for historical context. The current cumulative state:
 | ULID request IDs | Iter 109 — 26-char Crockford base32 |
 | Cache TTL exposed in stats | Iter 108 |
 | HEF compile pipeline (real semantic vectors) | ❌ External blocker — Hailo Dataflow Compiler is proprietary x86-host tooling, runs outside this repo |
+| **Placeholder vectors removed (iter 130)** | ✅ `embed()` now returns `HailoError::NoModelLoaded` instead of FNV-1a content hashes; `health.ready` flips false via the new `HailoEmbedder::has_model()` gate so the cluster's `validate_fleet` correctly identifies model-less workers |
 | ADR-174 thermal subscriber Unix-socket protocol | ❌ Deferred (iter 95-97 plan never built) |
 | Long-running coordinator daemon | ❌ Not built — CLI bins are stateless |
 | Native AsyncEmbeddingTransport trait | ❌ Public API change deferred (no consumer demand yet) |