diff --git a/crates/ruvector-hailo-cluster/src/bin/worker.rs b/crates/ruvector-hailo-cluster/src/bin/worker.rs
index 9abab3373..329b0f48e 100644
--- a/crates/ruvector-hailo-cluster/src/bin/worker.rs
+++ b/crates/ruvector-hailo-cluster/src/bin/worker.rs
@@ -337,8 +337,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     let fingerprint = compute_fingerprint(&model_dir);
     if fingerprint.is_empty() {
         warn!(
-            "model_dir {} has no model.hef / vocab.txt — fingerprint empty; \
-             coordinators will skip the integrity check",
+            "model_dir {} has no recognizable model artifacts \
+             (NPU: model.hef + vocab.txt; cpu-fallback: model.safetensors + \
+             tokenizer.json + config.json) — fingerprint empty; coordinators \
+             will skip the integrity check",
             model_dir.display()
         );
     } else {
diff --git a/crates/ruvector-hailo-cluster/src/fingerprint.rs b/crates/ruvector-hailo-cluster/src/fingerprint.rs
index 47dca83f0..ac8fd2636 100644
--- a/crates/ruvector-hailo-cluster/src/fingerprint.rs
+++ b/crates/ruvector-hailo-cluster/src/fingerprint.rs
@@ -1,29 +1,50 @@
-//! Model fingerprint — sha256 over HEF + tokenizer artifacts.
+//! Model fingerprint — sha256 over the model artifacts on disk.
 //!
 //! ADR-167 §8.3 fleet integrity guard: coordinators refuse to mix
 //! workers reporting different model fingerprints. Computed at worker
-//! startup over the files actually loaded, so any swap of HEF or
-//! vocab.txt produces a different fingerprint and the coordinator can
-//! eject the drift.
+//! startup over the files actually loaded, so any swap of model
+//! weights or tokenizer produces a different fingerprint and the
+//! coordinator can eject the drift.
 //!
-//! Format: hex-lowercase, 64 chars.
+//! Iter 143 — covers both deployment paths (each input is length-framed):
+//!   * NPU path:     sha256(model.hef || vocab.txt)
+//!   * cpu-fallback: sha256(model.safetensors || tokenizer.json || config.json)
+//!
+//! Mixed clusters (some workers on NPU, some on CPU) intentionally
+//! produce different fingerprints — they're running different code
+//! paths so the cluster should reject the mix.
+//!
+//! Format: hex-lowercase, 64 chars. Empty when no recognizable model
+//! artifacts are present in `model_dir`.
 
 use sha2::{Digest, Sha256};
 use std::path::Path;
 
-/// Compute sha256 over (hef_bytes || vocab_bytes). Missing files are
-/// treated as empty so the fingerprint is *also* a witness of which
-/// files exist — a worker that loads only the HEF (no tokenizer)
-/// produces a different fingerprint than a worker with both.
+/// Compute sha256 over the model artifacts. Missing files are treated
+/// as empty within their layout so the fingerprint is *also* a witness
+/// of which files exist — a worker that loads only the HEF (no
+/// tokenizer) produces a different fingerprint than a worker with both.
 ///
-/// Returns "" when both files are missing — caller (worker startup)
-/// uses empty-string as "skip the check" sentinel until step 6 lands.
+/// Returns "" when neither layout has any recognizable file — caller
+/// (worker startup) uses empty-string as "skip the check" sentinel.
 pub fn compute_fingerprint(model_dir: &Path) -> String {
     let hef = std::fs::read(model_dir.join("model.hef")).unwrap_or_default();
     let vocab = std::fs::read(model_dir.join("vocab.txt")).unwrap_or_default();
-    if hef.is_empty() && vocab.is_empty() {
+
+    // Iter 143: cpu-fallback artifacts. We don't read the full
+    // safetensors (90 MB) into memory — sha256 it in streaming chunks.
+    let safetensors_present = model_dir.join("model.safetensors").exists();
+    let tokenizer_json = std::fs::read(model_dir.join("tokenizer.json")).unwrap_or_default();
+    let config_json = std::fs::read(model_dir.join("config.json")).unwrap_or_default();
+
+    let npu_layout_present = !hef.is_empty() || !vocab.is_empty();
+    let cpu_layout_present =
+        safetensors_present || !tokenizer_json.is_empty() || !config_json.is_empty();
+
+    if !npu_layout_present && !cpu_layout_present {
         return String::new();
     }
+
     let mut h = Sha256::new();
     // Length-prefix each input so a hef of N bytes + vocab of M bytes
     // never collides with a hef of N+M bytes + empty vocab.
@@ -31,6 +52,30 @@ pub fn compute_fingerprint(model_dir: &Path) -> String {
     h.update(&hef);
     h.update((vocab.len() as u64).to_le_bytes());
     h.update(&vocab);
+
+    // Stream-hash the safetensors so we don't read 90 MB into memory.
+    if safetensors_present {
+        // Tag with file marker so an empty hef doesn't blend with safetensors.
+        h.update(b"safetensors:");
+        if let Ok(mut f) = std::fs::File::open(model_dir.join("model.safetensors")) {
+            let mut buf = [0u8; 64 * 1024];
+            let mut total: u64 = 0;
+            use std::io::Read;
+            while let Ok(n) = f.read(&mut buf) {
+                if n == 0 {
+                    break;
+                }
+                h.update(&buf[..n]);
+                total += n as u64;
+            }
+            h.update(total.to_le_bytes());
+        }
+    }
+    h.update((tokenizer_json.len() as u64).to_le_bytes());
+    h.update(&tokenizer_json);
+    h.update((config_json.len() as u64).to_le_bytes());
+    h.update(&config_json);
+
     let digest = h.finalize();
     hex_lower(&digest)
 }
@@ -95,6 +140,37 @@ mod tests {
         assert_ne!(fp2, fp3);
     }
 
+    #[test]
+    fn cpu_fallback_safetensors_layout_yields_distinct_fingerprint() {
+        // Iter 143: the cpu-fallback layout (safetensors + tokenizer +
+        // config, no hef) must yield a non-empty fingerprint that changes
+        // when the safetensors content changes.
+        let d1 = tmpdir();
+        std::fs::write(d1.path().join("model.safetensors"), b"weights-A").unwrap();
+        std::fs::write(d1.path().join("tokenizer.json"), b"tok-A").unwrap();
+        std::fs::write(d1.path().join("config.json"), b"cfg-A").unwrap();
+        let fp1 = compute_fingerprint(d1.path());
+        assert_eq!(fp1.len(), 64); // hex-encoded sha256 digest is 64 chars
+        assert!(!fp1.is_empty());
+
+        let d2 = tmpdir(); // identical to d1 except for the safetensors bytes
+        std::fs::write(d2.path().join("model.safetensors"), b"weights-B").unwrap();
+        std::fs::write(d2.path().join("tokenizer.json"), b"tok-A").unwrap();
+        std::fs::write(d2.path().join("config.json"), b"cfg-A").unwrap();
+        let fp2 = compute_fingerprint(d2.path());
+        assert_ne!(fp1, fp2, "different safetensors must yield different fp");
+
+        // Per ADR-167 §8.3, an NPU-layout worker and a cpu-fallback
+        // worker run different code paths, so their fingerprints SHOULD
+        // differ even for the same logical model — the cluster will
+        // refuse to mix them.
+        let d3 = tmpdir(); // NPU layout only: model.hef + vocab.txt
+        std::fs::write(d3.path().join("model.hef"), b"weights-A").unwrap();
+        std::fs::write(d3.path().join("vocab.txt"), b"tok-A").unwrap();
+        let fp3 = compute_fingerprint(d3.path());
+        assert_ne!(fp1, fp3, "NPU layout vs cpu-fallback must differ");
+    }
+
     #[test]
     fn length_prefix_prevents_split_collision() {
         // Without length-prefixing, sha256(b"abc" || b"de") == sha256(b"ab" || b"cde").
diff --git a/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md b/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md
index 6eb54e6ec..1caff786c 100644
--- a/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md
+++ b/docs/adr/ADR-167-ruvector-hailo-npu-embedding-backend.md
@@ -66,10 +66,27 @@ re-exported the BERT encoder block in isolation:
   quantized weights (full-precision HEF would be possible on
   hailo15h but not hailo8).
 
+**Iter 142/142b/143 follow-up debugging** (after reading the SDK source):
+- Root-caused the iter-139 `KeyError` to a mismatch between
+  `_get_build_inputs` (returns dict keyed by user dataset keys) and
+  `hailo_model.build` (looks up by internal `flow.input_nodes` names).
+  Workaround: introspect the parsed HN, key the calibration dict by
+  the actual layer name (`minilm_encoder/input_layer1`).
+- Past that: `AccelerasValueError` shape mismatch — Hailo's HN treats
+  inputs as 4D NCHW with implicit channels=1. Workaround: reshape
+  calibration from `[batch, seq, hidden]` to `[batch, 1, seq, hidden]`.
+- Past **that**: a Keras serialization bug —
+  `TypeError: Could not locate class 'ElementwiseAddDirectOp'` —
+  during the SDK's deepcopy of its own internal layer types. This is
+  hailo_model_optimization deepcopy-ing a custom Keras layer it
+  registered itself, then failing to deserialize it because the
+  `@register_keras_serializable` decorator isn't running in the
+  spawned subprocess. Cannot be fixed from user-space.
+
 **Status:** the encoder ONNX is fundamentally Hailo-compatible (it
-parses + full-precision-optimizes cleanly). The remaining gap is an
-SDK-internal bug in INT8 quantization of transformer encoders that
-can't be worked around from user-space. The cleanest unblock paths:
+parses + full-precision-optimizes cleanly). The remaining gap is a
+chain of SDK-internal bugs in INT8 quantization of transformer encoders,
+the last of which can't be fixed from user-space. The cleanest unblock paths:
 1. Hailo support ticket (the SDK should not KeyError on a layer it
    knows about — this is a quantization-flow bug, not a
    user-input bug)