From 97c07520defdd97c41e0660caa5e0f9421403bd2 Mon Sep 17 00:00:00 2001 From: ruvnet Date: Sat, 16 May 2026 08:35:01 -0400 Subject: [PATCH] fix(brain): observable hydration + larger page-error budget (issue #464) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bisect outcome: source diff between the 2026-04-14 working revision (00203-brv → 22,005 memories) and current main (00204-92l → 10,227) is whitespace-only (cargo fmt 2026-04-24 + clippy 2026-04-25). No semantic change in store.rs, types.rs, or graph.rs. BrainMemory schema is byte-identical. So the regression is environmental, surfacing through a code path that has no observability today. Two changes: 1. load_from_firestore() now emits per-collection counters so the next deploy is diagnosable instead of a black box: Hydrate brain_memories: considered=N accepted=M rejected_parse=K First 5 parse errors are logged with the serde_json error so any live schema drift surfaces immediately. 2. firestore_list MAX_PAGE_ERRORS raised 3 → 8. Hydration crosses ~75 pages of 300 docs each; 3 consecutive transient OAuth-refresh blips at the wrong moment terminated the load at ~10K, consistent with the reported 10,227 number. 8 still bounds runaway behaviour while tolerating realistic blip rates. The actual environmental cause should be identifiable from a single deploy once the new logs are in place. Until then, traffic stays on 00203-brv (which is what the rollback already did). Co-Authored-By: claude-flow --- crates/mcp-brain-server/src/store.rs | 71 +++++++++++++++++++++++----- 1 file changed, 59 insertions(+), 12 deletions(-) diff --git a/crates/mcp-brain-server/src/store.rs b/crates/mcp-brain-server/src/store.rs index 740f09bf..c4f41f2b 100644 --- a/crates/mcp-brain-server/src/store.rs +++ b/crates/mcp-brain-server/src/store.rs @@ -331,7 +331,11 @@ impl FirestoreClient { /// We unwrap the `data` field and parse the inner JSON. /// Paginates with `pageToken` to fetch all documents. 
/// Maximum number of consecutive page-level errors before aborting pagination. - const MAX_PAGE_ERRORS: usize = 3; + /// Bumped from 3 → 8 in response to issue #464: with ~75 pages of 300 docs per + /// hydration, 3 transient errors at the wrong moment terminated the load + /// at ~10K instead of ~22K. 8 still bounds runaway behavior but tolerates a + /// realistic rate of OAuth refresh blips during long-running hydrations. + const MAX_PAGE_ERRORS: usize = 8; async fn firestore_list(&self, collection: &str) -> Vec { let Some(ref base) = self.base_url else { @@ -526,6 +530,9 @@ impl FirestoreClient { /// Hydrate in-memory cache from Firestore on startup. /// Silently succeeds with empty cache if Firestore is unavailable. + /// + /// Issue #464: emits per-collection considered/accepted/rejected counts + /// so silent record loss during deserialization is observable. pub async fn load_from_firestore(&self) { if self.base_url.is_none() { return; @@ -534,28 +541,54 @@ impl FirestoreClient { // Load memories — normalize on ingest for fast cosine search let docs = self.firestore_list("brain_memories").await; + let considered_mem = docs.len(); let mut mem_count = 0usize; + let mut mem_rejected_parse = 0usize; for doc in docs { - if let Ok(mut m) = serde_json::from_value::(doc) { - // ADR-149 followup: L2-normalize so search uses pure dot product - crate::graph::normalize_embedding(&mut m.embedding); - self.memories.insert(m.id, m); - mem_count += 1; + match serde_json::from_value::(doc) { + Ok(mut m) => { + // ADR-149 followup: L2-normalize so search uses pure dot product + crate::graph::normalize_embedding(&mut m.embedding); + self.memories.insert(m.id, m); + mem_count += 1; + } + Err(e) => { + mem_rejected_parse += 1; + // Log first 5 parse errors to surface schema drift without flooding logs + if mem_rejected_parse <= 5 { + tracing::warn!( + "brain_memories parse error #{mem_rejected_parse}: {e}" + ); + } + } } } + tracing::info!( + "Hydrate brain_memories: 
considered={considered_mem} accepted={mem_count} rejected_parse={mem_rejected_parse}" + ); // Load contributors let docs = self.firestore_list("brain_contributors").await; + let considered_contrib = docs.len(); let mut contrib_count = 0usize; + let mut contrib_rejected_parse = 0usize; for doc in docs { - if let Ok(c) = serde_json::from_value::(doc) { - self.contributors.insert(c.pseudonym.clone(), c); - contrib_count += 1; + match serde_json::from_value::(doc) { + Ok(c) => { + self.contributors.insert(c.pseudonym.clone(), c); + contrib_count += 1; + } + Err(_) => contrib_rejected_parse += 1, } } + tracing::info!( + "Hydrate brain_contributors: considered={considered_contrib} accepted={contrib_count} rejected_parse={contrib_rejected_parse}" + ); // Load page status let docs = self.firestore_list("brain_page_status").await; + let considered_pages = docs.len(); + let mut page_rejected = 0usize; for doc in docs { if let (Some(id), Some(status)) = ( doc.get("id") @@ -567,18 +600,32 @@ impl FirestoreClient { .ok(), ) { self.page_status.insert(id, status); + } else { + page_rejected += 1; } } + tracing::info!( + "Hydrate brain_page_status: considered={considered_pages} accepted={} rejected_parse={page_rejected}", + self.page_status.len() + ); // Load WASM nodes let docs = self.firestore_list("brain_nodes").await; + let considered_nodes = docs.len(); let mut node_count = 0usize; + let mut node_rejected_parse = 0usize; for doc in docs { - if let Ok(n) = serde_json::from_value::(doc) { - self.wasm_nodes.insert(n.id.clone(), n); - node_count += 1; + match serde_json::from_value::(doc) { + Ok(n) => { + self.wasm_nodes.insert(n.id.clone(), n); + node_count += 1; + } + Err(_) => node_rejected_parse += 1, } } + tracing::info!( + "Hydrate brain_nodes: considered={considered_nodes} accepted={node_count} rejected_parse={node_rejected_parse}" + ); tracing::info!( "Loaded from Firestore: {mem_count} memories, {contrib_count} contributors, {} pages, {node_count} nodes",