From ca1b8f461a72f44001a9b3c858478fbe83efbb1f Mon Sep 17 00:00:00 2001
From: Reuven
Date: Tue, 17 Mar 2026 00:47:06 -0400
Subject: [PATCH] fix(adr-115): deserialize CDX length/offset from strings

Common Crawl CDX API returns length and offset as strings, not integers.
Add custom deserialize_string_to_u64 function to handle the type conversion.

Co-Authored-By: claude-flow
---
 crates/mcp-brain-server/src/pipeline.rs | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/crates/mcp-brain-server/src/pipeline.rs b/crates/mcp-brain-server/src/pipeline.rs
index e96254c04..326e2fc1c 100644
--- a/crates/mcp-brain-server/src/pipeline.rs
+++ b/crates/mcp-brain-server/src/pipeline.rs
@@ -499,14 +499,26 @@ pub struct CdxRecord {
     pub status: String,
     #[serde(default)]
     pub mime: String,
-    #[serde(default)]
+    /// Length in bytes (CDX returns as string, we parse to u64)
+    #[serde(default, deserialize_with = "deserialize_string_to_u64")]
     pub length: u64,
-    #[serde(default)]
+    /// Offset in WARC file (CDX returns as string, we parse to u64)
+    #[serde(default, deserialize_with = "deserialize_string_to_u64")]
     pub offset: u64,
     #[serde(default)]
     pub filename: String,
 }
 
+/// Deserialize a string to u64 (CDX API returns numeric fields as strings)
+fn deserialize_string_to_u64<'de, D>(deserializer: D) -> Result<u64, D::Error>
+where
+    D: serde::Deserializer<'de>,
+{
+    use serde::Deserialize;
+    let s: String = String::deserialize(deserializer)?;
+    s.parse().map_err(serde::de::Error::custom)
+}
+
 /// Query parameters for Common Crawl CDX index.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct CdxQuery {