fix(adr-115): deserialize CDX length/offset from strings

The Common Crawl CDX API returns `length` and `offset` as strings, not
integers. Add a custom `deserialize_string_to_u64` function to handle
the type conversion.

Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
Reuven 2026-03-17 00:47:06 -04:00
parent 0934304719
commit ca1b8f461a

View file

@ -499,14 +499,26 @@ pub struct CdxRecord {
pub status: String,
#[serde(default)]
pub mime: String,
#[serde(default)]
/// Length in bytes (CDX returns as string, we parse to u64)
#[serde(default, deserialize_with = "deserialize_string_to_u64")]
pub length: u64,
#[serde(default)]
/// Offset in WARC file (CDX returns as string, we parse to u64)
#[serde(default, deserialize_with = "deserialize_string_to_u64")]
pub offset: u64,
#[serde(default)]
pub filename: String,
}
/// Deserialize a string to u64 (CDX API returns numeric fields as strings)
fn deserialize_string_to_u64<'de, D>(deserializer: D) -> Result<u64, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::Deserialize;
let s: String = String::deserialize(deserializer)?;
s.parse().map_err(serde::de::Error::custom)
}
/// Query parameters for Common Crawl CDX index.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CdxQuery {