mirror of
https://github.com/ruvnet/RuVector.git
synced 2026-06-01 14:39:33 +00:00
fix(adr-115): deserialize CDX length/offset from strings
Common Crawl CDX API returns length and offset as strings, not integers. Add custom deserialize_string_to_u64 function to handle the type conversion. Co-Authored-By: claude-flow <ruv@ruv.net>
This commit is contained in:
parent
0934304719
commit
ca1b8f461a
1 changed file with 14 additions and 2 deletions
|
|
@ -499,14 +499,26 @@ pub struct CdxRecord {
|
|||
pub status: String,
|
||||
#[serde(default)]
|
||||
pub mime: String,
|
||||
#[serde(default)]
|
||||
/// Length in bytes (CDX returns as string, we parse to u64)
|
||||
#[serde(default, deserialize_with = "deserialize_string_to_u64")]
|
||||
pub length: u64,
|
||||
#[serde(default)]
|
||||
/// Offset in WARC file (CDX returns as string, we parse to u64)
|
||||
#[serde(default, deserialize_with = "deserialize_string_to_u64")]
|
||||
pub offset: u64,
|
||||
#[serde(default)]
|
||||
pub filename: String,
|
||||
}
|
||||
|
||||
/// Deserialize a string to u64 (CDX API returns numeric fields as strings)
|
||||
fn deserialize_string_to_u64<'de, D>(deserializer: D) -> Result<u64, D::Error>
|
||||
where
|
||||
D: serde::Deserializer<'de>,
|
||||
{
|
||||
use serde::Deserialize;
|
||||
let s: String = String::deserialize(deserializer)?;
|
||||
s.parse().map_err(serde::de::Error::custom)
|
||||
}
|
||||
|
||||
/// Query parameters for Common Crawl CDX index.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct CdxQuery {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue