from __future__ import annotations

from dataclasses import asdict, dataclass, field
from typing import Any, Iterable

from bs4 import BeautifulSoup, Tag

# <input> types that `_is_ignorable_control` treats as ignorable.
_IGNORABLE_INPUT_TYPES = frozenset(
    {"submit", "button", "reset", "image", "file", "hidden"}
)


@dataclass
class ExtractedField:
    """Description of a single form control (input, textarea or select)."""

    kind: str  # input|textarea|select
    input_type: str | None = None  # text|radio|checkbox|...
    name: str | None = None
    id: str | None = None
    label: str | None = None
    required: bool = False
    options: list[dict[str, Any]] = field(default_factory=list)  # [{label,value}]

    def to_dict(self) -> dict[str, Any]:
        """Return the field as a plain dictionary (via ``dataclasses.asdict``)."""
        return asdict(self)


def _text(el: Tag | None) -> str:
    """Return the text of *el* with whitespace runs collapsed to single spaces.

    A missing (falsy) element yields an empty string.
    """
    if not el:
        return ""
    # get_text joins text fragments with a space; split/join then normalizes
    # any remaining runs of whitespace (including newlines) to one space.
    return " ".join(el.get_text(" ", strip=True).split())


def _first_non_empty(*vals: str | None) -> str | None:
    """Return the first candidate that is non-empty after normalization.

    ``None`` candidates are skipped. Every other candidate is converted with
    ``str`` and has leading/trailing whitespace removed and internal
    whitespace runs collapsed to a single space. Returns ``None`` when no
    candidate survives.
    """
    for candidate in vals:
        if candidate is None:
            continue
        # str.split() without arguments already discards leading and
        # trailing whitespace, so no separate strip() is needed.
        normalized = " ".join(str(candidate).split())
        if normalized:
            return normalized
    return None


def _is_hidden_input(tag: Tag) -> bool:
    """Return True when *tag* is an ``<input>`` whose type is ``hidden``.

    The type comparison is case-insensitive; a missing type is not hidden.
    """
    if tag.name != "input":
        return False
    return (tag.get("type") or "").lower() == "hidden"


def _is_ignorable_control(tag: Tag) -> bool:
    """Return True for controls that should be ignored.

    That is every ``<button>`` element and every ``<input>`` whose type
    (case-insensitive, defaulting to ``text`` when absent or empty) is one of
    submit, button, reset, image, file or hidden.
    """
    if tag.name == "input":
        input_type = (tag.get("type") or "text").lower()
        return input_type in _IGNORABLE_INPUT_TYPES
    return tag.name == "button"


def _infer_label(soup: BeautifulSoup, field_tag: Tag) -> str | None:
    # 1) explicit
    # NOTE(review): the visible source is cut off at this point; the rest of
    # this function lies outside the reviewed text. The placeholder below
    # only keeps the header syntactically complete and asserts nothing about
    # the real logic.
    ...