[SKY-8952] CSV with Wide Header Exceeds ~1 MB (#5510)

This commit is contained in:
Aaron Perez 2026-04-15 13:43:14 -05:00 committed by GitHub
parent ae08f4c64d
commit 0e72bdabd7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 65 additions and 857 deletions

View file

@ -2414,555 +2414,6 @@
}
}
}
},
"/v1/schedules": {
"get": {
"tags": [
"Schedules"
],
"summary": "List all schedules for the organization",
"operationId": "schedules_list_all",
"parameters": [
{
"name": "page",
"in": "query",
"required": false,
"schema": {
"type": "integer",
"minimum": 1,
"default": 1,
"title": "Page"
}
},
{
"name": "page_size",
"in": "query",
"required": false,
"schema": {
"type": "integer",
"maximum": 100,
"minimum": 1,
"default": 10,
"title": "Page Size"
}
},
{
"name": "status",
"in": "query",
"required": false,
"schema": {
"enum": [
"active",
"paused"
],
"type": "string",
"nullable": true,
"description": "Filter by status: 'active' or 'paused'",
"title": "Status"
},
"description": "Filter by status: 'active' or 'paused'"
},
{
"name": "search",
"in": "query",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Search by workflow title or schedule name",
"title": "Search"
},
"description": "Search by workflow title or schedule name"
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/OrganizationScheduleListResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "list_organization_schedules"
}
},
"/v1/workflows/{workflow_permanent_id}/schedules": {
"post": {
"tags": [
"Schedules"
],
"summary": "Create a schedule for a workflow",
"operationId": "schedules_create",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleUpsertRequest"
}
}
}
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "create_workflow_schedule"
},
"get": {
"tags": [
"Schedules"
],
"summary": "List schedules for a workflow",
"operationId": "schedules_list",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleListResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "list_workflow_schedules"
}
},
"/v1/workflows/{workflow_permanent_id}/schedules/{workflow_schedule_id}": {
"get": {
"tags": [
"Schedules"
],
"summary": "Get a schedule by ID",
"operationId": "schedules_get",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "workflow_schedule_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Schedule Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "get_workflow_schedule"
},
"put": {
"tags": [
"Schedules"
],
"summary": "Update a schedule",
"operationId": "schedules_update",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "workflow_schedule_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Schedule Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"requestBody": {
"required": true,
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleUpsertRequest"
}
}
}
},
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "update_workflow_schedule"
},
"delete": {
"tags": [
"Schedules"
],
"summary": "Delete a schedule",
"operationId": "schedules_delete",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "workflow_schedule_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Schedule Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/DeleteScheduleResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "delete_workflow_schedule_route"
}
},
"/v1/workflows/{workflow_permanent_id}/schedules/{workflow_schedule_id}/enable": {
"post": {
"tags": [
"Schedules"
],
"summary": "Enable a schedule",
"operationId": "schedules_enable",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "workflow_schedule_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Schedule Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "enable_workflow_schedule"
}
},
"/v1/workflows/{workflow_permanent_id}/schedules/{workflow_schedule_id}/disable": {
"post": {
"tags": [
"Schedules"
],
"summary": "Disable a schedule",
"operationId": "schedules_disable",
"parameters": [
{
"name": "workflow_permanent_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Permanent Id"
}
},
{
"name": "workflow_schedule_id",
"in": "path",
"required": true,
"schema": {
"type": "string",
"title": "Workflow Schedule Id"
}
},
{
"name": "x-api-key",
"in": "header",
"required": false,
"schema": {
"type": "string",
"nullable": true,
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings.",
"title": "X-Api-Key"
},
"description": "Skyvern API key for authentication. API key can be found at https://app.skyvern.com/settings."
}
],
"responses": {
"200": {
"description": "Successful Response",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/WorkflowScheduleResponse"
}
}
}
},
"422": {
"description": "Validation Error",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/HTTPValidationError"
}
}
}
}
},
"x-fern-sdk-group-name": "agent",
"x-fern-sdk-method-name": "disable_workflow_schedule"
}
}
},
"components": {
@ -4512,8 +3963,8 @@
"properties": {
"file": {
"type": "string",
"title": "File",
"format": "binary"
"format": "binary",
"title": "File"
}
},
"type": "object",
@ -10999,8 +10450,8 @@
},
"engine": {
"$ref": "#/components/schemas/RunEngine",
"description": "\nThe engine that powers the agent task. The default value is `skyvern-1.0`, which is good for simple tasks like filling a form, or searching for information on Google. `skyvern-2.0` is the latest Skyvern agent that performs well with complex and multi-step tasks. The `openai-cua` engine uses OpenAI's CUA model. The `anthropic-cua` uses Anthropic's Claude Sonnet 3.7 model with the computer use tool.\n",
"default": "skyvern-1.0"
"description": "\nThe engine that powers the agent task. The default value is `skyvern-2.0`, the latest Skyvern agent that performs pretty well with complex and multi-step tasks. `skyvern-1.0` is good for simple tasks like filling a form, or searching for information on Google. The `openai-cua` engine uses OpenAI's CUA model. The `anthropic-cua` uses Anthropic's Claude Sonnet 3.7 model with the computer use tool.\n",
"default": "skyvern-2.0"
},
"title": {
"type": "string",
@ -14264,280 +13715,6 @@
"azure_vault"
],
"title": "CredentialType"
},
"DeleteScheduleResponse": {
"properties": {
"ok": {
"type": "boolean",
"title": "Ok"
}
},
"type": "object",
"required": [
"ok"
],
"title": "DeleteScheduleResponse"
},
"OrganizationScheduleItem": {
"properties": {
"workflow_schedule_id": {
"type": "string",
"title": "Workflow Schedule Id"
},
"organization_id": {
"type": "string",
"title": "Organization Id"
},
"workflow_permanent_id": {
"type": "string",
"title": "Workflow Permanent Id"
},
"workflow_title": {
"type": "string",
"title": "Workflow Title"
},
"cron_expression": {
"type": "string",
"title": "Cron Expression"
},
"timezone": {
"type": "string",
"title": "Timezone"
},
"enabled": {
"type": "boolean",
"title": "Enabled"
},
"parameters": {
"additionalProperties": true,
"type": "object",
"nullable": true,
"title": "Parameters"
},
"name": {
"type": "string",
"nullable": true,
"title": "Name"
},
"description": {
"type": "string",
"nullable": true,
"title": "Description"
},
"next_run": {
"type": "string",
"format": "date-time",
"nullable": true,
"title": "Next Run"
},
"created_at": {
"type": "string",
"format": "date-time",
"title": "Created At"
},
"modified_at": {
"type": "string",
"format": "date-time",
"title": "Modified At"
}
},
"type": "object",
"required": [
"workflow_schedule_id",
"organization_id",
"workflow_permanent_id",
"workflow_title",
"cron_expression",
"timezone",
"enabled",
"created_at",
"modified_at"
],
"title": "OrganizationScheduleItem",
"description": "Compact schedule projection for the org-wide list endpoint."
},
"OrganizationScheduleListResponse": {
"properties": {
"schedules": {
"items": {
"$ref": "#/components/schemas/OrganizationScheduleItem"
},
"type": "array",
"title": "Schedules"
},
"total_count": {
"type": "integer",
"title": "Total Count"
},
"page": {
"type": "integer",
"title": "Page"
},
"page_size": {
"type": "integer",
"title": "Page Size"
}
},
"type": "object",
"required": [
"schedules",
"total_count",
"page",
"page_size"
],
"title": "OrganizationScheduleListResponse"
},
"WorkflowSchedule": {
"properties": {
"workflow_schedule_id": {
"type": "string",
"title": "Workflow Schedule Id"
},
"organization_id": {
"type": "string",
"title": "Organization Id"
},
"workflow_permanent_id": {
"type": "string",
"title": "Workflow Permanent Id"
},
"cron_expression": {
"type": "string",
"title": "Cron Expression"
},
"timezone": {
"type": "string",
"title": "Timezone"
},
"enabled": {
"type": "boolean",
"title": "Enabled"
},
"parameters": {
"additionalProperties": true,
"type": "object",
"nullable": true,
"title": "Parameters"
},
"temporal_schedule_id": {
"type": "string",
"nullable": true,
"title": "Temporal Schedule Id"
},
"name": {
"type": "string",
"nullable": true,
"title": "Name"
},
"description": {
"type": "string",
"nullable": true,
"title": "Description"
},
"created_at": {
"type": "string",
"format": "date-time",
"title": "Created At"
},
"modified_at": {
"type": "string",
"format": "date-time",
"title": "Modified At"
},
"deleted_at": {
"type": "string",
"format": "date-time",
"nullable": true,
"title": "Deleted At"
}
},
"type": "object",
"required": [
"workflow_schedule_id",
"organization_id",
"workflow_permanent_id",
"cron_expression",
"timezone",
"enabled",
"created_at",
"modified_at"
],
"title": "WorkflowSchedule"
},
"WorkflowScheduleListResponse": {
"properties": {
"schedules": {
"items": {
"$ref": "#/components/schemas/WorkflowSchedule"
},
"type": "array",
"title": "Schedules"
}
},
"type": "object",
"required": [
"schedules"
],
"title": "WorkflowScheduleListResponse"
},
"WorkflowScheduleResponse": {
"properties": {
"schedule": {
"$ref": "#/components/schemas/WorkflowSchedule"
},
"next_runs": {
"items": {
"type": "string",
"format": "date-time"
},
"type": "array",
"title": "Next Runs"
}
},
"type": "object",
"required": [
"schedule"
],
"title": "WorkflowScheduleResponse"
},
"WorkflowScheduleUpsertRequest": {
"properties": {
"cron_expression": {
"type": "string",
"title": "Cron Expression"
},
"timezone": {
"type": "string",
"title": "Timezone"
},
"enabled": {
"type": "boolean",
"title": "Enabled",
"default": true
},
"parameters": {
"additionalProperties": true,
"type": "object",
"nullable": true,
"title": "Parameters"
},
"name": {
"type": "string",
"nullable": true,
"title": "Name"
},
"description": {
"type": "string",
"nullable": true,
"title": "Description"
}
},
"type": "object",
"required": [
"cron_expression",
"timezone"
],
"title": "WorkflowScheduleUpsertRequest"
}
}
},
@ -14552,4 +13729,4 @@
"url": "http://localhost:8000"
}
]
}
}

View file

@ -271,7 +271,7 @@ The response from polling (`get_run`) and webhooks have slightly different struc
"run_request": {
"prompt": "Get the title of the top post",
"url": "https://news.ycombinator.com/",
"engine": "skyvern-1.0"
"engine": "skyvern-2.0"
}
}
```

View file

@ -125,14 +125,14 @@ The AI engine that powers the task. These are not iterations—they're suited fo
| Engine | Description |
|--------|-------------|
| `skyvern-1.0` | **Default.** Single objective, precise, faster, cheaper. Best for simple tasks like form filling or single-page extraction. |
| `skyvern-2.0` | Multi-objective, flexible, handles complex multi-step tasks. 85.85% on WebVoyager benchmark. Slower and more expensive. |
| `skyvern-2.0` | **Default.** Multi-objective, flexible, handles complex multi-step tasks. 85.85% on WebVoyager benchmark. Slower and more expensive. |
| `skyvern-1.0` | Single objective, precise, faster, cheaper. Best for simple tasks like form filling or single-page extraction. |
| `openai-cua` | OpenAI's Computer Use Agent |
| `anthropic-cua` | Anthropic Claude Sonnet with computer use |
| `ui-tars` | UI-TARS model (Seed1.5-VL) via Doubao API |
<Tip>
The default engine `skyvern-1.0` works well for clear, single-goal tasks with faster, cheaper execution. Switch to `skyvern-2.0` when the task requires flexibility or multiple steps.
Use `skyvern-1.0` when you have a clear, single goal and want faster, cheaper execution. Use `skyvern-2.0` when the task requires flexibility or multiple steps.
</Tip>
<CodeGroup>

View file

@ -360,8 +360,8 @@ Skyvern supports multiple AI engines for task execution:
| Engine | Description |
|--------|-------------|
| `skyvern-1.0` | Default Skyvern model — fast, precise, ideal for simple tasks |
| `skyvern-2.0` | Latest Skyvern model — flexible, handles complex multi-step tasks |
| `skyvern-2.0` | Latest Skyvern model (default, recommended) |
| `skyvern-1.0` | Previous Skyvern model |
| `openai-cua` | OpenAI Computer Use Agent |
| `anthropic-cua` | Anthropic Computer Use Agent |
| `ui-tars` | UI-TARS model |

View file

@ -97,7 +97,7 @@ const result = await skyvern.runTask({
body: {
prompt: string, // Required. Natural language instructions.
url?: string, // Starting page URL.
engine?: RunEngine, // "skyvern_v1" (default), "skyvern_v2", "openai_cua", "anthropic_cua", "ui_tars"
engine?: RunEngine, // "skyvern_v2" (default), "skyvern_v1", "openai_cua", "anthropic_cua", "ui_tars"
max_steps?: number, // Cap AI steps to limit cost.
data_extraction_schema?: Record<string, unknown> | string, // JSON Schema for output.
browser_session_id?: string, // Run in existing session.

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import abc
import ast
import asyncio
import codecs
import copy
import csv
import json
@ -3738,6 +3739,13 @@ class FileParserBlock(Block):
# Parameter 1 of Literal[...] cannot be of type "Any"
block_type: Literal[BlockType.FILE_URL_PARSER] = BlockType.FILE_URL_PARSER # type: ignore
# FileParserBlock CSV constants
_CSV_SNIFF_LINES = 5
_CSV_BINARY_PREFIX_BYTES = 4096
_CSV_UTF_BOMS = (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE, codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)
# Bounded cap for legitimate wide cells (JSON blobs, long descriptions); applied only while parsing.
_MAX_CSV_FIELD_SIZE_BYTES = 10 * 1024 * 1024
file_url: str
file_type: FileType
json_schema: dict[str, Any] | None = None
@ -3840,12 +3848,46 @@ class FileParserBlock(Block):
# latin-1 always succeeds (1:1 byte mapping), so this is a safety fallback
return "latin-1"
def _sniff_csv_delimiter(self, file_path: str) -> tuple[str, str]:
    """Detect the CSV delimiter and text encoding of ``file_path``.

    The sniffer sample is assembled from whole lines (up to
    ``_CSV_SNIFF_LINES`` of them) rather than a fixed-size character slice,
    so a very wide first row is never cut off in the middle.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        A ``(delimiter, encoding)`` tuple.

    Raises:
        csv.Error: If the file holds only whitespace/no bytes, or if its
            leading bytes contain NUL without a UTF-16/UTF-32 BOM.
    """
    # Cheap pre-flight on the raw bytes, before any decoding is attempted.
    with open(file_path, "rb") as raw_handle:
        head = raw_handle.read(self._CSV_BINARY_PREFIX_BYTES)

    if not head.strip():
        raise csv.Error("File is empty")

    # NUL bytes are a strong binary signal; the exception is UTF-16/UTF-32 text,
    # which is recognised here by its leading BOM.
    if b"\x00" in head and not head.startswith(self._CSV_UTF_BOMS):
        raise csv.Error("File contains binary data")

    encoding = self._detect_file_encoding(file_path)

    # Collect complete lines only, stopping early at end of file.
    max_lines = self._CSV_SNIFF_LINES
    collected: list[str] = []
    with open(file_path, encoding=encoding, errors="replace", newline="") as text_handle:
        while len(collected) < max_lines:
            row_text = text_handle.readline()
            if row_text == "":
                break
            collected.append(row_text)

    sample = "".join(collected)

    # The bytes may decode successfully and still amount to nothing but whitespace.
    if not sample.strip():
        raise csv.Error("File is empty")

    try:
        return csv.Sniffer().sniff(sample).delimiter, encoding
    except csv.Error:
        # Sniffing failed: fall back on the file extension to choose tab vs comma.
        if file_path.lower().endswith(".tsv"):
            return "\t", encoding
        return ",", encoding
def validate_file_type(self, file_url_used: str, file_path: str) -> None:
if self.file_type == FileType.CSV:
try:
encoding = self._detect_file_encoding(file_path)
with open(file_path, encoding=encoding, errors="replace") as file:
csv.Sniffer().sniff(file.read(1024))
self._sniff_csv_delimiter(file_path)
except csv.Error as e:
raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
elif self.file_type == FileType.EXCEL:
@ -3878,25 +3920,14 @@ class FileParserBlock(Block):
async def _parse_csv_file(self, file_path: str) -> list[dict[str, Any]]:
"""Parse CSV/TSV file and return list of dictionaries."""
parsed_data = []
encoding = self._detect_file_encoding(file_path)
with open(file_path, encoding=encoding, errors="replace") as file:
# Try to detect the delimiter (comma for CSV, tab for TSV)
sample = file.read(1024)
file.seek(0) # Reset file pointer
# Use csv.Sniffer to detect the delimiter
try:
dialect = csv.Sniffer().sniff(sample)
delimiter = dialect.delimiter
except csv.Error:
# Default to comma if detection fails
delimiter = ","
reader = csv.DictReader(file, delimiter=delimiter)
for row in reader:
parsed_data.append(row)
return parsed_data
delimiter, encoding = self._sniff_csv_delimiter(file_path)
previous_limit = csv.field_size_limit(self._MAX_CSV_FIELD_SIZE_BYTES)
try:
with open(file_path, encoding=encoding, errors="replace", newline="") as file:
reader = csv.DictReader(file, delimiter=delimiter)
return list(reader)
finally:
csv.field_size_limit(previous_limit)
def _clean_dataframe_for_json(self, df: pd.DataFrame) -> list[dict[str, Any]]:
"""Clean DataFrame to ensure it can be serialized to JSON."""