diff --git a/alembic/versions/2025_06_27_0211-6cf2c1e15039_add_http_request_block_columns_to_.py b/alembic/versions/2025_06_27_0211-6cf2c1e15039_add_http_request_block_columns_to_.py new file mode 100644 index 00000000..34f76f99 --- /dev/null +++ b/alembic/versions/2025_06_27_0211-6cf2c1e15039_add_http_request_block_columns_to_.py @@ -0,0 +1,43 @@ +"""add http request block columns to workflow_run_blocks + +Revision ID: 6cf2c1e15039 +Revises: 760ae45a1345 +Create Date: 2025-06-27 02:11:36.372859+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "6cf2c1e15039" +down_revision: Union[str, None] = "760ae45a1345" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("workflow_run_blocks", sa.Column("http_request_method", sa.String(length=10), nullable=True)) + op.add_column("workflow_run_blocks", sa.Column("http_request_url", sa.String(), nullable=True)) + op.add_column("workflow_run_blocks", sa.Column("http_request_headers", sa.JSON(), nullable=True)) + op.add_column("workflow_run_blocks", sa.Column("http_request_timeout", sa.Integer(), nullable=True)) + op.add_column("workflow_run_blocks", sa.Column("http_request_follow_redirects", sa.Boolean(), nullable=True)) + op.add_column("workflow_run_blocks", sa.Column("http_request_body", sa.JSON(), nullable=True)) + op.add_column("workflow_run_blocks", sa.Column("http_request_parameters", sa.JSON(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("workflow_run_blocks", "http_request_parameters") + op.drop_column("workflow_run_blocks", "http_request_body") + op.drop_column("workflow_run_blocks", "http_request_follow_redirects") + op.drop_column("workflow_run_blocks", "http_request_timeout") + op.drop_column("workflow_run_blocks", "http_request_headers") + op.drop_column("workflow_run_blocks", "http_request_url") + op.drop_column("workflow_run_blocks", "http_request_method") + # ### end Alembic commands ### diff --git a/poetry.lock b/poetry.lock index 93426f70..e2adfa8c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 2.1.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.3 and should not be changed by hand. [[package]] name = "about-time" @@ -1237,6 +1237,18 @@ ssh = ["bcrypt (>=3.1.5)"] test = ["certifi (>=2024)", "cryptography-vectors (==44.0.2)", "pretend (>=0.7)", "pytest (>=7.4.0)", "pytest-benchmark (>=4.0)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=3.5.0)"] test-randomorder = ["pytest-randomly"] +[[package]] +name = "curlparser" +version = "0.1.0" +description = "Parse cURL commands returning object representing the request." +optional = false +python-versions = ">=3.7,<4.0" +groups = ["main"] +files = [ + {file = "curlparser-0.1.0-py3-none-any.whl", hash = "sha256:3d2020367571fc02dbab0e583ff52a8e31d13e05bbd5dbb893aa75322744f527"}, + {file = "curlparser-0.1.0.tar.gz", hash = "sha256:e6cb7b675ed7c08338f2ce0ba64012aa3b9e908577d1f54c8f5f94a3057099ae"}, +] + [[package]] name = "ddtrace" version = "2.21.6" @@ -8846,4 +8858,5 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.1" python-versions = ">=3.11,<3.14" -content-hash = "807cb0c70058604538b422763a89b6f75869e570049d746a4e39a67194ead7d0" + +content-hash = "948f285193134676c35a2c55038552ea2bf20b10ef234240d5041e97d163677e" diff --git a/pyproject.toml b/pyproject.toml index 86846677..fb1e80b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -76,6 +76,7 @@ onepassword-sdk = "0.3.0" types-boto3 = {extras = ["full"], version = "^1.38.31"} lark = "^1.2.2" libcst = "^1.8.2" +curlparser = "^0.1.0" [tool.poetry.group.dev.dependencies] isort = "^5.13.2" diff --git a/skyvern/forge/sdk/core/aiohttp_helper.py b/skyvern/forge/sdk/core/aiohttp_helper.py index 0f7d2386..002a7b89 100644 --- a/skyvern/forge/sdk/core/aiohttp_helper.py +++ b/skyvern/forge/sdk/core/aiohttp_helper.py @@ -10,6 +10,53 @@ LOG = structlog.get_logger() DEFAULT_REQUEST_TIMEOUT = 30 +async def aiohttp_request( + method: str, + url: str, + headers: dict[str, str] | None = None, + data: dict[str, Any] | None = None, + json_data: dict[str, Any] | None = None, + cookies: dict[str, str] | None = None, + timeout: int = DEFAULT_REQUEST_TIMEOUT, + follow_redirects: bool = True, + proxy: str | None = None, +) -> tuple[int, dict[str, str], Any]: + """ + Generic HTTP request function that supports all HTTP methods. + + Returns: + Tuple of (status_code, response_headers, response_body) + where response_body can be dict (for JSON) or str (for text) + """ + async with aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=timeout)) as session: + request_kwargs = { + "url": url, + "headers": headers or {}, + "cookies": cookies, + "proxy": proxy, + "allow_redirects": follow_redirects, + } + + # Handle body based on content type and method + if method.upper() != "GET": + if json_data is not None: + request_kwargs["json"] = json_data + elif data is not None: + request_kwargs["data"] = data + + async with session.request(method.upper(), **request_kwargs) as response: + response_headers = dict(response.headers) + + # Try to parse response as JSON + try: + response_body = await response.json() + except (aiohttp.ContentTypeError, Exception): + # If not JSON, get as text + response_body = await response.text() + + return response.status, response_headers, response_body + + async def aiohttp_get_json( url: str, params: dict[str, Any] | None = None, diff --git a/skyvern/forge/sdk/core/curl_converter.py b/skyvern/forge/sdk/core/curl_converter.py new file mode 100644 index 00000000..2c292f11 --- /dev/null +++ b/skyvern/forge/sdk/core/curl_converter.py @@ -0,0 +1,123 @@ +import json +from typing import Any + +import curlparser +import structlog + +LOG = structlog.get_logger() + + +def parse_curl_command(curl_command: str) -> dict[str, Any]: + """ + Parse a curl command into HTTP request parameters using curlparser library. + + Args: + curl_command: The curl command string to parse + + Returns: + Dict containing: + - method: HTTP method (GET, POST, etc.) + - url: The URL to request + - headers: Dict of headers + - body: Request body as dict (parsed from JSON) or None + + Raises: + ValueError: If the curl command cannot be parsed + """ + try: + # Parse the curl command + parsed = curlparser.parse(curl_command) + + # Extract the components + method = parsed.method.upper() if parsed.method else "GET" + if not parsed.method: + LOG.info( + "No HTTP method found in curl command, defaulting to GET", + curl_command=curl_command, + ) + + result = { + "method": method, + "url": parsed.url, + "headers": {}, + "body": None, + } + + # Process headers - curlparser returns headers as an OrderedDict + if parsed.header: + result["headers"] = {k: v.strip() for k, v in parsed.header.items()} + + # Process body/data + if parsed.data: + # Try to parse as JSON + try: + if isinstance(parsed.data, list): + # Join multiple data parts + data_str = "".join(parsed.data) + else: + data_str = parsed.data + + result["body"] = json.loads(data_str) + except (json.JSONDecodeError, TypeError) as e: + # If not valid JSON, convert to dict with single "data" key + LOG.warning( + "Curl data is not valid JSON, wrapping in data key", + data=parsed.data, + data_str=data_str if "data_str" in locals() else None, + error_type=type(e).__name__, + error_message=str(e), + curl_command=curl_command, + ) + result["body"] = {"data": data_str if "data_str" in locals() else parsed.data} + + # Process JSON data if provided + if hasattr(parsed, "json") and parsed.json: + try: + result["body"] = json.loads(parsed.json) + except (json.JSONDecodeError, TypeError) as e: + LOG.warning( + "Curl json is not valid JSON", + json=parsed.json, + error_type=type(e).__name__, + error_message=str(e), + curl_command=curl_command, + ) + result["body"] = {"data": parsed.json} + + # Validate URL + if not result["url"]: + raise ValueError("No URL found in curl command") + + return result + + except Exception as e: + LOG.exception( + "Failed to parse curl command", + curl_command=curl_command, # Log entire command for debugging + ) + raise ValueError(f"Failed to parse curl command: {str(e)}") + + +def curl_to_http_request_block_params(curl_command: str) -> dict[str, Any]: + """ + Convert a curl command to parameters suitable for HttpRequestBlock. + + This is a convenience function that can be used in API endpoints + to convert curl commands before creating workflow blocks. + + Args: + curl_command: The curl command string + + Returns: + Dict with keys matching HttpRequestBlock parameters + """ + parsed = parse_curl_command(curl_command) + + return { + "method": parsed["method"], + "url": parsed["url"], + "headers": parsed["headers"] if parsed["headers"] else None, + "body": parsed["body"], + "timeout": 30, # Default timeout + "follow_redirects": True, # Default follow redirects + } diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index dc35ae4e..d8f3ac13 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -2765,6 +2765,14 @@ class AgentDB: description: str | None = None, block_workflow_run_id: str | None = None, engine: str | None = None, + # HTTP request block parameters + http_request_method: str | None = None, + http_request_url: str | None = None, + http_request_headers: dict[str, str] | None = None, + http_request_body: dict[str, Any] | None = None, + http_request_parameters: dict[str, Any] | None = None, + http_request_timeout: int | None = None, + http_request_follow_redirects: bool | None = None, ) -> WorkflowRunBlock: async with self.Session() as session: workflow_run_block = ( @@ -2807,6 +2815,21 @@ class AgentDB: workflow_run_block.block_workflow_run_id = block_workflow_run_id if engine: workflow_run_block.engine = engine + # HTTP request block fields + if http_request_method: + workflow_run_block.http_request_method = http_request_method + if http_request_url: + workflow_run_block.http_request_url = http_request_url + if http_request_headers: + workflow_run_block.http_request_headers = http_request_headers + if http_request_body: + workflow_run_block.http_request_body = http_request_body + if http_request_parameters: + workflow_run_block.http_request_parameters = http_request_parameters + if http_request_timeout: + workflow_run_block.http_request_timeout = http_request_timeout + if http_request_follow_redirects is not None: + workflow_run_block.http_request_follow_redirects = http_request_follow_redirects await session.commit() await session.refresh(workflow_run_block) else: diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 9fb8e848..463afc1f 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -606,6 +606,15 @@ class WorkflowRunBlockModel(Base): # wait block wait_sec = Column(Integer, nullable=True) + # http request block + http_request_method = Column(String(10), nullable=True) + http_request_url = Column(String, nullable=True) + http_request_headers = Column(JSON, nullable=True) + http_request_body = Column(JSON, nullable=True) + http_request_parameters = Column(JSON, nullable=True) + http_request_timeout = Column(Integer, nullable=True) + http_request_follow_redirects = Column(Boolean, nullable=True) + created_at = Column(DateTime, default=datetime.datetime.utcnow, nullable=False) modified_at = Column(DateTime, default=datetime.datetime.utcnow, onupdate=datetime.datetime.utcnow, nullable=False) diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 86a24b62..de9625f2 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -15,6 +15,7 @@ from skyvern.forge.prompts import prompt_engine from skyvern.forge.sdk.api.llm.exceptions import LLMProviderError from skyvern.forge.sdk.artifact.models import Artifact, ArtifactType from skyvern.forge.sdk.core import skyvern_context +from skyvern.forge.sdk.core.curl_converter import curl_to_http_request_block_params from skyvern.forge.sdk.core.permissions.permission_checker_factory import PermissionCheckerFactory from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.db.enums import OrganizationAuthTokenType @@ -682,6 +683,60 @@ async def delete_workflow( await app.WORKFLOW_SERVICE.delete_workflow_by_permanent_id(workflow_id, current_org.organization_id) +@legacy_base_router.post( + "/utilities/curl-to-http", + tags=["Utilities"], + openapi_extra={ + "x-fern-sdk-group-name": "utilities", + "x-fern-sdk-method-name": "convert_curl_to_http", + }, + description="Convert a curl command to HTTP request parameters", + summary="Convert curl to HTTP parameters", + responses={ + 200: {"description": "Successfully converted curl command"}, + 400: {"description": "Invalid curl command"}, + }, +) +@legacy_base_router.post("/utilities/curl-to-http/", include_in_schema=False) +async def convert_curl_to_http( + request: dict[str, str], + current_org: Organization = Depends(org_auth_service.get_current_org), +) -> dict[str, Any]: + """ + Convert a curl command to HTTP request parameters. + + This endpoint is useful for converting curl commands to the format + needed by the HTTP Request workflow block. + + Request body should contain: + - curl_command: The curl command string to convert + + Returns: + - method: HTTP method + - url: The URL + - headers: Dict of headers + - body: Request body as dict + - timeout: Default timeout + - follow_redirects: Default follow redirects setting + """ + curl_command = request.get("curl_command") + if not curl_command: + raise HTTPException(status_code=400, detail="curl_command is required in the request body") + + try: + result = curl_to_http_request_block_params(curl_command) + return result + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + LOG.error( + "Failed to convert curl command", + error=str(e), + organization_id=current_org.organization_id, + ) + raise HTTPException(status_code=400, detail=f"Failed to convert curl command: {str(e)}") + + @base_router.get( "/artifacts/{artifact_id}", tags=["Artifacts"], diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index 022b724e..5de0c4d3 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -15,7 +15,7 @@ from email.message import EmailMessage from enum import StrEnum from pathlib import Path from typing import Annotated, Any, Awaitable, Callable, Literal, Union -from urllib.parse import quote +from urllib.parse import quote, urlparse import filetype import structlog @@ -100,6 +100,7 @@ class BlockType(StrEnum): FILE_DOWNLOAD = "file_download" GOTO_URL = "goto_url" PDF_PARSER = "pdf_parser" + HTTP_REQUEST = "http_request" class BlockStatus(StrEnum): @@ -2592,6 +2593,188 @@ class TaskV2Block(Block): ) +class HttpRequestBlock(Block): + block_type: Literal[BlockType.HTTP_REQUEST] = BlockType.HTTP_REQUEST + + # Individual HTTP parameters + method: str = "GET" + url: str | None = None + headers: dict[str, str] | None = None + body: dict[str, Any] | None = None # Changed to consistently be dict only + timeout: int = 30 + follow_redirects: bool = True + + # Parameters for templating + parameters: list[PARAMETER_TYPE] = [] + + def get_all_parameters( + self, + workflow_run_id: str, + ) -> list[PARAMETER_TYPE]: + parameters = self.parameters + workflow_run_context = self.get_workflow_run_context(workflow_run_id) + + # Check if url is a parameter + if self.url and workflow_run_context.has_parameter(self.url): + if self.url not in [parameter.key for parameter in parameters]: + parameters.append(workflow_run_context.get_parameter(self.url)) + + return parameters + + def format_potential_template_parameters(self, workflow_run_context: WorkflowRunContext) -> None: + """Format template parameters in the block fields""" + if self.url: + self.url = self.format_block_parameter_template_from_workflow_run_context(self.url, workflow_run_context) + + if self.body: + # If body is provided as a template string, try to parse it as JSON + for key, value in self.body.items(): + if isinstance(value, str): + self.body[key] = self.format_block_parameter_template_from_workflow_run_context( + value, workflow_run_context + ) + + if self.headers: + for key, value in self.headers.items(): + self.headers[key] = self.format_block_parameter_template_from_workflow_run_context( + value, workflow_run_context + ) + + def validate_url(self, url: str) -> bool: + """Validate if the URL is properly formatted""" + try: + result = urlparse(url) + return all([result.scheme, result.netloc]) + except Exception: + return False + + async def execute( + self, + workflow_run_id: str, + workflow_run_block_id: str, + organization_id: str | None = None, + browser_session_id: str | None = None, + **kwargs: dict, + ) -> BlockResult: + """Execute the HTTP request and return the response""" + from skyvern.forge.sdk.core.aiohttp_helper import aiohttp_request + + workflow_run_context = self.get_workflow_run_context(workflow_run_id) + + try: + self.format_potential_template_parameters(workflow_run_context) + except Exception as e: + return await self.build_block_result( + success=False, + failure_reason=f"Failed to format jinja template: {str(e)}", + output_parameter_value=None, + status=BlockStatus.failed, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + ) + + # Validate URL + if not self.url: + return await self.build_block_result( + success=False, + failure_reason="URL is required for HTTP request", + output_parameter_value=None, + status=BlockStatus.failed, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + ) + + if not self.validate_url(self.url): + return await self.build_block_result( + success=False, + failure_reason=f"Invalid URL format: {self.url}", + output_parameter_value=None, + status=BlockStatus.failed, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + ) + + # Execute HTTP request using the generic aiohttp_request function + try: + LOG.info( + "Executing HTTP request", + method=self.method, + url=self.url, + headers=self.headers, + has_body=bool(self.body), + workflow_run_id=workflow_run_id, + ) + + # Use the generic aiohttp_request function + status_code, response_headers, response_body = await aiohttp_request( + method=self.method, + url=self.url, + headers=self.headers, + json_data=self.body, + timeout=self.timeout, + follow_redirects=self.follow_redirects, + ) + + response_data = { + "status_code": status_code, + "headers": response_headers, + "body": response_body, + "url": self.url, + } + + LOG.info( + "HTTP request completed", + status_code=status_code, + url=self.url, + method=self.method, + workflow_run_id=workflow_run_id, + ) + + # Determine success based on status code + success = 200 <= status_code < 300 + + await self.record_output_parameter_value(workflow_run_context, workflow_run_id, response_data) + + return await self.build_block_result( + success=success, + failure_reason=None if success else f"HTTP {status_code}: {response_body}", + output_parameter_value=response_data, + status=BlockStatus.completed if success else BlockStatus.failed, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + ) + + except asyncio.TimeoutError: + error_data = {"error": "Request timed out", "error_type": "timeout"} + await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data) + return await self.build_block_result( + success=False, + failure_reason=f"Request timed out after {self.timeout} seconds", + output_parameter_value=error_data, + status=BlockStatus.timed_out, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + ) + except Exception as e: + error_data = {"error": str(e), "error_type": "unknown"} + LOG.warning( # Changed from LOG.exception to LOG.warning as requested + "HTTP request failed with unexpected error", + error=str(e), + url=self.url, + method=self.method, + workflow_run_id=workflow_run_id, + ) + await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data) + return await self.build_block_result( + success=False, + failure_reason=f"HTTP request failed: {str(e)}", + output_parameter_value=error_data, + status=BlockStatus.failed, + workflow_run_block_id=workflow_run_block_id, + organization_id=organization_id, + ) + + BlockSubclasses = Union[ ForLoopBlock, TaskBlock, @@ -2612,5 +2795,6 @@ BlockSubclasses = Union[ UrlBlock, TaskV2Block, FileUploadBlock, + HttpRequestBlock, ] BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")] diff --git a/skyvern/forge/sdk/workflow/models/yaml.py b/skyvern/forge/sdk/workflow/models/yaml.py index 87a37edc..f3f892f0 100644 --- a/skyvern/forge/sdk/workflow/models/yaml.py +++ b/skyvern/forge/sdk/workflow/models/yaml.py @@ -371,6 +371,21 @@ class TaskV2BlockYAML(BlockYAML): max_steps: int = settings.MAX_STEPS_PER_TASK_V2 +class HttpRequestBlockYAML(BlockYAML): + block_type: Literal[BlockType.HTTP_REQUEST] = BlockType.HTTP_REQUEST # type: ignore + + # Individual HTTP parameters + method: str = "GET" + url: str | None = None + headers: dict[str, str] | None = None + body: dict[str, Any] | None = None # Changed to consistently be dict only + timeout: int = 30 + follow_redirects: bool = True + + # Parameter keys for templating + parameter_keys: list[str] | None = None + + PARAMETER_YAML_SUBCLASSES = ( AWSSecretParameterYAML | BitwardenLoginCredentialParameterYAML @@ -404,6 +419,7 @@ BLOCK_YAML_SUBCLASSES = ( | UrlBlockYAML | PDFParserBlockYAML | TaskV2BlockYAML + | HttpRequestBlockYAML ) BLOCK_YAML_TYPES = Annotated[BLOCK_YAML_SUBCLASSES, Field(discriminator="block_type")] diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index 6a22d7ac..0f541f2d 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -48,6 +48,7 @@ from skyvern.forge.sdk.workflow.models.block import ( FileParserBlock, FileUploadBlock, ForLoopBlock, + HttpRequestBlock, LoginBlock, NavigationBlock, PDFParserBlock, @@ -2064,6 +2065,24 @@ class WorkflowService: model=block_yaml.model, output_parameter=output_parameter, ) + elif block_yaml.block_type == BlockType.HTTP_REQUEST: + http_request_block_parameters = ( + [parameters[parameter_key] for parameter_key in block_yaml.parameter_keys] + if block_yaml.parameter_keys + else [] + ) + return HttpRequestBlock( + label=block_yaml.label, + method=block_yaml.method, + url=block_yaml.url, + headers=block_yaml.headers, + body=block_yaml.body, + timeout=block_yaml.timeout, + follow_redirects=block_yaml.follow_redirects, + parameters=http_request_block_parameters, + output_parameter=output_parameter, + continue_on_failure=block_yaml.continue_on_failure, + ) elif block_yaml.block_type == BlockType.GOTO_URL: return UrlBlock( label=block_yaml.label,