auto prepend scheme to url (#1335)

This commit is contained in:
LawyZheng 2024-12-06 11:56:12 +08:00 committed by GitHub
parent db5b9d1dbd
commit 873b484ffa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 72 additions and 16 deletions

View file

@ -504,11 +504,9 @@ class CachedActionPlanError(SkyvernException):
super().__init__(message) super().__init__(message)
class InvalidUrl(SkyvernHTTPException): class InvalidUrl(SkyvernException):
def __init__(self, url: str) -> None: def __init__(self, url: str) -> None:
super().__init__( super().__init__(f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls with max 2083 character length.")
f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls.", status_code=status.HTTP_400_BAD_REQUEST
)
class BlockedHost(SkyvernHTTPException): class BlockedHost(SkyvernHTTPException):

View file

@ -48,7 +48,7 @@ from skyvern.forge.sdk.api.files import get_path_for_workflow_download_directory
from skyvern.forge.sdk.artifact.models import ArtifactType from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.security import generate_skyvern_signature from skyvern.forge.sdk.core.security import generate_skyvern_signature
from skyvern.forge.sdk.core.validators import validate_url from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url
from skyvern.forge.sdk.db.enums import TaskType from skyvern.forge.sdk.db.enums import TaskType
from skyvern.forge.sdk.models import Organization, Step, StepStatus from skyvern.forge.sdk.models import Organization, Step, StepStatus
from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus
@ -139,7 +139,11 @@ class ForgeAgent:
task_url = working_page.url task_url = working_page.url
task_url = validate_url(task_url) task_url = prepend_scheme_and_validate_url(task_url)
totp_verification_url = task_block.totp_verification_url
if totp_verification_url:
totp_verification_url = prepend_scheme_and_validate_url(totp_verification_url)
task = await app.DATABASE.create_task( task = await app.DATABASE.create_task(
url=task_url, url=task_url,
task_type=task_block.task_type, task_type=task_block.task_type,
@ -147,7 +151,7 @@ class ForgeAgent:
terminate_criterion=task_block.terminate_criterion, terminate_criterion=task_block.terminate_criterion,
title=task_block.title or task_block.label, title=task_block.title or task_block.label,
webhook_callback_url=None, webhook_callback_url=None,
totp_verification_url=task_block.totp_verification_url, totp_verification_url=totp_verification_url,
totp_identifier=task_block.totp_identifier, totp_identifier=task_block.totp_identifier,
navigation_goal=task_block.navigation_goal, navigation_goal=task_block.navigation_goal,
data_extraction_goal=task_block.data_extraction_goal, data_extraction_goal=task_block.data_extraction_goal,

View file

@ -1,4 +1,5 @@
import ipaddress import ipaddress
from urllib.parse import urlparse
from pydantic import HttpUrl, ValidationError, parse_obj_as from pydantic import HttpUrl, ValidationError, parse_obj_as
@ -6,6 +7,26 @@ from skyvern.config import settings
from skyvern.exceptions import InvalidUrl from skyvern.exceptions import InvalidUrl
def prepend_scheme_and_validate_url(url: str) -> str:
    """Default a scheme-less URL to ``https://`` and validate it as an HTTP(S) URL.

    Args:
        url: The URL as supplied by the caller. A falsy value (e.g. ``""``) is
            returned unchanged without any validation.

    Returns:
        The URL, with ``https://`` prepended when no scheme was supplied.

    Raises:
        InvalidUrl: If the URL carries a scheme other than ``http``/``https``,
            or if constructing ``HttpUrl`` from it raises ``ValidationError``.
    """
    if not url:
        return url

    parsed_url = urlparse(url=url)
    scheme = parsed_url.scheme
    if scheme and scheme not in ("http", "https"):
        # urlparse reads a bare "host:port[/path]" (e.g. "localhost:8080") as
        # scheme="localhost" with an empty netloc and the port leading the path.
        # That input has no real scheme, so it must get the https default instead
        # of being rejected. Anything else with a non-HTTP(S) scheme stays invalid.
        port_candidate = parsed_url.path.split("/", 1)[0]
        looks_like_port = port_candidate.isascii() and port_candidate.isdigit()
        if parsed_url.netloc or not looks_like_port:
            raise InvalidUrl(url=url)
        scheme = ""

    # if url doesn't contain any scheme, we prepend `https` to it by default
    if not scheme:
        url = f"https://{url}"

    try:
        HttpUrl(url)
    except ValidationError as err:
        # Chain explicitly so the underlying validation detail is kept as the cause.
        raise InvalidUrl(url=url) from err

    return url
def validate_url(url: str) -> str: def validate_url(url: str) -> str:
try: try:
if url: if url:

View file

@ -4,10 +4,11 @@ from datetime import datetime
from enum import StrEnum from enum import StrEnum
from typing import Any from typing import Any
from fastapi import status
from pydantic import BaseModel, Field, HttpUrl, field_validator from pydantic import BaseModel, Field, HttpUrl, field_validator
from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, TaskAlreadyCanceled from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, SkyvernHTTPException, TaskAlreadyCanceled
from skyvern.forge.sdk.core.validators import is_blocked_host from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url
from skyvern.forge.sdk.db.enums import TaskType from skyvern.forge.sdk.db.enums import TaskType
@ -99,28 +100,37 @@ class TaskBase(BaseModel):
class TaskRequest(TaskBase): class TaskRequest(TaskBase):
url: HttpUrl = Field( url: str = Field(
..., ...,
description="Starting URL for the task.", description="Starting URL for the task.",
examples=["https://www.geico.com"], examples=["https://www.geico.com"],
) )
webhook_callback_url: HttpUrl | None = Field( webhook_callback_url: str | None = Field(
default=None, default=None,
description="The URL to call when the task is completed.", description="The URL to call when the task is completed.",
examples=["https://my-webhook.com"], examples=["https://my-webhook.com"],
) )
totp_verification_url: HttpUrl | None = None totp_verification_url: str | None = None
@field_validator("url", "webhook_callback_url", "totp_verification_url") @field_validator("url", "webhook_callback_url", "totp_verification_url")
@classmethod @classmethod
def validate_urls(cls, v: HttpUrl | None) -> HttpUrl | None: def validate_urls(cls, url: str | None) -> str | None:
if not v or not v.host: if url is None:
return None
try:
url = prepend_scheme_and_validate_url(url=url)
v = HttpUrl(url=url)
except Exception as e:
raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST)
if not v.host:
return None return None
host = v.host host = v.host
blocked = is_blocked_host(host) blocked = is_blocked_host(host)
if blocked: if blocked:
raise BlockedHost(host=host) raise BlockedHost(host=host)
return v return str(v)
class TaskStatus(StrEnum): class TaskStatus(StrEnum):

View file

@ -2,8 +2,11 @@ from datetime import datetime
from enum import StrEnum from enum import StrEnum
from typing import Any, List from typing import Any, List
from pydantic import BaseModel from fastapi import status
from pydantic import BaseModel, HttpUrl, field_validator
from skyvern.exceptions import BlockedHost, SkyvernHTTPException
from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url
from skyvern.forge.sdk.schemas.tasks import ProxyLocation from skyvern.forge.sdk.schemas.tasks import ProxyLocation
from skyvern.forge.sdk.workflow.exceptions import WorkflowDefinitionHasDuplicateBlockLabels from skyvern.forge.sdk.workflow.exceptions import WorkflowDefinitionHasDuplicateBlockLabels
from skyvern.forge.sdk.workflow.models.block import BlockTypeVar from skyvern.forge.sdk.workflow.models.block import BlockTypeVar
@ -17,6 +20,26 @@ class WorkflowRequestBody(BaseModel):
totp_verification_url: str | None = None totp_verification_url: str | None = None
totp_identifier: str | None = None totp_identifier: str | None = None
@field_validator("webhook_callback_url", "totp_verification_url")
@classmethod
def validate_urls(cls, url: str | None) -> str | None:
    """Normalise and vet the webhook / TOTP verification URL fields.

    Args:
        url: The raw field value, or ``None`` when the field was not supplied.

    Returns:
        ``None`` when the input is ``None`` or the parsed URL has no host;
        otherwise the string form of the parsed ``HttpUrl``.

    Raises:
        SkyvernHTTPException: With a 400 status, wrapping the message of any
            exception raised while prepending the scheme or building ``HttpUrl``.
        BlockedHost: If ``is_blocked_host`` reports the URL's host as blocked.
    """
    if url is None:
        return None

    try:
        parsed = HttpUrl(url=prepend_scheme_and_validate_url(url=url))
    except Exception as e:
        # Any failure here is reported to the client as a bad request.
        raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST)

    host = parsed.host
    if not host:
        return None

    if is_blocked_host(host):
        raise BlockedHost(host=host)

    return str(parsed)
class RunWorkflowResponse(BaseModel): class RunWorkflowResponse(BaseModel):
workflow_id: str workflow_id: str