auto prepend scheme to url (#1335)

This commit is contained in:
LawyZheng 2024-12-06 11:56:12 +08:00 committed by GitHub
parent db5b9d1dbd
commit 873b484ffa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 72 additions and 16 deletions

View file

@ -504,11 +504,9 @@ class CachedActionPlanError(SkyvernException):
super().__init__(message)
class InvalidUrl(SkyvernHTTPException):
class InvalidUrl(SkyvernException):
def __init__(self, url: str) -> None:
super().__init__(
f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls.", status_code=status.HTTP_400_BAD_REQUEST
)
super().__init__(f"Invalid URL: {url}. Skyvern supports HTTP and HTTPS urls with max 2083 character length.")
class BlockedHost(SkyvernHTTPException):

View file

@ -48,7 +48,7 @@ from skyvern.forge.sdk.api.files import get_path_for_workflow_download_directory
from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.core.security import generate_skyvern_signature
from skyvern.forge.sdk.core.validators import validate_url
from skyvern.forge.sdk.core.validators import prepend_scheme_and_validate_url
from skyvern.forge.sdk.db.enums import TaskType
from skyvern.forge.sdk.models import Organization, Step, StepStatus
from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus
@ -139,7 +139,11 @@ class ForgeAgent:
task_url = working_page.url
task_url = validate_url(task_url)
task_url = prepend_scheme_and_validate_url(task_url)
totp_verification_url = task_block.totp_verification_url
if totp_verification_url:
totp_verification_url = prepend_scheme_and_validate_url(totp_verification_url)
task = await app.DATABASE.create_task(
url=task_url,
task_type=task_block.task_type,
@ -147,7 +151,7 @@ class ForgeAgent:
terminate_criterion=task_block.terminate_criterion,
title=task_block.title or task_block.label,
webhook_callback_url=None,
totp_verification_url=task_block.totp_verification_url,
totp_verification_url=totp_verification_url,
totp_identifier=task_block.totp_identifier,
navigation_goal=task_block.navigation_goal,
data_extraction_goal=task_block.data_extraction_goal,

View file

@ -1,4 +1,5 @@
import ipaddress
from urllib.parse import urlparse
from pydantic import HttpUrl, ValidationError, parse_obj_as
@ -6,6 +7,26 @@ from skyvern.config import settings
from skyvern.exceptions import InvalidUrl
def prepend_scheme_and_validate_url(url: str) -> str:
    """Default a scheme-less URL to ``https://`` and validate the result.

    Args:
        url: The URL as supplied by the caller. An empty value is returned
            unchanged without validation.

    Returns:
        The input URL, with ``https://`` prepended when it carried no scheme.

    Raises:
        InvalidUrl: If the URL cannot be parsed, uses a scheme other than
            ``http``/``https``, or fails ``HttpUrl`` validation.
    """
    if not url:
        return url

    try:
        parsed_url = urlparse(url=url)
    except ValueError as exc:
        # urlparse itself rejects some malformed inputs (e.g. an unbalanced
        # IPv6 bracket); surface those as the same domain error.
        raise InvalidUrl(url=url) from exc

    scheme = parsed_url.scheme
    if scheme and scheme not in ("http", "https") and not parsed_url.netloc:
        # For scheme-less ``host:port`` input, urlparse reports the host as
        # the scheme (``localhost:8080/x`` -> scheme ``localhost``, path
        # ``8080/x``). When what follows the colon is a valid port number,
        # treat the input as having no scheme so ``https://`` is prepended.
        port = parsed_url.path.partition("/")[0]
        if port.isascii() and port.isdigit() and len(port) <= 5 and int(port) <= 65535:
            scheme = ""

    if scheme and scheme not in ("http", "https"):
        raise InvalidUrl(url=url)

    # if url doesn't contain any scheme, we prepend `https` to it by default
    if not scheme:
        url = f"https://{url}"

    try:
        HttpUrl(url)
    except ValidationError as exc:
        raise InvalidUrl(url=url) from exc

    return url
def validate_url(url: str) -> str:
try:
if url:

View file

@ -4,10 +4,11 @@ from datetime import datetime
from enum import StrEnum
from typing import Any
from fastapi import status
from pydantic import BaseModel, Field, HttpUrl, field_validator
from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, TaskAlreadyCanceled
from skyvern.forge.sdk.core.validators import is_blocked_host
from skyvern.exceptions import BlockedHost, InvalidTaskStatusTransition, SkyvernHTTPException, TaskAlreadyCanceled
from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url
from skyvern.forge.sdk.db.enums import TaskType
@ -99,28 +100,37 @@ class TaskBase(BaseModel):
class TaskRequest(TaskBase):
url: HttpUrl = Field(
url: str = Field(
...,
description="Starting URL for the task.",
examples=["https://www.geico.com"],
)
webhook_callback_url: HttpUrl | None = Field(
webhook_callback_url: str | None = Field(
default=None,
description="The URL to call when the task is completed.",
examples=["https://my-webhook.com"],
)
totp_verification_url: HttpUrl | None = None
totp_verification_url: str | None = None
@field_validator("url", "webhook_callback_url", "totp_verification_url")
@classmethod
def validate_urls(cls, v: HttpUrl | None) -> HttpUrl | None:
if not v or not v.host:
def validate_urls(cls, url: str | None) -> str | None:
if url is None:
return None
try:
url = prepend_scheme_and_validate_url(url=url)
v = HttpUrl(url=url)
except Exception as e:
raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST)
if not v.host:
return None
host = v.host
blocked = is_blocked_host(host)
if blocked:
raise BlockedHost(host=host)
return v
return str(v)
class TaskStatus(StrEnum):

View file

@ -2,8 +2,11 @@ from datetime import datetime
from enum import StrEnum
from typing import Any, List
from pydantic import BaseModel
from fastapi import status
from pydantic import BaseModel, HttpUrl, field_validator
from skyvern.exceptions import BlockedHost, SkyvernHTTPException
from skyvern.forge.sdk.core.validators import is_blocked_host, prepend_scheme_and_validate_url
from skyvern.forge.sdk.schemas.tasks import ProxyLocation
from skyvern.forge.sdk.workflow.exceptions import WorkflowDefinitionHasDuplicateBlockLabels
from skyvern.forge.sdk.workflow.models.block import BlockTypeVar
@ -17,6 +20,26 @@ class WorkflowRequestBody(BaseModel):
totp_verification_url: str | None = None
totp_identifier: str | None = None
@field_validator("webhook_callback_url", "totp_verification_url")
@classmethod
def validate_urls(cls, url: str | None) -> str | None:
    """Normalize and vet the webhook / TOTP verification URLs.

    ``None`` passes through untouched. Otherwise the value is run through
    ``prepend_scheme_and_validate_url`` and parsed as an ``HttpUrl``; any
    failure in that step is re-raised as a ``SkyvernHTTPException`` with
    status 400. A parsed URL without a host yields ``None``, a blocked host
    raises ``BlockedHost``, and the string form of the parsed URL is
    returned otherwise.
    """
    if url is None:
        return None

    try:
        parsed = HttpUrl(url=prepend_scheme_and_validate_url(url=url))
    except Exception as e:
        raise SkyvernHTTPException(message=str(e), status_code=status.HTTP_400_BAD_REQUEST)

    hostname = parsed.host
    if not hostname:
        return None
    if is_blocked_host(hostname):
        raise BlockedHost(host=hostname)
    return str(parsed)
class RunWorkflowResponse(BaseModel):
workflow_id: str