mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
582 lines
22 KiB
Python
582 lines
22 KiB
Python
"""Tests for ScriptReviewer quality validators: proactive misuse, fragile selectors, hardcoded run data."""
|
|
|
|
from skyvern.services.script_reviewer import ScriptReviewer
|
|
|
|
|
|
class TestValidateProactiveMisuse:
|
|
"""Tests for _validate_proactive_misuse."""
|
|
|
|
def setup_method(self) -> None:
|
|
self.reviewer = ScriptReviewer()
|
|
|
|
def test_fallback_is_fine(self) -> None:
|
|
code = """
|
|
async def login(page, context):
|
|
await page.fill(selector='input[name="email"]', value='test', ai='fallback', prompt='email')
|
|
await page.click(selector='button', ai='fallback', prompt='submit')
|
|
"""
|
|
assert self.reviewer._validate_proactive_misuse(code) is None
|
|
|
|
def test_proactive_on_fill_flagged(self) -> None:
|
|
code = """
|
|
async def login(page, context):
|
|
await page.fill(selector='input[placeholder="Username"]', ai='proactive', prompt='username')
|
|
"""
|
|
error = self.reviewer._validate_proactive_misuse(code)
|
|
assert error is not None
|
|
assert "page.fill()" in error
|
|
assert "ai='fallback'" in error
|
|
|
|
def test_proactive_on_click_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='button:has-text("Submit")', ai='proactive', prompt='submit')
|
|
"""
|
|
error = self.reviewer._validate_proactive_misuse(code)
|
|
assert error is not None
|
|
assert "page.click()" in error
|
|
|
|
def test_proactive_on_select_option_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.select_option(selector='select[name="format"]', value='PDF', ai='proactive', prompt='format')
|
|
"""
|
|
error = self.reviewer._validate_proactive_misuse(code)
|
|
assert error is not None
|
|
assert "page.select_option()" in error
|
|
|
|
def test_proactive_on_type_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.type(selector='input[name="search"]', ai='proactive', prompt='search')
|
|
"""
|
|
error = self.reviewer._validate_proactive_misuse(code)
|
|
assert error is not None
|
|
assert "page.type()" in error
|
|
|
|
def test_proactive_on_extract_not_flagged(self) -> None:
|
|
"""ai='proactive' on extract is legitimate — extract doesn't have selectors."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
result = await page.extract(prompt='Get the invoice data', ai='proactive')
|
|
"""
|
|
# extract is not in _INTERACTION_METHODS, so this should pass
|
|
assert self.reviewer._validate_proactive_misuse(code) is None
|
|
|
|
def test_comments_ignored(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
# await page.fill(selector='input', ai='proactive', prompt='test')
|
|
await page.fill(selector='input', value='x', ai='fallback', prompt='test')
|
|
"""
|
|
assert self.reviewer._validate_proactive_misuse(code) is None
|
|
|
|
def test_multiline_call_flagged(self) -> None:
|
|
"""ai='proactive' on a continuation line should still be caught."""
|
|
code = """
|
|
async def login(page, context):
|
|
await page.fill(
|
|
selector='input[name="email"]',
|
|
value='test',
|
|
ai='proactive',
|
|
prompt='email field',
|
|
)
|
|
"""
|
|
error = self.reviewer._validate_proactive_misuse(code)
|
|
assert error is not None
|
|
assert "page.fill()" in error
|
|
|
|
def test_multiple_issues_reported(self) -> None:
|
|
code = """
|
|
async def login(page, context):
|
|
await page.fill(selector='#user', ai='proactive', prompt='user')
|
|
await page.fill(selector='#pass', ai='proactive', prompt='pass')
|
|
await page.click(selector='#submit', ai='proactive', prompt='submit')
|
|
"""
|
|
error = self.reviewer._validate_proactive_misuse(code)
|
|
assert error is not None
|
|
# Should mention multiple occurrences
|
|
assert "page.fill()" in error
|
|
assert "page.click()" in error
|
|
|
|
|
|
class TestValidateFragileSelectors:
|
|
"""Tests for _validate_fragile_selectors."""
|
|
|
|
def setup_method(self) -> None:
|
|
self.reviewer = ScriptReviewer()
|
|
|
|
def test_stable_selectors_pass(self) -> None:
|
|
code = """
|
|
async def login(page, context):
|
|
await page.fill(selector='input[name="email"]', value='test', ai='fallback', prompt='email')
|
|
await page.click(selector='button:has-text("Sign In")', ai='fallback', prompt='sign in')
|
|
await page.fill(selector='input[placeholder="Password"]', value='pass', ai='fallback', prompt='pass')
|
|
"""
|
|
assert self.reviewer._validate_fragile_selectors(code) is None
|
|
|
|
def test_dotnetnuke_id_flagged(self) -> None:
|
|
code = """
|
|
async def login(page, context):
|
|
await page.click(selector='#dnn_ctl00_aMyAccount', ai='fallback', prompt='account')
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
assert "dnn_" in error
|
|
assert "DotNetNuke" in error
|
|
|
|
def test_ember_id_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='#ember-1234', ai='fallback', prompt='click')
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
assert "ember" in error.lower()
|
|
|
|
def test_react_select_id_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='#react-select-5-option-2', ai='fallback', prompt='select option')
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
assert "react-select" in error.lower()
|
|
|
|
def test_css_in_js_class_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='.css-1a2b3c', ai='fallback', prompt='click button')
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
assert "css-" in error.lower()
|
|
|
|
def test_mui_class_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='.MuiButton-root', ai='fallback', prompt='click button')
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
|
|
def test_extjs_id_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='#ext-gen-456', ai='fallback', prompt='click')
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
assert "ext-gen" in error.lower()
|
|
|
|
def test_comments_ignored(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
# selector='#dnn_ctl00_aMyAccount' is fragile but this is a comment
|
|
await page.click(selector='button:has-text("Submit")', ai='fallback', prompt='submit')
|
|
"""
|
|
assert self.reviewer._validate_fragile_selectors(code) is None
|
|
|
|
def test_multiline_call_flagged(self) -> None:
|
|
"""Fragile selector on a continuation line should still be caught."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(
|
|
selector='#dnn_ctl00_aMyAccount',
|
|
ai='fallback',
|
|
prompt='account link',
|
|
)
|
|
"""
|
|
error = self.reviewer._validate_fragile_selectors(code)
|
|
assert error is not None
|
|
assert "dnn_" in error
|
|
|
|
def test_no_selector_passes(self) -> None:
|
|
"""Code without selector= kwargs should not trigger."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(ai='fallback', prompt='click the submit button')
|
|
"""
|
|
assert self.reviewer._validate_fragile_selectors(code) is None
|
|
|
|
|
|
class TestValidateHardcodedRunData:
|
|
"""Tests for _validate_hardcoded_run_data."""
|
|
|
|
def setup_method(self) -> None:
|
|
self.reviewer = ScriptReviewer()
|
|
|
|
def test_clean_code_passes(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='button:has-text("Download")', ai='fallback', prompt='download invoice')
|
|
"""
|
|
assert self.reviewer._validate_hardcoded_run_data(code) is None
|
|
|
|
def test_date_in_selector_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='a:has-text("03/17/2026")', ai='fallback', prompt='click invoice')
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert "03/17/2026" in error
|
|
assert "date" in error.lower()
|
|
|
|
def test_iso_date_in_selector_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='td:has-text("2026-03-17")', ai='fallback', prompt='click invoice')
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert "2026-03-17" in error
|
|
|
|
def test_date_in_prompt_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.select_option(selector='select', value='PDF', ai='proactive', prompt='Select format for invoice dated 3/17/2026')
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert "3/17/2026" in error
|
|
assert "prompt" in error.lower()
|
|
|
|
def test_short_has_text_flagged(self) -> None:
|
|
"""a:has-text("6") is almost certainly hardcoded run data."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='a:has-text("6")', ai='fallback', prompt='click invoice')
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert ':has-text("6")' in error
|
|
|
|
def test_short_has_text_number_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='a:has-text("12")', ai='fallback', prompt='click row')
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
|
|
def test_stable_short_text_ok(self) -> None:
|
|
"""Common UI elements with short text like 'OK' or 'X' should pass."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='button:has-text("OK")', ai='fallback', prompt='confirm')
|
|
"""
|
|
assert self.reviewer._validate_hardcoded_run_data(code) is None
|
|
|
|
def test_long_has_text_ok(self) -> None:
|
|
""":has-text with meaningful text (3+ chars) should pass."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='a:has-text("Download")', ai='fallback', prompt='download')
|
|
"""
|
|
assert self.reviewer._validate_hardcoded_run_data(code) is None
|
|
|
|
def test_comments_ignored(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
# Don't use selector='a:has-text("6")' — it's hardcoded
|
|
await page.click(selector='a:has-text("Download")', ai='fallback', prompt='download')
|
|
"""
|
|
assert self.reviewer._validate_hardcoded_run_data(code) is None
|
|
|
|
def test_multiline_date_in_selector_flagged(self) -> None:
|
|
"""Hardcoded date on a continuation line should still be caught."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(
|
|
selector='a:has-text("03/17/2026")',
|
|
ai='fallback',
|
|
prompt='click invoice',
|
|
)
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert "03/17/2026" in error
|
|
|
|
def test_multiline_date_in_prompt_flagged(self) -> None:
|
|
"""Hardcoded date in prompt on a continuation line should still be caught."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.select_option(
|
|
selector='select',
|
|
value='PDF',
|
|
ai='proactive',
|
|
prompt='Select format for invoice dated 3/17/2026',
|
|
)
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert "3/17/2026" in error
|
|
|
|
def test_parameterized_date_ok(self) -> None:
|
|
"""Dates in context.parameters references should not trigger."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
start_date = context.parameters['download_start_date']
|
|
await page.click(selector=f'a:has-text("{start_date}")', ai='fallback', prompt='click date')
|
|
"""
|
|
# f-string selectors won't match the date regex in the selector value
|
|
assert self.reviewer._validate_hardcoded_run_data(code) is None
|
|
|
|
def test_email_in_text_patterns_flagged(self) -> None:
|
|
"""Email addresses in text_patterns are PII that should not be in cached scripts."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
state = await page.classify(
|
|
options={"login": "login page", "dashboard": "dashboard"},
|
|
text_patterns={
|
|
"login": "Welcome to Portal, Username, Password, Sign in",
|
|
"dashboard": "Logout, cmt.acme@example.com, John Smith, Billing",
|
|
},
|
|
)
|
|
"""
|
|
error = self.reviewer._validate_hardcoded_run_data(code)
|
|
assert error is not None
|
|
assert "email" in error.lower()
|
|
|
|
def test_text_patterns_without_email_ok(self) -> None:
|
|
"""Generic text_patterns without PII should pass."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
state = await page.classify(
|
|
options={"login": "login page", "dashboard": "dashboard"},
|
|
text_patterns={
|
|
"login": "Welcome, Username, Password, Sign in",
|
|
"dashboard": "Logout, Billing & Payments, Service Management",
|
|
},
|
|
)
|
|
"""
|
|
assert self.reviewer._validate_hardcoded_run_data(code) is None
|
|
|
|
|
|
class TestValidateMissingSelectors:
|
|
"""Tests for _validate_missing_selectors."""
|
|
|
|
def setup_method(self) -> None:
|
|
self.reviewer = ScriptReviewer()
|
|
|
|
def test_fallback_with_selector_is_fine(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(selector='button:has-text("Submit")', ai='fallback', prompt='submit')
|
|
"""
|
|
assert self.reviewer._validate_missing_selectors(code) is None
|
|
|
|
def test_fallback_without_selector_flagged(self) -> None:
|
|
"""ai='fallback' with no selector= silently uses AI as primary path."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(ai='fallback', prompt='Click Billing & Payments')
|
|
"""
|
|
error = self.reviewer._validate_missing_selectors(code)
|
|
assert error is not None
|
|
assert "page.click()" in error
|
|
assert "Missing selector" in error
|
|
|
|
def test_fill_without_selector_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.fill(ai='fallback', prompt='Enter username', value='test')
|
|
"""
|
|
error = self.reviewer._validate_missing_selectors(code)
|
|
assert error is not None
|
|
assert "page.fill()" in error
|
|
|
|
def test_no_ai_arg_without_selector_flagged(self) -> None:
|
|
"""Bare calls with no ai= and no selector= silently burn LLM tokens."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(prompt='Click something')
|
|
"""
|
|
error = self.reviewer._validate_missing_selectors(code)
|
|
assert error is not None
|
|
assert "page.click()" in error
|
|
assert "no ai= argument" in error
|
|
|
|
def test_proactive_without_selector_not_flagged(self) -> None:
|
|
"""ai='proactive' without selector is intentional — AI always generates the value."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(ai='proactive', prompt='Click something')
|
|
"""
|
|
assert self.reviewer._validate_missing_selectors(code) is None
|
|
|
|
def test_multiline_call_flagged(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(
|
|
ai='fallback',
|
|
prompt='Click Billing & Payments',
|
|
)
|
|
"""
|
|
error = self.reviewer._validate_missing_selectors(code)
|
|
assert error is not None
|
|
assert "page.click()" in error
|
|
|
|
def test_multiline_with_selector_ok(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.click(
|
|
selector='a:has-text("Billing")',
|
|
ai='fallback',
|
|
prompt='Click billing link',
|
|
)
|
|
"""
|
|
assert self.reviewer._validate_missing_selectors(code) is None
|
|
|
|
def test_comments_ignored(self) -> None:
|
|
code = """
|
|
async def block_fn(page, context):
|
|
# await page.click(ai='fallback', prompt='old code')
|
|
await page.click(selector='button', ai='fallback', prompt='submit')
|
|
"""
|
|
assert self.reviewer._validate_missing_selectors(code) is None
|
|
|
|
def test_non_interaction_methods_ignored(self) -> None:
|
|
"""Methods like page.wait, page.complete are not interaction methods."""
|
|
code = """
|
|
async def block_fn(page, context):
|
|
await page.wait(ai='fallback', prompt='wait for page')
|
|
"""
|
|
assert self.reviewer._validate_missing_selectors(code) is None
|
|
|
|
|
|
class TestExtractCachedBlocks:
|
|
"""Tests for the shared block extraction utilities."""
|
|
|
|
def test_extract_all_blocks(self):
|
|
from skyvern.services.workflow_script_service import extract_cached_blocks_from_source
|
|
|
|
source = """import skyvern
|
|
|
|
@skyvern.cached(cache_key = 'login')
|
|
async def login(page, context):
|
|
await page.goto('https://example.com')
|
|
await page.complete()
|
|
|
|
|
|
@skyvern.cached(cache_key = 'block_1')
|
|
async def block_1(page, context):
|
|
await page.click(selector='button')
|
|
await page.complete()
|
|
"""
|
|
result = extract_cached_blocks_from_source(source)
|
|
assert set(result.keys()) == {"login", "block_1"}
|
|
assert "page.goto" in result["login"]
|
|
assert "page.click" in result["block_1"]
|
|
# login block should NOT contain block_1 code
|
|
assert "page.click" not in result["login"]
|
|
|
|
def test_extract_single_block(self):
|
|
from skyvern.services.workflow_script_service import extract_single_cached_block
|
|
|
|
source = """import skyvern
|
|
|
|
@skyvern.cached(cache_key = 'login')
|
|
async def login(page, context):
|
|
await page.goto('https://example.com')
|
|
|
|
|
|
@skyvern.cached(cache_key = 'block_1')
|
|
async def block_1(page, context):
|
|
await page.click(selector='button')
|
|
"""
|
|
result = extract_single_cached_block(source, "block_1")
|
|
assert result is not None
|
|
assert "page.click" in result
|
|
assert "page.goto" not in result
|
|
|
|
def test_extract_first_block(self):
|
|
from skyvern.services.workflow_script_service import extract_single_cached_block
|
|
|
|
source = """import skyvern
|
|
|
|
@skyvern.cached(cache_key = 'login')
|
|
async def login(page, context):
|
|
await page.goto('https://example.com')
|
|
|
|
|
|
@skyvern.cached(cache_key = 'block_1')
|
|
async def block_1(page, context):
|
|
await page.click(selector='button')
|
|
"""
|
|
result = extract_single_cached_block(source, "login")
|
|
assert result is not None
|
|
assert "page.goto" in result
|
|
assert "page.click" not in result
|
|
|
|
def test_extract_single_block_not_found(self):
|
|
from skyvern.services.workflow_script_service import extract_single_cached_block
|
|
|
|
source = "@skyvern.cached(cache_key = 'login')\nasync def login(page, context): pass\n"
|
|
result = extract_single_cached_block(source, "nonexistent")
|
|
assert result is None
|
|
|
|
def test_extract_empty_source(self):
|
|
from skyvern.services.workflow_script_service import extract_cached_blocks_from_source
|
|
|
|
assert extract_cached_blocks_from_source("") == {}
|
|
assert extract_cached_blocks_from_source("import skyvern\n") == {}
|
|
|
|
def test_extract_last_block_goes_to_eof(self):
|
|
from skyvern.services.workflow_script_service import extract_cached_blocks_from_source
|
|
|
|
source = "@skyvern.cached(cache_key = 'only_block')\nasync def only(page, ctx):\n pass\n"
|
|
result = extract_cached_blocks_from_source(source)
|
|
assert "only_block" in result
|
|
assert "pass" in result["only_block"]
|
|
|
|
|
|
class TestClassifyBlockStrategy:
|
|
"""Tests for _classify_block_strategy template selection."""
|
|
|
|
def setup_method(self):
|
|
from skyvern.services.script_reviewer import ScriptReviewer
|
|
|
|
self.reviewer = ScriptReviewer()
|
|
|
|
def test_extraction_block(self):
|
|
"""Block with page.extract() and no page.click() → extraction."""
|
|
code = "async def block(page, ctx):\n return await page.extract(prompt='...')\n"
|
|
result = self.reviewer._classify_block_strategy(existing_code=code)
|
|
assert result == "extraction"
|
|
|
|
def test_extraction_with_click_is_sequential(self):
|
|
"""Block with both extract and click → sequential (not extraction)."""
|
|
code = (
|
|
"async def block(page, ctx):\n await page.click('#btn')\n return await page.extract(prompt='...')\n"
|
|
)
|
|
result = self.reviewer._classify_block_strategy(existing_code=code)
|
|
assert result == "sequential"
|
|
|
|
def test_existing_fill_form_stays_form_filling(self):
|
|
"""Block already using page.fill_form() stays form_filling."""
|
|
code = "async def block(page, ctx):\n await page.fill_form(ctx.parameters, prompt='...')\n"
|
|
result = self.reviewer._classify_block_strategy(existing_code=code)
|
|
assert result == "form_filling"
|
|
|
|
def test_download_block_is_sequential(self):
|
|
"""Download block should be sequential, never form_filling."""
|
|
code = "async def download(page, ctx):\n await page.download_file(prompt='...')\n await page.complete()\n"
|
|
result = self.reviewer._classify_block_strategy(existing_code=code)
|
|
assert result == "sequential"
|
|
|
|
def test_navigation_block_with_form_fields_is_sequential(self):
|
|
"""Block on a page with form fields should still be sequential (not form_filling).
|
|
|
|
Regression guard: this data (5 form fields + form keywords in goal + form
|
|
action types) would have triggered the removed heuristic. Verifies it's gone.
|
|
"""
|
|
code = "async def block(page, ctx):\n await page.click('#btn')\n await page.complete()\n"
|
|
result = self.reviewer._classify_block_strategy(existing_code=code)
|
|
assert result == "sequential"
|
|
|
|
def test_default_is_sequential(self):
|
|
"""Empty code, no special signals → sequential."""
|
|
result = self.reviewer._classify_block_strategy(existing_code="")
|
|
assert result == "sequential"
|