feat: teach script reviewer about page.terminate() with conditional-only rule (SKY-8334) (#5097)

This commit is contained in:
pedrohsdb 2026-03-13 14:11:16 -07:00 committed by GitHub
parent c45d14527e
commit ef4e68a8a5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 444 additions and 1 deletions

View file

@ -164,6 +164,7 @@ When no suggested selector is available, build one from the element's `[tag]`, `
| `click: Submit` [tag: button] [text: "Submit"] | `await page.click(selector='button:has-text("Submit")', ai='fallback', prompt='Click Submit')` |
| `click: Gender option` (no tag/attrs) | `await page.click(ai='fallback', prompt='Select the first gender option')` |
| `complete: ...` | `await page.complete()` |
| `terminate: ...` | `if <condition>: await page.terminate(errors=["reason"])` — see **Termination Rules** in the Lifecycle section below |
### Selector priority (most reliable first)
1. `#id` — if the element has a stable `id` attribute (not auto-generated hex/numeric IDs)
@ -379,6 +380,13 @@ async def block_fn(page, context):
- `await page.complete()` — Mark the task as complete (call at the end)
- `await page.wait(timeout_ms=)` — Wait for a duration
- `await page.solve_captcha()` — Solve a CAPTCHA on the page
- `await page.terminate(errors=["reason"])` — Stop the task when the goal cannot be achieved (see rules below)
### Termination Rules
- `page.terminate()` MUST be inside an `if`/`elif` branch guarded by `page.classify()` or `page.extract()`. NEVER call terminate unconditionally.
- After a `page.classify()`, the `else` branch must be `element_fallback()`, not terminate — "unknown state" means "let the agent figure it out", not "give up." After a `page.extract()` with a known set of outcomes, terminate in `else` is acceptable when the else represents a definite failure condition (not an unknown state).
- Only use terminate when the navigation goal explicitly describes conditions under which the task should stop (e.g., "if no invoice is available, terminate", "if the account doesn't exist, stop").
- Do NOT use terminate as a fallback for "something went wrong" — that's what `element_fallback()` is for.
### AI Fallback
- `await page.element_fallback(navigation_goal="...", max_steps=10)` — Hand off to AI agent

View file

@ -2,8 +2,9 @@ from __future__ import annotations
import json
import re
from typing import Literal
from typing import Literal, Sequence
import libcst as cst
import structlog
from skyvern.forge import app
@ -742,6 +743,19 @@ class ScriptReviewer:
current_prompt = self._build_retry_prompt(updated_code, regression_error, function_signature)
continue
# Validate bare terminate calls (must be inside if/elif, never unconditional)
terminate_error = self._validate_bare_terminate(updated_code)
if terminate_error is not None:
LOG.warning(
"ScriptReviewer: bare terminate detected, retrying",
block_label=block_label,
attempt=attempt,
error=terminate_error,
)
if attempt < max_attempts:
current_prompt = self._build_retry_prompt(updated_code, terminate_error, function_signature)
continue
# Validate no hardcoded parameter values (catch leaked run-specific data)
hardcoded_error = self._validate_no_hardcoded_values(updated_code, run_parameter_values)
if hardcoded_error is not None:
@ -1217,6 +1231,7 @@ class ScriptReviewer:
"scroll": frozenset({"direction", "amount", "selector"}),
"keypress": frozenset({"keys", "prompt"}),
"wait": frozenset({"timeout_ms"}),
"terminate": frozenset({"errors"}),
"complete": frozenset(set()),
"goto": frozenset({"url", "timeout"}),
}
@ -1467,6 +1482,177 @@ class ScriptReviewer:
return None
def _validate_bare_terminate(self, code: str) -> str | None:
"""Validate that page.terminate() is never called unconditionally.
Every page.terminate() call must be inside an if/elif branch (guarded by
a classify or extract result). A terminate at function-body level (not
inside any conditional) is rejected.
Uses libcst.parse_module to walk the tree and check that every
page.terminate() Expr node has a cst.If ancestor.
Note: the prompt distinguishes classify-else (should use element_fallback)
from extract-else (terminate is acceptable). This validator enforces only
the structural rule; else-branch semantics are left to the LLM prompt.
Returns an error message or None if valid.
"""
# Fast short-circuit: skip libcst parsing when terminate is not present
if "terminate" not in code:
return None
try:
tree = cst.parse_module(code)
except cst.ParserSyntaxError:
return None # compile check handles syntax errors separately
# Walk the CST looking for page.terminate() calls not inside an If node.
# Each top-level FunctionDef is validated independently. Nested function
# definitions are intentionally not recursed into — generated script blocks
# are always a single top-level async function with no inner defs.
# libcst uses a single FunctionDef for both sync and async.
for stmt in tree.body:
func_def = stmt if isinstance(stmt, cst.FunctionDef) else None
if func_def is None:
continue
bare = ScriptReviewer._find_bare_terminate_in_body(func_def.body.body, inside_conditional=False)
if bare is not None:
return bare
return None
@staticmethod
def _find_bare_terminate_in_body(
stmts: Sequence[cst.BaseStatement],
inside_conditional: bool,
) -> str | None:
"""Recursively check statements for bare page.terminate() calls.
Returns an error message if a bare terminate is found, None otherwise.
Note: the prompt distinguishes classify-else (should use element_fallback)
from extract-else (terminate is acceptable). This validator only enforces
the structural rule (terminate must be inside *some* conditional). Finer
classify-vs-extract else-branch enforcement is left to the LLM prompt.
"""
def _unwrap_body(suite: cst.BaseSuite | cst.Else | cst.Finally | None) -> Sequence[cst.BaseStatement]:
"""Extract the statement list from an IndentedBlock, Else, or Finally."""
if suite is None:
return ()
if isinstance(suite, (cst.Else, cst.Finally)):
suite = suite.body
if isinstance(suite, cst.IndentedBlock):
return suite.body
return ()
def _check_bodies(bodies: list[Sequence[cst.BaseStatement]], cond: bool) -> str | None:
"""Check multiple statement lists, returning the first error."""
for body in bodies:
err = ScriptReviewer._find_bare_terminate_in_body(body, inside_conditional=cond)
if err:
return err
return None
for stmt in stmts:
# In libcst, expression statements are wrapped in SimpleStatementLine
if isinstance(stmt, cst.SimpleStatementLine):
for small_stmt in stmt.body:
if isinstance(small_stmt, cst.Expr) and ScriptReviewer._is_terminate_call(small_stmt.value):
if not inside_conditional:
return (
"page.terminate() must be inside an if/elif branch — unconditional terminate rejected"
)
# Recurse into if/elif/else bodies
if isinstance(stmt, cst.If):
# The if and elif bodies are "inside a conditional".
# cst.If.orelse is If (elif) | Else (else) | None.
# Collect all branch bodies via the elif/else chain.
branch_bodies: list[Sequence[cst.BaseStatement]] = [_unwrap_body(stmt.body)]
orelse: cst.If | cst.Else | None = stmt.orelse
while orelse is not None:
if isinstance(orelse, cst.If):
branch_bodies.append(_unwrap_body(orelse.body))
orelse = orelse.orelse
elif isinstance(orelse, cst.Else):
branch_bodies.append(_unwrap_body(orelse.body))
orelse = None
else:
break
err = _check_bodies(branch_bodies, cond=True)
if err:
return err
elif isinstance(stmt, (cst.For, cst.While)):
bodies: list[Sequence[cst.BaseStatement]] = [_unwrap_body(stmt.body)]
if stmt.orelse is not None:
bodies.append(_unwrap_body(stmt.orelse))
err = _check_bodies(bodies, cond=inside_conditional)
if err:
return err
elif isinstance(stmt, cst.With):
err = _check_bodies([_unwrap_body(stmt.body)], cond=inside_conditional)
if err:
return err
elif isinstance(stmt, cst.Try):
# except handler bodies inherit inside_conditional from the
# enclosing scope (not set to True) — terminate in an except
# block is "something went wrong" error handling, which should
# use element_fallback, not terminate.
handler_bodies = [_unwrap_body(h.body) for h in stmt.handlers]
err = _check_bodies(
[_unwrap_body(stmt.body), *handler_bodies, _unwrap_body(stmt.orelse), _unwrap_body(stmt.finalbody)],
cond=inside_conditional,
)
if err:
return err
elif isinstance(stmt, cst.TryStar):
handler_bodies = [_unwrap_body(h.body) for h in stmt.handlers]
err = _check_bodies(
[_unwrap_body(stmt.body), *handler_bodies, _unwrap_body(stmt.orelse), _unwrap_body(stmt.finalbody)],
cond=inside_conditional,
)
if err:
return err
# Python 3.10+ match/case — each case body is conditional
elif isinstance(stmt, cst.Match):
err = _check_bodies([_unwrap_body(case.body) for case in stmt.cases], cond=True)
if err:
return err
# FunctionDef stmts are intentionally not recursed into —
# generated scripts never contain nested function definitions.
return None
@staticmethod
def _is_terminate_call(node: cst.BaseExpression) -> bool:
"""Check if a CST expression node is a page.terminate(...) call.
Handles both ``await page.terminate(...)`` and bare ``page.terminate(...)``.
Generated scripts always use ``await``, but we check both defensively.
The caller passes ``small_stmt.value`` (the inner expression of a
``cst.Expr`` small statement), so this method receives a ``cst.Await``
or ``cst.Call`` node, never a ``cst.Expr`` wrapper.
"""
call = node.expression if isinstance(node, cst.Await) else node
if not isinstance(call, cst.Call):
return False
func = call.func
return (
isinstance(func, cst.Attribute)
and isinstance(func.attr, cst.Name)
and func.attr.value == "terminate"
and isinstance(func.value, cst.Name)
and func.value.value == "page"
)
# Regex to extract string literals (single or double quoted, excluding escaped quotes)
_STRING_LITERAL_RE: re.Pattern[str] = re.compile(r"""(?<![\\])(['"])((?:(?!\1)[^\\]|\\.)*)(\1)""")

View file

@ -0,0 +1,249 @@
"""Tests for ScriptReviewer bare-terminate validation."""
from skyvern.services.script_reviewer import ScriptReviewer
class TestValidateBareTerminate:
"""Tests for _validate_bare_terminate."""
def setup_method(self) -> None:
self.reviewer = ScriptReviewer()
def test_terminate_inside_if_passes(self) -> None:
"""page.terminate() inside an if block should pass validation."""
code = """
async def block_fn(page, context):
result = await page.extract(
prompt='Is there an invoice available?',
schema={'type': 'object', 'properties': {'available': {'type': 'boolean'}}}
)
if not result.get('available', True):
await page.terminate(errors=["No invoice available"])
else:
await page.element_fallback(navigation_goal="Download the invoice")
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_inside_elif_passes(self) -> None:
"""page.terminate() inside an elif block should pass validation."""
code = """
async def block_fn(page, context):
state = await page.classify(
options={"has_invoice": "invoice is available", "no_invoice": "no invoice found"},
text_patterns={"has_invoice": "Download Invoice", "no_invoice": "No invoices"},
)
if state == "has_invoice":
await page.click(selector='button:has-text("Download")', ai='fallback', prompt='Click download')
elif state == "no_invoice":
await page.terminate(errors=["No invoice available for this period"])
else:
await page.element_fallback(navigation_goal="Handle invoice page")
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_bare_terminate_at_function_body_rejected(self) -> None:
"""page.terminate() at function body level (unconditional) should be rejected."""
code = """
async def block_fn(page, context):
await page.terminate(errors=["Something went wrong"])
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
assert "if/elif" in error
def test_bare_terminate_after_other_calls_rejected(self) -> None:
"""page.terminate() at function body level after other calls should be rejected."""
code = """
async def block_fn(page, context):
await page.click(selector='button', ai='fallback', prompt='Click button')
await page.terminate(errors=["Done but wrong"])
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
def test_no_terminate_passes(self) -> None:
"""Code without any terminate call should pass."""
code = """
async def block_fn(page, context):
await page.click(selector='button:has-text("Submit")', ai='fallback', prompt='Click submit')
await page.complete()
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_in_nested_if_passes(self) -> None:
"""page.terminate() inside a nested if should pass."""
code = """
async def block_fn(page, context):
result = await page.extract(
prompt='What is the account status?',
schema={'type': 'object', 'properties': {'status': {'type': 'string'}}}
)
if result.get('status') == 'active':
data = await page.extract(
prompt='Is there a balance?',
schema={'type': 'object', 'properties': {'has_balance': {'type': 'boolean'}}}
)
if not data.get('has_balance', True):
await page.terminate(errors=["No balance to process"])
else:
await page.click(selector='button:has-text("Pay")', ai='fallback', prompt='Click pay')
else:
await page.element_fallback(navigation_goal="Handle account status")
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_in_for_loop_without_conditional_rejected(self) -> None:
"""page.terminate() inside a for loop but not in an if should be rejected."""
code = """
async def block_fn(page, context):
for item in items:
await page.terminate(errors=["Bad item"])
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
def test_terminate_in_for_loop_inside_if_passes(self) -> None:
"""page.terminate() inside a for loop AND inside an if should pass."""
code = """
async def block_fn(page, context):
for item in items:
if item.get('invalid'):
await page.terminate(errors=["Invalid item found"])
else:
await page.click(selector='button', ai='fallback', prompt='Process item')
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_in_else_branch_passes(self) -> None:
"""page.terminate() inside an else branch still passes (it's inside a conditional).
This test uses an extract-pattern example. The validator would also pass
terminate in a classify-else branch -- classify-vs-extract else-branch
semantics are enforced by the LLM prompt, not the structural validator.
"""
code = """
async def block_fn(page, context):
result = await page.extract(
prompt='Check status',
schema={'type': 'object', 'properties': {'ok': {'type': 'boolean'}}}
)
if result.get('ok'):
await page.complete()
else:
await page.terminate(errors=["Status check failed"])
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_bare_non_awaited_terminate_rejected(self) -> None:
"""page.terminate() without await at function body level should still be rejected."""
code = """
async def block_fn(page, context):
page.terminate(errors=["Something went wrong"])
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
def test_non_awaited_terminate_inside_if_passes(self) -> None:
"""page.terminate() without await but inside an if block should pass."""
code = """
async def block_fn(page, context):
if some_condition:
page.terminate(errors=["Condition met"])
else:
await page.element_fallback(navigation_goal="Handle it")
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_inside_match_case_passes(self) -> None:
"""page.terminate() inside a match/case body should pass (case is conditional)."""
code = """
async def block_fn(page, context):
result = await page.extract(
prompt='What is the status?',
schema={'type': 'object', 'properties': {'status': {'type': 'string'}}}
)
match result.get('status'):
case 'active':
await page.click(selector='button', ai='fallback', prompt='Click button')
case 'cancelled':
await page.terminate(errors=["Account cancelled — cannot proceed"])
case _:
await page.element_fallback(navigation_goal="Handle unknown status")
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_in_bare_try_rejected(self) -> None:
"""page.terminate() inside a try block but not in an if should be rejected."""
code = """
async def block_fn(page, context):
try:
await page.terminate(errors=["Something failed"])
except Exception:
await page.element_fallback(navigation_goal="Handle error")
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
def test_terminate_in_try_inside_if_passes(self) -> None:
"""page.terminate() inside try > if should pass."""
code = """
async def block_fn(page, context):
try:
result = await page.extract(
prompt='Check status',
schema={'type': 'object', 'properties': {'ok': {'type': 'boolean'}}}
)
if not result.get('ok'):
await page.terminate(errors=["Status check failed"])
else:
await page.complete()
except Exception:
await page.element_fallback(navigation_goal="Handle error")
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_terminate_in_bare_except_rejected(self) -> None:
"""page.terminate() in an except handler (not inside if) should be rejected."""
code = """
async def block_fn(page, context):
try:
await page.click(selector='button', ai='fallback', prompt='Click')
except Exception:
await page.terminate(errors=["Exception occurred"])
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
def test_terminate_in_bare_with_rejected(self) -> None:
"""page.terminate() inside a with block but not in an if should be rejected."""
code = """
async def block_fn(page, context):
async with some_context_manager() as ctx:
await page.terminate(errors=["Bad state"])
"""
error = self.reviewer._validate_bare_terminate(code)
assert error is not None
assert "unconditional terminate rejected" in error
def test_terminate_in_with_inside_if_passes(self) -> None:
"""page.terminate() inside with > if should pass."""
code = """
async def block_fn(page, context):
async with some_context_manager() as ctx:
if ctx.failed:
await page.terminate(errors=["Context failed"])
else:
await page.complete()
"""
assert self.reviewer._validate_bare_terminate(code) is None
def test_syntax_error_code_returns_none(self) -> None:
"""Code that doesn't parse should return None (syntax errors handled elsewhere)."""
code = "this is not valid python at all {"
assert self.reviewer._validate_bare_terminate(code) is None