diff --git a/skyvern/forge/prompts/skyvern/script-reviewer.j2 b/skyvern/forge/prompts/skyvern/script-reviewer.j2 index f55974bd6..5554ab947 100644 --- a/skyvern/forge/prompts/skyvern/script-reviewer.j2 +++ b/skyvern/forge/prompts/skyvern/script-reviewer.j2 @@ -164,6 +164,7 @@ When no suggested selector is available, build one from the element's `[tag]`, ` | `click: Submit` [tag: button] [text: "Submit"] | `await page.click(selector='button:has-text("Submit")', ai='fallback', prompt='Click Submit')` | | `click: Gender option` (no tag/attrs) | `await page.click(ai='fallback', prompt='Select the first gender option')` | | `complete: ...` | `await page.complete()` | +| `terminate: ...` | `if : await page.terminate(errors=["reason"])` — see **Termination Rules** in the Lifecycle section below | ### Selector priority (most reliable first) 1. `#id` — if the element has a stable `id` attribute (not auto-generated hex/numeric IDs) @@ -379,6 +380,13 @@ async def block_fn(page, context): - `await page.complete()` — Mark the task as complete (call at the end) - `await page.wait(timeout_ms=)` — Wait for a duration - `await page.solve_captcha()` — Solve a CAPTCHA on the page +- `await page.terminate(errors=["reason"])` — Stop the task when the goal cannot be achieved (see rules below) + +### Termination Rules +- `page.terminate()` MUST be inside an `if`/`elif` branch guarded by `page.classify()` or `page.extract()`. NEVER call terminate unconditionally. +- After a `page.classify()`, the `else` branch must be `element_fallback()`, not terminate — "unknown state" means "let the agent figure it out", not "give up." After a `page.extract()` with a known set of outcomes, terminate in `else` is acceptable when the else represents a definite failure condition (not an unknown state). +- Only use terminate when the navigation goal explicitly describes conditions under which the task should stop (e.g., "if no invoice is available, terminate", "if the account doesn't exist, stop"). +- Do NOT use terminate as a fallback for "something went wrong" — that's what `element_fallback()` is for. ### AI Fallback - `await page.element_fallback(navigation_goal="...", max_steps=10)` — Hand off to AI agent diff --git a/skyvern/services/script_reviewer.py b/skyvern/services/script_reviewer.py index 82c31dbd8..6e02fda43 100644 --- a/skyvern/services/script_reviewer.py +++ b/skyvern/services/script_reviewer.py @@ -2,8 +2,9 @@ from __future__ import annotations import json import re -from typing import Literal +from typing import Literal, Sequence +import libcst as cst import structlog from skyvern.forge import app @@ -742,6 +743,19 @@ class ScriptReviewer: current_prompt = self._build_retry_prompt(updated_code, regression_error, function_signature) continue + # Validate bare terminate calls (must be inside if/elif, never unconditional) + terminate_error = self._validate_bare_terminate(updated_code) + if terminate_error is not None: + LOG.warning( + "ScriptReviewer: bare terminate detected, retrying", + block_label=block_label, + attempt=attempt, + error=terminate_error, + ) + if attempt < max_attempts: + current_prompt = self._build_retry_prompt(updated_code, terminate_error, function_signature) + continue + # Validate no hardcoded parameter values (catch leaked run-specific data) hardcoded_error = self._validate_no_hardcoded_values(updated_code, run_parameter_values) if hardcoded_error is not None: @@ -1217,6 +1231,7 @@ class ScriptReviewer: "scroll": frozenset({"direction", "amount", "selector"}), "keypress": frozenset({"keys", "prompt"}), "wait": frozenset({"timeout_ms"}), + "terminate": frozenset({"errors"}), "complete": frozenset(set()), "goto": frozenset({"url", "timeout"}), } @@ -1467,6 +1482,177 @@ class ScriptReviewer: return None + def _validate_bare_terminate(self, code: str) -> str | None: + """Validate that page.terminate() is never called unconditionally. + + Every page.terminate() call must be inside an if/elif branch (guarded by + a classify or extract result). A terminate at function-body level (not + inside any conditional) is rejected. + + Uses libcst.parse_module to walk the tree and check that every + page.terminate() Expr node has a cst.If ancestor. + + Note: the prompt distinguishes classify-else (should use element_fallback) + from extract-else (terminate is acceptable). This validator enforces only + the structural rule; else-branch semantics are left to the LLM prompt. + + Returns an error message or None if valid. + """ + # Fast short-circuit: skip libcst parsing when terminate is not present + if "terminate" not in code: + return None + + try: + tree = cst.parse_module(code) + except cst.ParserSyntaxError: + return None # compile check handles syntax errors separately + + # Walk the CST looking for page.terminate() calls not inside an If node. + # Each top-level FunctionDef is validated independently. Nested function + # definitions are intentionally not recursed into — generated script blocks + # are always a single top-level async function with no inner defs. + # libcst uses a single FunctionDef for both sync and async. + for stmt in tree.body: + func_def = stmt if isinstance(stmt, cst.FunctionDef) else None + if func_def is None: + continue + bare = ScriptReviewer._find_bare_terminate_in_body(func_def.body.body, inside_conditional=False) + if bare is not None: + return bare + + return None + + @staticmethod + def _find_bare_terminate_in_body( + stmts: Sequence[cst.BaseStatement], + inside_conditional: bool, + ) -> str | None: + """Recursively check statements for bare page.terminate() calls. + + Returns an error message if a bare terminate is found, None otherwise. + + Note: the prompt distinguishes classify-else (should use element_fallback) + from extract-else (terminate is acceptable). This validator only enforces + the structural rule (terminate must be inside *some* conditional). Finer + classify-vs-extract else-branch enforcement is left to the LLM prompt. + """ + + def _unwrap_body(suite: cst.BaseSuite | cst.Else | cst.Finally | None) -> Sequence[cst.BaseStatement]: + """Extract the statement list from an IndentedBlock, Else, or Finally.""" + if suite is None: + return () + if isinstance(suite, (cst.Else, cst.Finally)): + suite = suite.body + if isinstance(suite, cst.IndentedBlock): + return suite.body + return () + + def _check_bodies(bodies: list[Sequence[cst.BaseStatement]], cond: bool) -> str | None: + """Check multiple statement lists, returning the first error.""" + for body in bodies: + err = ScriptReviewer._find_bare_terminate_in_body(body, inside_conditional=cond) + if err: + return err + return None + + for stmt in stmts: + # In libcst, expression statements are wrapped in SimpleStatementLine + if isinstance(stmt, cst.SimpleStatementLine): + for small_stmt in stmt.body: + if isinstance(small_stmt, cst.Expr) and ScriptReviewer._is_terminate_call(small_stmt.value): + if not inside_conditional: + return ( + "page.terminate() must be inside an if/elif branch — unconditional terminate rejected" + ) + + # Recurse into if/elif/else bodies + if isinstance(stmt, cst.If): + # The if and elif bodies are "inside a conditional". + # cst.If.orelse is If (elif) | Else (else) | None. + # Collect all branch bodies via the elif/else chain. + branch_bodies: list[Sequence[cst.BaseStatement]] = [_unwrap_body(stmt.body)] + orelse: cst.If | cst.Else | None = stmt.orelse + while orelse is not None: + if isinstance(orelse, cst.If): + branch_bodies.append(_unwrap_body(orelse.body)) + orelse = orelse.orelse + elif isinstance(orelse, cst.Else): + branch_bodies.append(_unwrap_body(orelse.body)) + orelse = None + else: + break + err = _check_bodies(branch_bodies, cond=True) + if err: + return err + + elif isinstance(stmt, (cst.For, cst.While)): + bodies: list[Sequence[cst.BaseStatement]] = [_unwrap_body(stmt.body)] + if stmt.orelse is not None: + bodies.append(_unwrap_body(stmt.orelse)) + err = _check_bodies(bodies, cond=inside_conditional) + if err: + return err + + elif isinstance(stmt, cst.With): + err = _check_bodies([_unwrap_body(stmt.body)], cond=inside_conditional) + if err: + return err + + elif isinstance(stmt, cst.Try): + # except handler bodies inherit inside_conditional from the + # enclosing scope (not set to True) — terminate in an except + # block is "something went wrong" error handling, which should + # use element_fallback, not terminate. + handler_bodies = [_unwrap_body(h.body) for h in stmt.handlers] + err = _check_bodies( + [_unwrap_body(stmt.body), *handler_bodies, _unwrap_body(stmt.orelse), _unwrap_body(stmt.finalbody)], + cond=inside_conditional, + ) + if err: + return err + + elif isinstance(stmt, cst.TryStar): + handler_bodies = [_unwrap_body(h.body) for h in stmt.handlers] + err = _check_bodies( + [_unwrap_body(stmt.body), *handler_bodies, _unwrap_body(stmt.orelse), _unwrap_body(stmt.finalbody)], + cond=inside_conditional, + ) + if err: + return err + + # Python 3.10+ match/case — each case body is conditional + elif isinstance(stmt, cst.Match): + err = _check_bodies([_unwrap_body(case.body) for case in stmt.cases], cond=True) + if err: + return err + + # FunctionDef stmts are intentionally not recursed into — + # generated scripts never contain nested function definitions. + return None + + @staticmethod + def _is_terminate_call(node: cst.BaseExpression) -> bool: + """Check if a CST expression node is a page.terminate(...) call. + + Handles both ``await page.terminate(...)`` and bare ``page.terminate(...)``. + Generated scripts always use ``await``, but we check both defensively. + + The caller passes ``small_stmt.value`` (the inner expression of a + ``cst.Expr`` small statement), so this method receives a ``cst.Await`` + or ``cst.Call`` node, never a ``cst.Expr`` wrapper. + """ + call = node.expression if isinstance(node, cst.Await) else node + if not isinstance(call, cst.Call): + return False + func = call.func + return ( + isinstance(func, cst.Attribute) + and isinstance(func.attr, cst.Name) + and func.attr.value == "terminate" + and isinstance(func.value, cst.Name) + and func.value.value == "page" + ) + # Regex to extract string literals (single or double quoted, excluding escaped quotes) _STRING_LITERAL_RE: re.Pattern[str] = re.compile(r"""(? None: + self.reviewer = ScriptReviewer() + + def test_terminate_inside_if_passes(self) -> None: + """page.terminate() inside an if block should pass validation.""" + code = """ +async def block_fn(page, context): + result = await page.extract( + prompt='Is there an invoice available?', + schema={'type': 'object', 'properties': {'available': {'type': 'boolean'}}} + ) + if not result.get('available', True): + await page.terminate(errors=["No invoice available"]) + else: + await page.element_fallback(navigation_goal="Download the invoice") +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_inside_elif_passes(self) -> None: + """page.terminate() inside an elif block should pass validation.""" + code = """ +async def block_fn(page, context): + state = await page.classify( + options={"has_invoice": "invoice is available", "no_invoice": "no invoice found"}, + text_patterns={"has_invoice": "Download Invoice", "no_invoice": "No invoices"}, + ) + if state == "has_invoice": + await page.click(selector='button:has-text("Download")', ai='fallback', prompt='Click download') + elif state == "no_invoice": + await page.terminate(errors=["No invoice available for this period"]) + else: + await page.element_fallback(navigation_goal="Handle invoice page") +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_bare_terminate_at_function_body_rejected(self) -> None: + """page.terminate() at function body level (unconditional) should be rejected.""" + code = """ +async def block_fn(page, context): + await page.terminate(errors=["Something went wrong"]) +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + assert "if/elif" in error + + def test_bare_terminate_after_other_calls_rejected(self) -> None: + """page.terminate() at function body level after other calls should be rejected.""" + code = """ +async def block_fn(page, context): + await page.click(selector='button', ai='fallback', prompt='Click button') + await page.terminate(errors=["Done but wrong"]) +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + + def test_no_terminate_passes(self) -> None: + """Code without any terminate call should pass.""" + code = """ +async def block_fn(page, context): + await page.click(selector='button:has-text("Submit")', ai='fallback', prompt='Click submit') + await page.complete() +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_in_nested_if_passes(self) -> None: + """page.terminate() inside a nested if should pass.""" + code = """ +async def block_fn(page, context): + result = await page.extract( + prompt='What is the account status?', + schema={'type': 'object', 'properties': {'status': {'type': 'string'}}} + ) + if result.get('status') == 'active': + data = await page.extract( + prompt='Is there a balance?', + schema={'type': 'object', 'properties': {'has_balance': {'type': 'boolean'}}} + ) + if not data.get('has_balance', True): + await page.terminate(errors=["No balance to process"]) + else: + await page.click(selector='button:has-text("Pay")', ai='fallback', prompt='Click pay') + else: + await page.element_fallback(navigation_goal="Handle account status") +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_in_for_loop_without_conditional_rejected(self) -> None: + """page.terminate() inside a for loop but not in an if should be rejected.""" + code = """ +async def block_fn(page, context): + for item in items: + await page.terminate(errors=["Bad item"]) +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + + def test_terminate_in_for_loop_inside_if_passes(self) -> None: + """page.terminate() inside a for loop AND inside an if should pass.""" + code = """ +async def block_fn(page, context): + for item in items: + if item.get('invalid'): + await page.terminate(errors=["Invalid item found"]) + else: + await page.click(selector='button', ai='fallback', prompt='Process item') +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_in_else_branch_passes(self) -> None: + """page.terminate() inside an else branch still passes (it's inside a conditional). + + This test uses an extract-pattern example. The validator would also pass + terminate in a classify-else branch -- classify-vs-extract else-branch + semantics are enforced by the LLM prompt, not the structural validator. + """ + code = """ +async def block_fn(page, context): + result = await page.extract( + prompt='Check status', + schema={'type': 'object', 'properties': {'ok': {'type': 'boolean'}}} + ) + if result.get('ok'): + await page.complete() + else: + await page.terminate(errors=["Status check failed"]) +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_bare_non_awaited_terminate_rejected(self) -> None: + """page.terminate() without await at function body level should still be rejected.""" + code = """ +async def block_fn(page, context): + page.terminate(errors=["Something went wrong"]) +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + + def test_non_awaited_terminate_inside_if_passes(self) -> None: + """page.terminate() without await but inside an if block should pass.""" + code = """ +async def block_fn(page, context): + if some_condition: + page.terminate(errors=["Condition met"]) + else: + await page.element_fallback(navigation_goal="Handle it") +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_inside_match_case_passes(self) -> None: + """page.terminate() inside a match/case body should pass (case is conditional).""" + code = """ +async def block_fn(page, context): + result = await page.extract( + prompt='What is the status?', + schema={'type': 'object', 'properties': {'status': {'type': 'string'}}} + ) + match result.get('status'): + case 'active': + await page.click(selector='button', ai='fallback', prompt='Click button') + case 'cancelled': + await page.terminate(errors=["Account cancelled — cannot proceed"]) + case _: + await page.element_fallback(navigation_goal="Handle unknown status") +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_in_bare_try_rejected(self) -> None: + """page.terminate() inside a try block but not in an if should be rejected.""" + code = """ +async def block_fn(page, context): + try: + await page.terminate(errors=["Something failed"]) + except Exception: + await page.element_fallback(navigation_goal="Handle error") +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + + def test_terminate_in_try_inside_if_passes(self) -> None: + """page.terminate() inside try > if should pass.""" + code = """ +async def block_fn(page, context): + try: + result = await page.extract( + prompt='Check status', + schema={'type': 'object', 'properties': {'ok': {'type': 'boolean'}}} + ) + if not result.get('ok'): + await page.terminate(errors=["Status check failed"]) + else: + await page.complete() + except Exception: + await page.element_fallback(navigation_goal="Handle error") +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_terminate_in_bare_except_rejected(self) -> None: + """page.terminate() in an except handler (not inside if) should be rejected.""" + code = """ +async def block_fn(page, context): + try: + await page.click(selector='button', ai='fallback', prompt='Click') + except Exception: + await page.terminate(errors=["Exception occurred"]) +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + + def test_terminate_in_bare_with_rejected(self) -> None: + """page.terminate() inside a with block but not in an if should be rejected.""" + code = """ +async def block_fn(page, context): + async with some_context_manager() as ctx: + await page.terminate(errors=["Bad state"]) +""" + error = self.reviewer._validate_bare_terminate(code) + assert error is not None + assert "unconditional terminate rejected" in error + + def test_terminate_in_with_inside_if_passes(self) -> None: + """page.terminate() inside with > if should pass.""" + code = """ +async def block_fn(page, context): + async with some_context_manager() as ctx: + if ctx.failed: + await page.terminate(errors=["Context failed"]) + else: + await page.complete() +""" + assert self.reviewer._validate_bare_terminate(code) is None + + def test_syntax_error_code_returns_none(self) -> None: + """Code that doesn't parse should return None (syntax errors handled elsewhere).""" + code = "this is not valid python at all {" + assert self.reviewer._validate_bare_terminate(code) is None