diff --git a/skyvern/forge/prompts/skyvern/script-reviewer.j2 b/skyvern/forge/prompts/skyvern/script-reviewer.j2 index dc743e2c4..a819671bd 100644 --- a/skyvern/forge/prompts/skyvern/script-reviewer.j2 +++ b/skyvern/forge/prompts/skyvern/script-reviewer.j2 @@ -7,6 +7,10 @@ You are a script reviewer for a browser automation system. Your job is to update 4. Every `page.classify()` MUST have an `else` branch that calls `await page.element_fallback(navigation_goal="...")`. 5. Add `text_patterns` derived from the page text in the episode data below. **NEVER include PII in text_patterns** — no email addresses, no personal names, no account numbers, no customer-specific identifiers. Use generic page structure indicators instead (form labels, button text, section headings, navigation links). Wrong: `"Welcome John Smith, user@example.com, Billing & Payments"`. Right: `"Welcome, Billing & Payments, Sign in, Logout"`. 6. Keep classify option descriptions short (< 10 words) and mutually exclusive. +6b. **Classify branch hygiene**: + - **Don't add** a branch whose actions (fills, clicks, selectors) duplicate an existing branch — unless it has **distinct `text_patterns`** that improve page detection. + - Branches with empty or `"N/A"` text_patterns that duplicate another branch's actions are redundant. The page is the same variant — investigate root cause (timing, page load) instead. + - **Consolidate**: if you see multiple existing branches with identical actions and empty/`"N/A"` text_patterns, merge them into one and remove the duplicates. 7. **SEMANTIC SELECTORS**: Use `ai='fallback'` with label-based selectors — NEVER copy xpaths from other branches. Build selectors using `:has-text()` (case-insensitive substring match). Examples: `selector='label:has-text("Full name") input'`, `selector='button:has-text("Submit")'`. If no semantic selector is possible (complex widgets), use `ai='fallback'` with only a `prompt=`. 8. **SELECTOR MANAGEMENT**: a. **Accumulate across variants**: When different page variants use different labels for the same field, use comma-separated CSS selectors: `selector='label:has-text("Website") input, label:has-text("URL") input'`. The first match wins. diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index fd08bbf41..857af1534 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -1126,7 +1126,8 @@ class WorkflowService: ) # Trigger AI Script Reviewer for adaptive caching workflows - # Include terminated and failed runs (triage will filter non-code-fixable failures) + # Include terminated and failed runs — the reviewer filters to only + # episodes where the AI fallback succeeded (actionable signal). # Skip canceled (user stopped) and timed_out (infrastructure issue) # Only trigger if the script was actually executed this run — reviewing based on # agent-only runs provides no signal about script quality and wastes LLM tokens. @@ -5413,12 +5414,26 @@ class WorkflowService: ) -> None: """Run the script reviewer inside a lock. Episodes are scoped to the script version.""" # Double-check: re-query episodes after acquiring lock (another process may have reviewed them) - episodes = await app.DATABASE.scripts.get_unreviewed_episodes( + all_episodes = await app.DATABASE.scripts.get_unreviewed_episodes( workflow_permanent_id=workflow.workflow_permanent_id, organization_id=workflow.organization_id, script_revision_id=script_revision_id, ) + if not all_episodes: + return + + # Only review episodes where the AI fallback succeeded — those carry + # actionable signal (working selectors, agent actions) the reviewer can + # learn from. When both the script AND the AI fail, there's nothing to + # improve and reviewing wastes LLM tokens. + episodes = [ep for ep in all_episodes if ep.fallback_succeeded is not False] if not episodes: + LOG.info( + "Skipping script review — all fallback episodes failed (no actionable signal)", + workflow_permanent_id=workflow.workflow_permanent_id, + total_episodes=len(all_episodes), + failed_labels=[ep.block_label for ep in all_episodes][:20], + ) return LOG.info(