mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
fix: skip script review when AI fallback also failed (#5382)
This commit is contained in:
parent
5d23d0adc4
commit
0a29446dc3
2 changed files with 21 additions and 2 deletions
|
|
@ -7,6 +7,10 @@ You are a script reviewer for a browser automation system. Your job is to update
|
|||
4. Every `page.classify()` MUST have an `else` branch that calls `await page.element_fallback(navigation_goal="...")`.
|
||||
5. Add `text_patterns` derived from the page text in the episode data below. **NEVER include PII in text_patterns** — no email addresses, no personal names, no account numbers, no customer-specific identifiers. Use generic page structure indicators instead (form labels, button text, section headings, navigation links). Wrong: `"Welcome John Smith, user@example.com, Billing & Payments"`. Right: `"Welcome, Billing & Payments, Sign in, Logout"`.
|
||||
6. Keep classify option descriptions short (< 10 words) and mutually exclusive.
|
||||
6b. **Classify branch hygiene**:
|
||||
- **Don't add** a branch whose actions (fills, clicks, selectors) duplicate an existing branch — unless it has **distinct `text_patterns`** that improve page detection.
|
||||
- Branches with empty or `"N/A"` text_patterns that duplicate another branch's actions are redundant. The page is the same variant — investigate root cause (timing, page load) instead.
|
||||
- **Consolidate**: if you see multiple existing branches with identical actions and empty/`"N/A"` text_patterns, merge them into one and remove the duplicates.
|
||||
7. **SEMANTIC SELECTORS**: Use `ai='fallback'` with label-based selectors — NEVER copy xpaths from other branches. Build selectors using `:has-text()` (case-insensitive substring match). Examples: `selector='label:has-text("Full name") input'`, `selector='button:has-text("Submit")'`. If no semantic selector is possible (complex widgets), use `ai='fallback'` with only a `prompt=`.
|
||||
8. **SELECTOR MANAGEMENT**:
|
||||
a. **Accumulate across variants**: When different page variants use different labels for the same field, use comma-separated CSS selectors: `selector='label:has-text("Website") input, label:has-text("URL") input'`. The first match wins.
|
||||
|
|
|
|||
|
|
@ -1126,7 +1126,8 @@ class WorkflowService:
|
|||
)
|
||||
|
||||
# Trigger AI Script Reviewer for adaptive caching workflows
|
||||
# Include terminated and failed runs (triage will filter non-code-fixable failures)
|
||||
# Include terminated and failed runs — the reviewer filters to only
|
||||
# episodes where the AI fallback succeeded (actionable signal).
|
||||
# Skip canceled (user stopped) and timed_out (infrastructure issue)
|
||||
# Only trigger if the script was actually executed this run — reviewing based on
|
||||
# agent-only runs provides no signal about script quality and wastes LLM tokens.
|
||||
|
|
@ -5413,12 +5414,26 @@ class WorkflowService:
|
|||
) -> None:
|
||||
"""Run the script reviewer inside a lock. Episodes are scoped to the script version."""
|
||||
# Double-check: re-query episodes after acquiring lock (another process may have reviewed them)
|
||||
episodes = await app.DATABASE.scripts.get_unreviewed_episodes(
|
||||
all_episodes = await app.DATABASE.scripts.get_unreviewed_episodes(
|
||||
workflow_permanent_id=workflow.workflow_permanent_id,
|
||||
organization_id=workflow.organization_id,
|
||||
script_revision_id=script_revision_id,
|
||||
)
|
||||
if not all_episodes:
|
||||
return
|
||||
|
||||
# Only review episodes where the AI fallback succeeded — those carry
|
||||
# actionable signal (working selectors, agent actions) the reviewer can
|
||||
# learn from. When both the script AND the AI fail, there's nothing to
|
||||
# improve and reviewing wastes LLM tokens.
|
||||
episodes = [ep for ep in all_episodes if ep.fallback_succeeded is not False]
|
||||
if not episodes:
|
||||
LOG.info(
|
||||
"Skipping script review — all fallback episodes failed (no actionable signal)",
|
||||
workflow_permanent_id=workflow.workflow_permanent_id,
|
||||
total_episodes=len(all_episodes),
|
||||
failed_labels=[ep.block_label for ep in all_episodes][:20],
|
||||
)
|
||||
return
|
||||
|
||||
LOG.info(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue