Skyvern/skyvern/forge/failure_classifier.py
Aaron Perez ff198cb6f5
Some checks are pending
Run tests and pre-commit / Run tests and pre-commit hooks (push) Waiting to run
Run tests and pre-commit / Frontend Lint and Build (push) Waiting to run
Publish Fern Docs / run (push) Waiting to run
fix(extraction-shadow): follow-up to SKY-8871 — cycle, dead code, comment nits (#5486)
2026-04-13 23:32:20 -05:00

217 lines
8.4 KiB
Python

from __future__ import annotations
def classify_from_failure_reason(
    failure_reason: str | None,
    exception: Exception | None = None,
    fallback_to_unknown: bool = False,
) -> list[dict] | None:
    """Classify a failure from its failure_reason text and/or exception type.

    Matching is purely keyword based: ``failure_reason`` is lower-cased and
    searched for substrings, while the exception is matched (case-sensitively)
    on its class name only. Several categories may match at once.

    Args:
        failure_reason: Free-text description of the failure, or None.
        exception: The exception that caused the failure, if any.
        fallback_to_unknown: When True and no keywords match, return a single
            UNKNOWN category instead of None. Use True for paths that are
            *always* failures (exception, max_steps, max_retries). Use False
            (the default) for terminate paths where the absence of a
            classification may simply mean the termination was user-guided /
            expected.

    Returns:
        A list of ``{"category", "confidence_float", "reasoning"}`` dicts
        sorted by confidence (highest first; ties keep rule order), or None if
        nothing matched. None is also returned when both ``failure_reason`` is
        empty and ``exception`` is None, regardless of ``fallback_to_unknown``.

    Categories (16):
        ANTI_BOT_DETECTION, PROXY_ERROR, BROWSER_ERROR, NAVIGATION_FAILURE,
        PAGE_LOAD_TIMEOUT, AUTH_FAILURE, LLM_ERROR, CREDENTIAL_ERROR,
        DATA_EXTRACTION_FAILURE, ELEMENT_NOT_FOUND, WRONG_PAGE_STATE,
        MAX_STEPS_EXCEEDED, LLM_REASONING_ERROR, INFRASTRUCTURE_ERROR,
        PARAMETER_BINDING_ERROR, UNKNOWN

        Note: no rule in this function emits INFRASTRUCTURE_ERROR.
    """
    # Compare against None explicitly: an exception class that defines
    # __len__/__bool__ can be falsy and must still be classified.
    if not failure_reason and exception is None:
        return None

    reason = (failure_reason or "").lower()
    exc_name = type(exception).__name__ if exception is not None else ""
    categories: list[dict] = []

    def _add(category: str, confidence: float, reasoning: str) -> None:
        # Single place that fixes the shape (and key order) of a result entry.
        categories.append({"category": category, "confidence_float": confidence, "reasoning": reasoning})

    def _reason_has(keywords) -> bool:
        # True when any keyword occurs as a substring of the lower-cased reason.
        return any(kw in reason for kw in keywords)

    def _exc_has(keywords) -> bool:
        # True when any keyword occurs as a substring of the exception class name.
        return any(kw in exc_name for kw in keywords)

    # Bot detection / CAPTCHA — use specific phrases to avoid false positives
    has_auth_context = _reason_has(["login", "auth", "password", "permission", "credential"])
    antibot_keywords = [
        "captcha",
        "cloudflare",
        "bot detect",
        "bot block",
        "ip block",
        "request block",
        "anti-bot",
        "human verification",
    ]
    # "access denied" is ambiguous: it can be bot blocking OR auth failure.
    # Only treat it as bot detection when there are no auth-related keywords nearby.
    # Note: in Skyvern's context, failure_reason is LLM-generated from page observations,
    # so RBAC-style messages like "Access denied: insufficient privileges" are unlikely.
    # If this becomes a false-positive source, consider further narrowing (e.g. requiring
    # "access denied" appears without ANY qualifier, or adding more exclusion keywords).
    if not has_auth_context:
        antibot_keywords.append("access denied")
    if _reason_has(antibot_keywords):
        _add("ANTI_BOT_DETECTION", 0.7, "Keywords matched in failure reason")

    # Proxy errors — check before browser errors so proxy failures don't fall into BROWSER_ERROR.
    # The exception name may contain "Browser" (e.g. UnknownErrorWhileCreatingBrowserContext) but the
    # root cause is proxy pool exhaustion.
    proxy_exc_match = _exc_has(["NoProxy", "ProxyError"])
    browser_exc_match = _exc_has(["Browser", "CDP", "TargetClosed"])
    if proxy_exc_match or _reason_has(["no proxy available", "proxy unavailable"]):
        # Credit the exception only when the exception itself matched; an
        # unrelated exception alongside a keyword hit is a keyword match.
        _add("PROXY_ERROR", 0.9, f"Exception: {exc_name}" if proxy_exc_match else "Keywords matched")
    # Browser errors — only match if not already classified as PROXY_ERROR above
    elif browser_exc_match or _reason_has(["browser context closed", "page closed", "browser crash"]):
        _add("BROWSER_ERROR", 0.9, f"Exception: {exc_name}" if browser_exc_match else "Keywords matched")

    # Navigation failure
    if "FailedToNavigateToUrl" in exc_name or _reason_has(["failed to navigate", "404", "redirect loop"]):
        _add(
            "NAVIGATION_FAILURE",
            0.9,
            f"Exception: {exc_name}" if "FailedToNavigate" in exc_name else "Keywords matched",
        )

    # Page load timeout
    if "Timeout" in exc_name or "timeout" in reason:
        _add(
            "PAGE_LOAD_TIMEOUT",
            0.8,
            f"Exception: {exc_name}" if "Timeout" in exc_name else "Timeout in failure reason",
        )

    # Auth failure — also catches "access denied" when auth context is present
    if _reason_has(["login fail", "authentication fail", "auth fail", "mfa", "password"]) or (
        "access denied" in reason and has_auth_context
    ):
        _add("AUTH_FAILURE", 0.7, "Keywords matched")

    # Credential error
    if "Bitwarden" in exc_name or _reason_has(["credential not found", "missing credential"]):
        _add(
            "CREDENTIAL_ERROR",
            0.8,
            f"Exception: {exc_name}" if "Bitwarden" in exc_name else "Keywords matched",
        )

    # LLM error
    llm_exc_match = _exc_has(["LLM", "APIError", "RateLimit"])
    if llm_exc_match or "rate limit" in reason:
        _add("LLM_ERROR", 0.9, f"Exception: {exc_name}" if llm_exc_match else "Keywords matched")

    # Scraping / data extraction failure
    if "ScrapingFailed" in exc_name or _reason_has(["scraping", "extraction fail", "empty extraction"]):
        _add(
            "DATA_EXTRACTION_FAILURE",
            0.7,
            f"Exception: {exc_name}" if "Scraping" in exc_name else "Keywords matched",
        )

    # Element not found
    if "ElementNotFound" in exc_name or _reason_has(["element not found", "no matching element"]):
        _add(
            "ELEMENT_NOT_FOUND",
            0.8,
            f"Exception: {exc_name}" if "ElementNotFound" in exc_name else "Keywords matched",
        )

    # Wrong page state
    if _reason_has(["unexpected page", "wrong page", "blank page"]):
        _add("WRONG_PAGE_STATE", 0.6, "Keywords matched")

    # Max steps exceeded
    if _reason_has(["max steps", "maximum steps", "max number of", "step limit"]):
        _add("MAX_STEPS_EXCEEDED", 0.9, "Keywords matched")

    # LLM reasoning error (wrong action, hallucination)
    if _reason_has(["wrong action", "invalid action", "hallucin"]):
        _add("LLM_REASONING_ERROR", 0.6, "Keywords matched")

    # Internal configuration mismatch — not a site/selector failure.
    param_binding_keywords = [
        "should have already been set through workflow run parameters",
        "should have already been set through workflow run context init",
        "pre-run invariant: workflow_definition and persisted parameter rows disagree",
    ]
    if _reason_has(param_binding_keywords):
        _add("PARAMETER_BINDING_ERROR", 0.95, "Keywords matched")

    if not categories:
        if fallback_to_unknown:
            return [{"category": "UNKNOWN", "confidence_float": 0.5, "reasoning": "No keyword match found"}]
        return None

    # Sort by confidence descending; the sort is stable, so equal-confidence
    # entries keep the order in which the rules above fired.
    categories.sort(key=lambda entry: entry["confidence_float"], reverse=True)
    return categories