🔄 synced local 'tests/unit/' with remote 'tests/unit/'

This commit is contained in:
Lawy Zheng 2026-04-23 19:14:09 +00:00
parent c2ba244039
commit b14fcbfea3
3 changed files with 175 additions and 8 deletions

View file

@ -238,7 +238,7 @@ def test_trusted_post_drain_handles_missing_row() -> None:
from types import SimpleNamespace as _NS # noqa: E402 (grouped with test block)
from skyvern.forge.sdk.copilot.tools import _BLOCK_RUNNING_TOOLS, _tool_loop_error # noqa: E402
from skyvern.forge.sdk.copilot.tools import BLOCK_RUNNING_TOOLS, _tool_loop_error # noqa: E402
def _guard_ctx(pending_run_id: str | None = None) -> _NS:
@ -251,9 +251,9 @@ def _guard_ctx(pending_run_id: str | None = None) -> _NS:
)
@pytest.mark.parametrize("tool_name", sorted(_BLOCK_RUNNING_TOOLS))
@pytest.mark.parametrize("tool_name", sorted(BLOCK_RUNNING_TOOLS))
def test_reconciliation_guard_blocks_block_running_tools(tool_name: str) -> None:
"""With a pending run set, every tool in ``_BLOCK_RUNNING_TOOLS`` is
"""With a pending run set, every tool in ``BLOCK_RUNNING_TOOLS`` is
rejected with an error that names the run id and directs the LLM to call
``get_run_results`` first."""
ctx = _guard_ctx(pending_run_id="wr_pending_123")
@ -268,7 +268,7 @@ def test_reconciliation_guard_blocks_block_running_tools(tool_name: str) -> None
["get_run_results", "update_workflow", "list_credentials"],
)
def test_reconciliation_guard_ignores_non_block_running_tools(tool_name: str) -> None:
"""The guard is scoped to ``_BLOCK_RUNNING_TOOLS``. Planning / metadata
"""The guard is scoped to ``BLOCK_RUNNING_TOOLS``. Planning / metadata
tools (including ``get_run_results`` itself, which is the tool that can
CLEAR the flag) must not be rejected."""
ctx = _guard_ctx(pending_run_id="wr_pending_123")
@ -279,7 +279,7 @@ def test_reconciliation_guard_passes_when_flag_empty() -> None:
"""No pending run → `_tool_loop_error` returns None for block-running
tools (assuming no other guard trips)."""
ctx = _guard_ctx(pending_run_id=None)
for name in _BLOCK_RUNNING_TOOLS:
for name in BLOCK_RUNNING_TOOLS:
assert _tool_loop_error(ctx, name) is None
@ -288,7 +288,7 @@ def test_reconciliation_guard_rejects_empty_string_run_id() -> None:
not set. Prevents a spurious guard trip if anything ever clears the flag
to ``''`` instead of ``None``."""
ctx = _guard_ctx(pending_run_id="")
for name in _BLOCK_RUNNING_TOOLS:
for name in BLOCK_RUNNING_TOOLS:
assert _tool_loop_error(ctx, name) is None

View file

@ -25,7 +25,11 @@ from skyvern.forge.sdk.copilot.enforcement import (
_prune_input_list,
_summarize_tool_output,
)
from skyvern.forge.sdk.copilot.tools import _is_meaningful_extracted_data
from skyvern.forge.sdk.copilot.tools import (
_analyze_run_blocks,
_is_meaningful_extracted_data,
_record_run_blocks_result,
)
class _Ctx:
@ -107,6 +111,169 @@ def test_meaningful_data_string() -> None:
assert _is_meaningful_extracted_data("$260.48") is True
# ---------------------------------------------------------------------------
# _analyze_run_blocks — envelope-unwrap for EXTRACTION blocks
#
# ExtractionBlock stores TaskOutput.from_task() on block.output. Envelope
# fields (task_id, status, *_screenshot_artifact_ids) are always populated on
# a completed run and would short-circuit _is_meaningful_extracted_data to
# True even when the real payload fields (extracted_information,
# downloaded_files, downloaded_file_urls) are empty. The meaningful-data
# check must judge against the payload slice, not the envelope.
# ---------------------------------------------------------------------------
_EMPTY_EXTRACTION_ENVELOPE: dict[str, Any] = {
"task_id": "tsk_00000000000000000001",
"status": "completed",
"extracted_information": [],
"failure_reason": None,
"errors": [],
"failure_category": None,
"downloaded_files": [],
"downloaded_file_urls": None,
"task_screenshots": None,
"workflow_screenshots": None,
"task_screenshot_artifact_ids": ["a_00000000000000000001", "a_00000000000000000002"],
"workflow_screenshot_artifact_ids": ["a_00000000000000000001", "a_00000000000000000003"],
}
def _run_result(blocks: list[dict[str, Any]], ok: bool = True) -> dict[str, Any]:
return {"ok": ok, "data": {"blocks": blocks}}
def _envelope(**overrides: Any) -> dict[str, Any]:
    """Copy the empty-extraction envelope and lay keyword *overrides* on top."""
    merged = dict(_EMPTY_EXTRACTION_ENVELOPE)
    merged.update(overrides)
    return merged
def _extraction_block(extracted_data: dict[str, Any]) -> dict[str, Any]:
return {
"label": "extract_flights",
"block_type": "EXTRACTION",
"status": "completed",
"extracted_data": extracted_data,
}
def _text_prompt_block(extracted_data: Any) -> dict[str, Any]:
return {
"label": "summarize",
"block_type": "TEXT_PROMPT",
"status": "completed",
"extracted_data": extracted_data,
}
# Case id -> (envelope overrides, expected empty_data_blocks)
#
# empty_payload_trace_repro: extracted_information=[], downloaded_files=[],
# downloaded_file_urls=None, envelope metadata populated. Envelope-as-a-whole
# is truthy; real payload is empty; gate must flip. (SKY-9143 repro.)
# download_only_files / download_only_urls: legitimate extraction success where the
# block produced files but no structured payload — must NOT flip the gate.
_EXTRACTION_ENVELOPE_CASES: list[tuple[str, dict[str, Any], bool]] = [
("empty_payload_trace_repro", {}, True),
("real_extraction", {"extracted_information": [{"price": "260.48"}]}, False),
(
"download_only_files",
{"downloaded_files": [{"url": "https://example.com/a.pdf", "checksum": "abc123"}]},
False,
),
(
"download_only_urls",
{"extracted_information": None, "downloaded_file_urls": ["https://example.com/a.pdf"]},
False,
),
]
@pytest.mark.parametrize(
    "overrides,expected_empty",
    [case[1:] for case in _EXTRACTION_ENVELOPE_CASES],
    ids=[case[0] for case in _EXTRACTION_ENVELOPE_CASES],
)
def test_analyze_extraction_envelope(overrides: dict[str, Any], expected_empty: bool) -> None:
    """The empty-data verdict must track the payload slice, not the envelope."""
    run = _run_result([_extraction_block(_envelope(**overrides))])
    _, empty_data_blocks, _ = _analyze_run_blocks(run)
    assert empty_data_blocks is expected_empty
def test_analyze_text_prompt_default_schema_is_not_empty() -> None:
    """A TEXT_PROMPT block returns the raw LLM response dict (no Task
    envelope); under the default schema {"llm_response": "<text>"} a
    populated response must not be flagged as empty."""
    block = _text_prompt_block({"llm_response": "the sentiment is positive"})
    _, empty_data_blocks, _ = _analyze_run_blocks(_run_result([block]))
    assert empty_data_blocks is False
def test_analyze_text_prompt_user_schema_named_extracted_information_is_not_sliced() -> None:
    """Guard against a too-broad unwrap: a user's json_schema may name a
    top-level field "extracted_information". The helper must not mistake that
    for an EXTRACTION envelope and discard the sibling fields."""
    payload = {"extracted_information": "ignored because this is TEXT_PROMPT", "summary": "x"}
    _, empty_data_blocks, _ = _analyze_run_blocks(_run_result([_text_prompt_block(payload)]))
    assert empty_data_blocks is False
def test_analyze_text_prompt_all_null_is_empty() -> None:
    """A text-prompt response whose fields are all None carries no meaningful
    output — symmetric to the {"price": None} extraction case."""
    run = _run_result([_text_prompt_block({"summary": None})])
    _, empty_data_blocks, _ = _analyze_run_blocks(run)
    assert empty_data_blocks is True
# ---------------------------------------------------------------------------
# _record_run_blocks_result — end-to-end flip of last_test_ok on empty envelope
# ---------------------------------------------------------------------------
def _fresh_ctx_for_record() -> Any:
"""SimpleNamespace shaped for _record_run_blocks_result + update_repeated_failure_state.
Uses getattr-with-default-compatible defaults so the function under test
populates the interesting fields without tripping AttributeError on the
downstream update_repeated_failure_state call.
"""
from types import SimpleNamespace
return SimpleNamespace(
last_test_ok=True,
last_test_failure_reason=None,
last_test_suspicious_success=False,
last_test_anti_bot=None,
last_failure_category_top=None,
last_test_non_retriable_nav_error=None,
null_data_streak_count=0,
failed_test_nudge_count=0,
last_failed_workflow_yaml=None,
non_retriable_nav_error_last_emitted_signature=None,
workflow_yaml=None,
last_workflow=None,
last_frontier_start_label=None,
last_executed_block_labels=[],
last_failure_signature=None,
last_frontier_fingerprint=None,
repeated_failure_streak_count=0,
repeated_failure_nudge_emitted_at_streak=0,
pending_action_sequence_fingerprint=None,
last_action_sequence_fingerprint=None,
repeated_action_fingerprint_streak_count=0,
)
def test_record_run_blocks_result_flips_last_test_ok_on_empty_extraction_envelope() -> None:
    """End-to-end SKY-9143 regression: a run reporting ok=true whose sole
    EXTRACTION block produced the empty envelope must push last_test_ok from
    True to None, so _verified_workflow_or_none blocks the proposal."""
    ctx = _fresh_ctx_for_record()
    _record_run_blocks_result(ctx, _run_result([_extraction_block(_envelope())]))
    assert ctx.last_test_ok is None
    assert ctx.last_test_suspicious_success is True
    assert ctx.last_test_failure_reason is not None
# ---------------------------------------------------------------------------
# Repeated null-data escalation
# ---------------------------------------------------------------------------

View file

@ -570,7 +570,7 @@ def test_tool_loop_error_blocks_run_blocks_and_collect_debug_after_dns_failure()
def test_tool_loop_error_does_not_block_planning_tools() -> None:
# get_run_results / list_credentials / update_workflow are scoped out of
# _BLOCK_RUNNING_TOOLS and should remain callable so the agent can inspect
# BLOCK_RUNNING_TOOLS and should remain callable so the agent can inspect
# the failure and decide how to respond to the user.
ctx = _fresh_context()
ctx.last_test_non_retriable_nav_error = _DNS_FAILURE_REASON