mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
351 lines
13 KiB
Python
351 lines
13 KiB
Python
"""Tests for the copilot-v2 post-emission reject of ``task`` / ``task_v2`` block
|
|
types (SKY-9174, Part F).
|
|
|
|
Part C.1 banned the types at the schema-lookup surface via `SchemaOverlay`
|
|
pre / post hooks, but the LLM can bypass that by writing YAML directly without
|
|
querying the schema. Part F closes the bypass with a YAML-level reject that
|
|
fires on every copilot-v2 write path (``_update_workflow`` + inline
|
|
``REPLACE_WORKFLOW``), keyed by block label so legacy workflows with
|
|
pre-existing ``task`` blocks can still be edited by the copilot.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import textwrap
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
import yaml
|
|
|
|
from skyvern.forge.sdk.copilot.tools import _detect_new_banned_blocks, _update_workflow
|
|
|
|
|
|
def _yaml(*blocks: dict) -> str:
    """Dump ``blocks`` as a workflow YAML document titled ``wf``.

    The blocks are placed, in the order given, under
    ``workflow_definition.blocks``. Key order is preserved in the output
    because ``sort_keys`` is disabled.
    """
    document = {
        "title": "wf",
        "workflow_definition": {"blocks": [*blocks]},
    }
    return yaml.safe_dump(document, sort_keys=False)
|
|
|
|
|
|
# ---------- Flat shapes ----------
|
|
|
|
|
|
def test_top_level_task_block_is_detected_on_first_authoring() -> None:
    """With no prior YAML, a top-level ``task`` block is reported as ``(label, type)``."""
    task_block = {"block_type": "task", "label": "fill_contact_form", "navigation_goal": "do thing"}
    detected = _detect_new_banned_blocks(_yaml(task_block), prior_workflow_yaml=None)
    assert detected == [("fill_contact_form", "task")]
|
|
|
|
|
|
def test_top_level_task_v2_block_is_detected() -> None:
    """With no prior YAML, a top-level ``task_v2`` block is reported as ``(label, type)``."""
    task_v2_block = {"block_type": "task_v2", "label": "legacy_taskv2", "prompt": "do it"}
    detected = _detect_new_banned_blocks(_yaml(task_v2_block), prior_workflow_yaml=None)
    assert detected == [("legacy_taskv2", "task_v2")]
|
|
|
|
|
|
def test_case_and_whitespace_insensitive() -> None:
    """Upper-case, mixed-case and space-padded ``block_type`` values are reported in normalized form."""
    raw_type_by_label = {"a": "TASK", "b": " task_v2 ", "c": "Task"}
    submitted = _yaml(
        *({"block_type": raw_type, "label": label} for label, raw_type in raw_type_by_label.items()),
    )
    detected = _detect_new_banned_blocks(submitted, prior_workflow_yaml=None)
    # Compared after sorting, so the order of the returned items is not part of this check.
    assert sorted(detected) == [("a", "task"), ("b", "task_v2"), ("c", "task")]
|
|
|
|
|
|
def test_mixed_task_and_navigation_only_reports_banned() -> None:
    """Among navigation, task and extraction blocks, only the ``task`` block is reported."""
    blocks = [
        {"block_type": "navigation", "label": "nav_a", "navigation_goal": "ok"},
        {"block_type": "task", "label": "bad", "navigation_goal": "bad"},
        {"block_type": "extraction", "label": "ext_a"},
    ]
    detected = _detect_new_banned_blocks(_yaml(*blocks), prior_workflow_yaml=None)
    assert detected == [("bad", "task")]
|
|
|
|
|
|
def test_only_allowed_types_returns_empty() -> None:
    """A workflow made solely of non-banned block types yields an empty result."""
    type_and_label = (
        ("navigation", "n"),
        ("extraction", "e"),
        ("validation", "v"),
        ("login", "lg"),
        ("goto_url", "g"),
    )
    submitted = _yaml(*[{"block_type": block_type, "label": label} for block_type, label in type_and_label])
    detected = _detect_new_banned_blocks(submitted, prior_workflow_yaml=None)
    assert detected == []
|
|
|
|
|
|
# ---------- Malformed ----------
|
|
|
|
|
|
def test_malformed_yaml_is_graceful_no_op() -> None:
    """Input that cannot be parsed as YAML yields an empty result rather than an exception."""
    # Deliberately broken: the single-quoted scalar is never closed.
    broken_yaml = "title: 'unterminated"
    detected = _detect_new_banned_blocks(broken_yaml, prior_workflow_yaml=None)
    assert detected == []
|
|
|
|
|
|
def test_missing_workflow_definition_is_graceful_no_op() -> None:
    """A document without a ``workflow_definition`` key yields an empty result."""
    title_only_yaml = "title: wf\n"
    detected = _detect_new_banned_blocks(title_only_yaml, prior_workflow_yaml=None)
    assert detected == []
|
|
|
|
|
|
def test_blocks_key_not_a_list_is_graceful_no_op() -> None:
    """A scalar under ``workflow_definition.blocks`` yields an empty result."""
    scalar_blocks_yaml = "title: wf\nworkflow_definition:\n blocks: not-a-list\n"
    detected = _detect_new_banned_blocks(scalar_blocks_yaml, prior_workflow_yaml=None)
    assert detected == []
|
|
|
|
|
|
def test_block_entry_not_a_dict_is_skipped() -> None:
    """A non-mapping entry in ``blocks`` is passed over while a sibling ``task`` block is still reported."""
    # The first list item is a bare string where a block mapping is expected;
    # it should be skipped, not crash the detector.
    # NOTE(review): the YAML nesting below is re-indented conventionally — confirm against upstream.
    document = textwrap.dedent(
        """\
        title: wf
        workflow_definition:
          blocks:
            - "not a block"
            - block_type: task
              label: real_banned
        """
    )
    detected = _detect_new_banned_blocks(document, prior_workflow_yaml=None)
    assert detected == [("real_banned", "task")]
|
|
|
|
|
|
# ---------- Legacy preservation (RISK-1) ----------
|
|
|
|
|
|
def test_preserved_legacy_task_block_under_same_label_does_not_reject() -> None:
    """A ``task`` block whose label already exists in the prior YAML is not reported, even when edited."""
    before = _yaml({"block_type": "task", "label": "legacy_task", "navigation_goal": "old"})
    after = _yaml({"block_type": "task", "label": "legacy_task", "navigation_goal": "old edited"})
    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == []
|
|
|
|
|
|
def test_new_task_block_alongside_preserved_legacy_reports_only_the_new_one() -> None:
    """Only the ``task`` label absent from the prior YAML is reported; the carried-over one is not."""
    legacy_block = {"block_type": "task", "label": "legacy_task"}
    added_block = {"block_type": "task", "label": "fill_contact_form"}
    before = _yaml(legacy_block)
    after = _yaml(legacy_block, added_block)
    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == [("fill_contact_form", "task")]
|
|
|
|
|
|
def test_renamed_legacy_task_block_is_treated_as_new() -> None:
    """A legacy ``task`` block re-emitted under a different label is reported as new.

    Matching is by label, so the detector cannot recognise a rename. This is
    accepted: the copilot can recover by going back to the prior label.
    """
    before = _yaml({"block_type": "task", "label": "old_name"})
    after = _yaml({"block_type": "task", "label": "new_name"})
    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == [("new_name", "task")]
|
|
|
|
|
|
def test_prior_contains_allowed_types_submitted_adds_task_rejects() -> None:
    """Adding a ``task`` block to a workflow that previously held only a navigation block is reported."""
    nav_block = {"block_type": "navigation", "label": "nav"}
    before = _yaml(nav_block)
    after = _yaml(nav_block, {"block_type": "task", "label": "bad_new"})
    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == [("bad_new", "task")]
|
|
|
|
|
|
def test_legacy_task_v2_preservation() -> None:
    """A ``task_v2`` block carried over under the same label is not reported."""
    legacy_block = {"block_type": "task_v2", "label": "legacy_v2"}
    before = _yaml(legacy_block)
    after = _yaml(legacy_block)
    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == []
|
|
|
|
|
|
# ---------- Nested (COMP-1) ----------
|
|
|
|
|
|
def test_task_block_inside_for_loop_is_detected() -> None:
    """A ``task`` block listed in a ``for_loop``'s ``loop_blocks`` is reported."""
    inner_blocks = [
        {"block_type": "navigation", "label": "inner_nav"},
        {"block_type": "task", "label": "inner_bad"},
    ]
    loop_block = {"block_type": "for_loop", "label": "loop", "loop_blocks": inner_blocks}
    detected = _detect_new_banned_blocks(_yaml(loop_block), prior_workflow_yaml=None)
    assert detected == [("inner_bad", "task")]
|
|
|
|
|
|
def test_nested_preservation_does_not_reject() -> None:
    """A ``task`` block nested in a ``for_loop`` is not reported when the prior YAML holds the same label."""
    loop_block = {
        "block_type": "for_loop",
        "label": "loop",
        "loop_blocks": [{"block_type": "task", "label": "nested_legacy"}],
    }
    # Prior and submitted documents are identical.
    before = _yaml(loop_block)
    after = _yaml(loop_block)
    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == []
|
|
|
|
|
|
def test_nested_new_addition_is_detected() -> None:
    """A ``task`` block newly added inside an existing ``for_loop`` is reported."""
    existing_inner = {"block_type": "navigation", "label": "nav_inner"}
    added_inner = {"block_type": "task", "label": "new_nested_bad"}

    before = _yaml({"block_type": "for_loop", "label": "loop", "loop_blocks": [existing_inner]})
    after = _yaml({"block_type": "for_loop", "label": "loop", "loop_blocks": [existing_inner, added_inner]})

    detected = _detect_new_banned_blocks(after, prior_workflow_yaml=before)
    assert detected == [("new_nested_bad", "task")]
|
|
|
|
|
|
def test_deeply_nested_for_loop_is_walked() -> None:
    """A ``task`` block two ``for_loop`` levels down is still reported."""
    inner_loop = {
        "block_type": "for_loop",
        "label": "inner",
        "loop_blocks": [{"block_type": "task", "label": "deeply_nested_bad"}],
    }
    outer_loop = {"block_type": "for_loop", "label": "outer", "loop_blocks": [inner_loop]}
    detected = _detect_new_banned_blocks(_yaml(outer_loop), prior_workflow_yaml=None)
    assert detected == [("deeply_nested_bad", "task")]
|
|
|
|
|
|
# ---------- Missing label — should not crash ----------
|
|
|
|
|
|
def test_block_without_label_is_skipped() -> None:
    """A banned block with no ``label`` key is left out of the result instead of raising.

    Without a label the block cannot be matched for preservation. The
    malformed block is expected to be surfaced by downstream validation.
    """
    unlabeled_block = {"block_type": "task", "navigation_goal": "no label"}
    detected = _detect_new_banned_blocks(_yaml(unlabeled_block), prior_workflow_yaml=None)
    # Nothing is collected for a block that has no label.
    assert detected == []
|
|
|
|
|
|
# ---------- Integration-shape tests: _update_workflow end-to-end ----------
|
|
#
|
|
# These exercise the reject path at the tool-helper boundary, confirming the
|
|
# detection + error tool-result shape + dedicated OTEL span. The success path
|
|
# (YAML with only allowed types, or with preserved legacy task labels) is also
|
|
# covered — we patch ``_process_workflow_yaml`` and the workflow-service write
|
|
# so the test does not need a DB.
|
|
|
|
|
|
def _ctx(prior_yaml: str | None = None) -> MagicMock:
|
|
ctx = MagicMock()
|
|
ctx.workflow_yaml = prior_yaml
|
|
ctx.workflow_id = "w_test"
|
|
ctx.workflow_permanent_id = "wpid_test"
|
|
ctx.organization_id = "o_test"
|
|
return ctx
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_update_workflow_rejects_new_task_block_and_emits_span() -> None:
    """``_update_workflow`` fails on a new ``task`` block, names it, and records the reject span."""
    banned_label = "fill_contact_form"
    payload = {
        "workflow_yaml": _yaml({"block_type": "task", "label": banned_label, "navigation_goal": "do"}),
    }
    ctx = _ctx(prior_yaml=None)

    with patch("skyvern.forge.sdk.copilot.tools._record_banned_block_reject_span") as reject_span:
        outcome = await _update_workflow(payload, ctx)

    assert outcome["ok"] is False
    error_text = outcome["error"]
    assert "not available in the workflow copilot" in error_text
    assert banned_label in error_text
    # The error message must mention each of these alternative block types.
    missing_alternatives = [
        alternative
        for alternative in ("navigation", "extraction", "validation", "login")
        if alternative not in error_text
    ]
    assert missing_alternatives == []

    # The dedicated span helper receives the source tool name and the detected items.
    reject_span.assert_called_once_with("_update_workflow", [(banned_label, "task")])
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_update_workflow_preserves_legacy_task_block_under_unchanged_label() -> None:
    """Editing a workflow that already contains a ``task`` block must succeed.

    The ``task`` label is present in the prior YAML, so re-emitting it
    counts as preservation rather than a new addition.
    """
    legacy_block = {"block_type": "task", "label": "legacy_task", "navigation_goal": "old"}
    added_block = {"block_type": "navigation", "label": "new_nav", "navigation_goal": "new"}
    prior = _yaml(legacy_block)
    # Submitted YAML keeps the legacy task block and appends an allowed-type block.
    submitted = _yaml(legacy_block, added_block)
    ctx = _ctx(prior_yaml=prior)

    # Stand-in for the object returned by the patched ``_process_workflow_yaml``.
    fake_workflow = MagicMock(
        title="t",
        description="d",
        workflow_definition=MagicMock(),
        proxy_location=None,
        webhook_callback_url=None,
        persist_browser_session=False,
        model=None,
        max_screenshot_scrolls=None,
        extra_http_headers=None,
        run_with=None,
        ai_fallback=None,
        cache_key=None,
        run_sequentially=None,
        sequential_key=None,
    )

    with (
        patch("skyvern.forge.sdk.copilot.tools._process_workflow_yaml", return_value=fake_workflow),
        patch("skyvern.forge.sdk.copilot.tools.app") as mock_app,
    ):
        workflow_service = mock_app.WORKFLOW_SERVICE
        workflow_service.get_workflow = AsyncMock(return_value=None)
        workflow_service.update_workflow_definition = AsyncMock()
        outcome = await _update_workflow({"workflow_yaml": submitted}, ctx)

    assert outcome["ok"] is True
    # The accepted YAML is stored on ctx as the current workflow state.
    assert ctx.workflow_yaml == submitted
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_update_workflow_allows_all_allowed_block_types() -> None:
    """Baseline success path: allowed block types only and no prior YAML."""
    submitted = _yaml(
        {"block_type": "navigation", "label": "n", "navigation_goal": "x"},
        {"block_type": "validation", "label": "v", "complete_criterion": "c"},
    )
    ctx = _ctx(prior_yaml=None)

    # Every attribute named here is set to None on the stand-in workflow.
    none_valued_attrs = (
        "title",
        "description",
        "workflow_definition",
        "proxy_location",
        "webhook_callback_url",
        "persist_browser_session",
        "model",
        "max_screenshot_scrolls",
        "extra_http_headers",
        "run_with",
        "ai_fallback",
        "cache_key",
        "run_sequentially",
        "sequential_key",
    )
    fake_workflow = MagicMock(**dict.fromkeys(none_valued_attrs))

    with (
        patch("skyvern.forge.sdk.copilot.tools._process_workflow_yaml", return_value=fake_workflow),
        patch("skyvern.forge.sdk.copilot.tools.app") as mock_app,
    ):
        workflow_service = mock_app.WORKFLOW_SERVICE
        workflow_service.get_workflow = AsyncMock(return_value=None)
        workflow_service.update_workflow_definition = AsyncMock()
        outcome = await _update_workflow({"workflow_yaml": submitted}, ctx)

    assert outcome["ok"] is True
|