# Skyvern/tests/unit/test_should_run_script.py

"""Tests for WorkflowService.should_run_script() and script reviewer gating.

Verifies the priority chain:
  run-level run_with > workflow-level run_with > code_version fallback > default (agent).

run_with is always "agent" or "code" (never null). An explicit run_with="agent"
wins even when code_version >= 1: the tests below assert that should_run_script()
returns False in that case, so code_version alone never switches a run to code mode.
"""

from datetime import datetime, timezone

import pytest

from skyvern.forge.sdk.workflow.models.workflow import (
    Workflow,
    WorkflowRun,
    WorkflowRunStatus,
    is_adaptive_caching,
)


def _make_workflow(
    run_with: str = "agent",
    adaptive_caching: bool = False,
    code_version: int | None = None,
) -> Workflow:
    """Build a ``Workflow`` with fixed placeholder identifiers for tests.

    Only the three mode-related settings vary between tests; every other
    field is a constant.

    Args:
        run_with: Workflow-level execution mode passed straight through.
        adaptive_caching: Legacy adaptive-caching flag passed straight through.
        code_version: Optional code version passed straight through.

    Returns:
        A ``Workflow`` whose timestamps are set to the current UTC time.
    """
    fields = {
        "workflow_id": "wf_test",
        "organization_id": "org_test",
        "workflow_permanent_id": "wpid_test",
        "title": "test",
        "version": 1,
        "is_saved_task": False,
        # Empty definition: these tests never execute any blocks.
        "workflow_definition": {"parameters": [], "blocks": []},
        "run_with": run_with,
        "adaptive_caching": adaptive_caching,
        "code_version": code_version,
        "created_at": datetime.now(timezone.utc),
        "modified_at": datetime.now(timezone.utc),
    }
    return Workflow(**fields)


def _make_run(run_with: str = "agent") -> WorkflowRun:
    """Build a ``WorkflowRun`` in the ``running`` status with fixed placeholder identifiers.

    Args:
        run_with: Run-level execution mode passed straight through.

    Returns:
        A ``WorkflowRun`` whose timestamps are set to the current UTC time.
    """
    fields = {
        "workflow_run_id": "wr_test",
        "workflow_id": "wf_test",
        "workflow_permanent_id": "wpid_test",
        "organization_id": "org_test",
        "status": WorkflowRunStatus.running,
        "run_with": run_with,
        "created_at": datetime.now(timezone.utc),
        "modified_at": datetime.now(timezone.utc),
    }
    return WorkflowRun(**fields)


@pytest.fixture
def service():
    """Provide a ``WorkflowService`` built with no constructor arguments.

    The service module is imported inside the fixture rather than at the top
    of the file, so the import only happens when a test requests the fixture.
    """
    from skyvern.forge.sdk.workflow.service import WorkflowService as _WorkflowService

    workflow_service = _WorkflowService()
    return workflow_service


class TestShouldRunScript:
    """Exercise should_run_script() across run-level and workflow-level settings.

    Covers combinations of run-level run_with, workflow-level run_with,
    code_version, and the legacy adaptive_caching flag.
    """

    def test_run_code_overrides_workflow_agent(self, service):
        """Run says code, workflow says agent → script runs."""
        workflow = _make_workflow(run_with="agent")
        run = _make_run(run_with="code")
        decision = service.should_run_script(workflow, run)
        assert decision is True

    def test_run_agent_overrides_workflow_code(self, service):
        """Run says agent, workflow says code → script does not run."""
        workflow = _make_workflow(run_with="code")
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        assert decision is False

    def test_run_agent_overrides_code_version(self, service):
        """Agent on both levels with code_version=2 → script does not run."""
        workflow = _make_workflow(run_with="agent", code_version=2)
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        assert decision is False

    def test_workflow_code_with_agent_run(self, service):
        """Workflow says code, run says agent → run wins.

        Same inputs as test_run_agent_overrides_workflow_code.
        """
        workflow = _make_workflow(run_with="code")
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        assert decision is False

    def test_workflow_agent_with_agent_run(self, service):
        """Agent on both levels, no code_version → script does not run."""
        workflow = _make_workflow(run_with="agent")
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        assert decision is False

    def test_workflow_agent_overrides_code_version(self, service):
        """Workflow-level agent with code_version=2 and an agent run → script does not run.

        Same inputs as test_run_agent_overrides_code_version.
        """
        workflow = _make_workflow(run_with="agent", code_version=2)
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        assert decision is False

    def test_both_agent_defaults_to_agent(self, service):
        """When both workflow and run are agent, the result is agent.

        Same inputs as test_workflow_agent_with_agent_run.
        """
        workflow = _make_workflow(run_with="agent")
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        assert decision is False

    def test_code_version_with_agent_run_with(self, service):
        """code_version >= 1 does not enable code mode when both levels say agent."""
        workflow = _make_workflow(run_with="agent", code_version=2)
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        # Agent on both sides wins; code_version is ignored.
        assert decision is False

    def test_code_version_with_code_run_with(self, service):
        """Workflow-level code with code_version >= 1 still yields agent when the run says agent."""
        workflow = _make_workflow(run_with="code", code_version=2)
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        # The run-level agent setting overrides the workflow-level code setting.
        assert decision is False

    def test_workflow_code_run_code(self, service):
        """Both workflow and run say code (code_version=2) → script runs."""
        workflow = _make_workflow(run_with="code", code_version=2)
        run = _make_run(run_with="code")
        decision = service.should_run_script(workflow, run)
        assert decision is True

    def test_workflow_code_no_code_version(self, service):
        """Both levels say code and code_version is unset → script still runs."""
        workflow = _make_workflow(run_with="code")
        run = _make_run(run_with="code")
        decision = service.should_run_script(workflow, run)
        assert decision is True

    def test_legacy_adaptive_caching_does_not_override_agent(self, service):
        """Legacy adaptive_caching=True does NOT override explicit run_with=agent."""
        workflow = _make_workflow(run_with="agent", adaptive_caching=True)
        run = _make_run(run_with="agent")
        decision = service.should_run_script(workflow, run)
        # Explicit agent wins. adaptive_caching was a legacy fallback that
        # only applied when run_with was null, which is no longer possible.
        assert decision is False


class TestScriptReviewerGate:
    """Check when the script reviewer should fire.

    The gate is modelled here as is_adaptive_caching() AND should_run_script():
    the reviewer should only fire when the script was actually executed, so that
    LLM tokens are not spent reviewing scripts based on agent-only runs.
    """

    @staticmethod
    def _reviewer_gate(service, workflow, run):
        """Return the gate value: adaptive caching enabled and the script would run.

        Uses ``and`` so should_run_script() is only consulted when
        is_adaptive_caching() is truthy.
        """
        return is_adaptive_caching(workflow, run) and service.should_run_script(workflow, run)

    @pytest.mark.parametrize(
        "wf_run_with,run_run_with,code_version,expect_reviewer",
        [
            ("code", "code", 2, True),  # code + code_version=2 → adaptive caching on
            ("code", "code", 1, False),  # code + code_version=1 → adaptive caching off
            ("agent", "agent", 2, False),  # agent overrides everything
            ("agent", "agent", None, False),  # agent, no code_version
            ("code", "agent", 2, False),  # run-level agent overrides
            ("agent", "code", None, False),  # code without code_version → not adaptive
            ("agent", "code", 2, True),  # run-level code + code_version=2 → adaptive
        ],
    )
    def test_reviewer_gate(self, service, wf_run_with, run_run_with, code_version, expect_reviewer):
        """The gate matches the expected value for each run_with / code_version combination."""
        workflow = _make_workflow(run_with=wf_run_with, code_version=code_version)
        run = _make_run(run_with=run_run_with)
        gate = self._reviewer_gate(service, workflow, run)
        assert gate is expect_reviewer

    def test_legacy_adaptive_caching_backward_compat(self, service):
        """Legacy adaptive_caching=True with code_version=None still enables reviewer."""
        workflow = _make_workflow(run_with="agent", adaptive_caching=True, code_version=None)
        run = _make_run(run_with="code")
        gate = self._reviewer_gate(service, workflow, run)
        assert gate is True