# Skyvern/tests/unit/test_should_run_script.py

# 170 lines
# 7 KiB
# Python

"""Tests for WorkflowService.should_run_script() and script reviewer gating.

Verifies the priority chain:
run-level run_with > workflow-level run_with > code_version fallback > default (agent).

run_with is always "agent" or "code" (never null), so the run-level value is always
explicit. An explicit run_with="agent" is not overridden by code_version >= 1 or by
the legacy adaptive_caching flag; the tests below assert agent mode in those cases.
"""
from datetime import datetime, timezone
import pytest
from skyvern.forge.sdk.workflow.models.workflow import (
Workflow,
WorkflowRun,
WorkflowRunStatus,
is_adaptive_caching,
)
def _make_workflow(
    run_with: str = "agent",
    adaptive_caching: bool = False,
    code_version: int | None = None,
) -> Workflow:
    """Build a minimal ``Workflow`` for the script-gating tests.

    Only the three arguments vary between tests; every other field is a
    fixed placeholder value.

    Args:
        run_with: Workflow-level execution mode ("agent" or "code").
        adaptive_caching: Legacy adaptive-caching flag stored on the workflow.
        code_version: Workflow code version, or ``None`` when unset.

    Returns:
        A ``Workflow`` whose definition has no parameters and no blocks.
    """
    fields = {
        "workflow_id": "wf_test",
        "organization_id": "org_test",
        "workflow_permanent_id": "wpid_test",
        "title": "test",
        "version": 1,
        "is_saved_task": False,
        "workflow_definition": {"parameters": [], "blocks": []},
        # The knobs under test.
        "run_with": run_with,
        "adaptive_caching": adaptive_caching,
        "code_version": code_version,
        # Timezone-aware timestamps, taken separately for each field.
        "created_at": datetime.now(timezone.utc),
        "modified_at": datetime.now(timezone.utc),
    }
    return Workflow(**fields)
def _make_run(run_with: str = "agent") -> WorkflowRun:
    """Build a minimal running ``WorkflowRun`` for the script-gating tests.

    Args:
        run_with: Run-level execution mode ("agent" or "code").

    Returns:
        A ``WorkflowRun`` in the ``running`` status that shares its workflow
        and organization identifiers with the objects from ``_make_workflow``.
    """
    fields = {
        "workflow_run_id": "wr_test",
        "workflow_id": "wf_test",
        "workflow_permanent_id": "wpid_test",
        "organization_id": "org_test",
        "status": WorkflowRunStatus.running,
        # The knob under test.
        "run_with": run_with,
        # Timezone-aware timestamps, taken separately for each field.
        "created_at": datetime.now(timezone.utc),
        "modified_at": datetime.now(timezone.utc),
    }
    return WorkflowRun(**fields)
@pytest.fixture
def service():
    """Provide a new ``WorkflowService`` instance for each test.

    The service module is imported when the fixture runs rather than at
    module import time.
    """
    from skyvern.forge.sdk.workflow.service import WorkflowService

    workflow_service = WorkflowService()
    return workflow_service
class TestShouldRunScript:
    """Exercise the ``should_run_script()`` priority chain.

    Run-level run_with takes priority, then workflow-level, then the
    code_version fallback, then agent. In every scenario below the expected
    result follows the run-level value: "code" -> True, "agent" -> False.
    """

    def test_run_code_overrides_workflow_agent(self, service):
        """Run says code, workflow says agent -> script runs."""
        workflow = _make_workflow(run_with="agent")
        run = _make_run(run_with="code")
        outcome = service.should_run_script(workflow, run)
        assert outcome is True

    def test_run_agent_overrides_workflow_code(self, service):
        """Run says agent, workflow says code -> script does not run."""
        workflow = _make_workflow(run_with="code")
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        assert outcome is False

    def test_run_agent_overrides_code_version(self, service):
        """Explicit run-level agent overrides the code_version fallback."""
        workflow = _make_workflow(run_with="agent", code_version=2)
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        assert outcome is False

    def test_workflow_code_with_agent_run(self, service):
        """Workflow says code, run says agent -> run wins."""
        workflow = _make_workflow(run_with="code")
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        assert outcome is False

    def test_workflow_agent_with_agent_run(self, service):
        """Agent at both levels, no code_version -> script does not run."""
        workflow = _make_workflow(run_with="agent")
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        assert outcome is False

    def test_workflow_agent_overrides_code_version(self, service):
        """Explicit workflow-level agent takes priority over code_version."""
        workflow = _make_workflow(run_with="agent", code_version=2)
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        assert outcome is False

    def test_both_agent_defaults_to_agent(self, service):
        """When both workflow and run are agent, the result is agent."""
        workflow = _make_workflow(run_with="agent")
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        assert outcome is False

    def test_code_version_with_agent_run_with(self, service):
        """code_version >= 1 does not flip the result when both levels say agent."""
        workflow = _make_workflow(run_with="agent", code_version=2)
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        # Agent on both sides wins; code_version is ignored.
        assert outcome is False

    def test_code_version_with_code_run_with(self, service):
        """Workflow-level code with code_version set still yields to run-level agent."""
        workflow = _make_workflow(run_with="code", code_version=2)
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        # Run-level agent overrides workflow-level code.
        assert outcome is False

    def test_workflow_code_run_code(self, service):
        """Both workflow and run say code -> script runs."""
        workflow = _make_workflow(run_with="code", code_version=2)
        run = _make_run(run_with="code")
        outcome = service.should_run_script(workflow, run)
        assert outcome is True

    def test_workflow_code_no_code_version(self, service):
        """Code at both levels with no code_version -> script still runs."""
        workflow = _make_workflow(run_with="code")
        run = _make_run(run_with="code")
        outcome = service.should_run_script(workflow, run)
        assert outcome is True

    def test_legacy_adaptive_caching_does_not_override_agent(self, service):
        """Legacy adaptive_caching=True does NOT override explicit run_with=agent."""
        workflow = _make_workflow(run_with="agent", adaptive_caching=True)
        run = _make_run(run_with="agent")
        outcome = service.should_run_script(workflow, run)
        # Explicit agent wins: adaptive_caching is a legacy fallback that only
        # applied when run_with was null, which is no longer possible.
        assert outcome is False
class TestScriptReviewerGate:
    """Check when the script reviewer is allowed to fire.

    The gate is the conjunction evaluated in these tests: is_adaptive_caching()
    must be True AND should_run_script() must be True. Per the original intent,
    this avoids spending LLM tokens reviewing scripts after agent-only runs.
    """

    @pytest.mark.parametrize(
        ("wf_run_with", "run_run_with", "code_version", "expect_reviewer"),
        [
            # code + code_version=2 -> adaptive caching on
            ("code", "code", 2, True),
            # code + code_version=1 -> adaptive caching off
            ("code", "code", 1, False),
            # agent overrides everything
            ("agent", "agent", 2, False),
            # agent, no code_version
            ("agent", "agent", None, False),
            # run-level agent overrides
            ("code", "agent", 2, False),
            # code without code_version -> not adaptive
            ("agent", "code", None, False),
            # run-level code + code_version=2 -> adaptive
            ("agent", "code", 2, True),
        ],
    )
    def test_reviewer_gate(self, service, wf_run_with, run_run_with, code_version, expect_reviewer):
        """The reviewer fires only when both gate conditions hold."""
        workflow = _make_workflow(run_with=wf_run_with, code_version=code_version)
        run = _make_run(run_with=run_run_with)
        adaptive = is_adaptive_caching(workflow, run)
        # Short-circuit: should_run_script() is consulted only when adaptive is truthy.
        reviewer_fires = adaptive and service.should_run_script(workflow, run)
        assert reviewer_fires is expect_reviewer

    def test_legacy_adaptive_caching_backward_compat(self, service):
        """Legacy adaptive_caching=True with code_version=None still enables reviewer."""
        workflow = _make_workflow(run_with="agent", adaptive_caching=True, code_version=None)
        run = _make_run(run_with="code")
        adaptive = is_adaptive_caching(workflow, run)
        reviewer_fires = adaptive and service.should_run_script(workflow, run)
        assert reviewer_fires is True