mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 19:50:42 +00:00
600 lines
24 KiB
Python
600 lines
24 KiB
Python
"""
Tests for workflow schema field name preservation (SKY-7434).

When a workflow is regenerated (e.g., after adding a new block), the LLM should
preserve field names for unchanged blocks to prevent schema mismatches with
cached block code.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
from unittest.mock import AsyncMock
|
|
|
|
import pytest
|
|
from dotenv import load_dotenv
|
|
|
|
from skyvern.core.script_generations.generate_script import (
|
|
ScriptBlockSource,
|
|
_build_existing_field_assignments,
|
|
)
|
|
from skyvern.core.script_generations.generate_workflow_parameters import (
|
|
generate_workflow_parameters_schema,
|
|
)
|
|
from skyvern.forge.forge_app_initializer import start_forge_app
|
|
from tests.unit.force_stub_app import start_forge_stub_app
|
|
|
|
# Load environment variables for the real-LLM tests. This runs before the flag
# below is computed, so the flag is read from the already-updated environment.
load_dotenv()

# Real-LLM tests are opt-in: they are skipped unless RUN_LLM_TESTS is exactly "1".
SKIP_LLM_TESTS = os.getenv("RUN_LLM_TESTS", "0") != "1"
|
|
|
|
|
|
class TestBuildExistingFieldAssignments:
    """Unit tests for ``_build_existing_field_assignments``.

    Every test hands the helper four things: the workflow's block dicts, the
    recorded actions keyed by task id, the cached ``ScriptBlockSource`` entries
    keyed by block label, and the set of labels considered updated. It then
    checks the returned mapping of action index to preserved field name.
    """

    @staticmethod
    def _cached_source(label: str, input_fields: list[str], code: str = "...") -> ScriptBlockSource:
        """Build a cached-block entry; only label, code and input_fields vary between tests."""
        return ScriptBlockSource(
            label=label,
            code=code,
            run_signature=None,
            workflow_run_id=None,
            workflow_run_block_id=None,
            input_fields=input_fields,
        )

    def test_returns_empty_dict_when_no_cached_blocks(self):
        """An empty cache leaves nothing to preserve, so the mapping is empty."""
        workflow_blocks = [{"block_type": "login", "label": "login_block", "task_id": "task_1"}]
        recorded_actions = {
            "task_1": [{"action_type": "input_text", "text": "john", "intention": "Enter username"}],
        }
        empty_cache: dict[str, ScriptBlockSource] = {}
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            workflow_blocks, recorded_actions, empty_cache, nothing_updated
        )

        assert assignments == {}

    def test_returns_empty_dict_when_all_blocks_updated(self):
        """A cached block whose label is listed as updated contributes no preserved names."""
        workflow_blocks = [{"block_type": "login", "label": "login_block", "task_id": "task_1"}]
        recorded_actions = {
            "task_1": [{"action_type": "input_text", "text": "john", "intention": "Enter username"}],
        }
        cache = {
            "login_block": self._cached_source(
                "login_block", ["username"], code="async def login_block(): ..."
            ),
        }
        # The only block is flagged as updated, so its cached field name must be ignored.
        every_label_updated = {"login_block"}

        assignments = _build_existing_field_assignments(
            workflow_blocks, recorded_actions, cache, every_label_updated
        )

        assert assignments == {}

    def test_preserves_field_names_for_unchanged_blocks(self):
        """An unchanged cached block hands its input_fields to its actions, in order."""
        workflow_blocks = [{"block_type": "login", "label": "login_block", "task_id": "task_1"}]
        recorded_actions = {
            "task_1": [
                {"action_type": "input_text", "text": "john", "intention": "Enter username"},
                {"action_type": "input_text", "text": "pass123", "intention": "Enter password"},
            ],
        }
        cache = {
            "login_block": self._cached_source(
                "login_block",
                ["user_full_name", "user_password"],
                code="async def login_block(): ...",
            ),
        }
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            workflow_blocks, recorded_actions, cache, nothing_updated
        )

        # First action maps to the first cached field, second action to the second.
        assert assignments == {1: "user_full_name", 2: "user_password"}

    def test_preserves_fields_for_multiple_unchanged_blocks(self):
        """Two unchanged cached blocks both keep their field names."""
        workflow_blocks = [
            {"block_type": "login", "label": "login_block", "task_id": "task_1"},
            {"block_type": "task", "label": "form_block", "task_id": "task_2"},
        ]
        recorded_actions = {
            "task_1": [{"action_type": "input_text", "text": "john", "intention": "Enter username"}],
            "task_2": [{"action_type": "input_text", "text": "Acme Inc", "intention": "Enter company"}],
        }
        cache = {
            "login_block": self._cached_source("login_block", ["username"]),
            "form_block": self._cached_source("form_block", ["company_name"]),
        }
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            workflow_blocks, recorded_actions, cache, nothing_updated
        )

        # Expected numbering continues across tasks: task_1's action is 1, task_2's is 2.
        assert assignments == {1: "username", 2: "company_name"}

    def test_mixed_updated_and_unchanged_blocks(self):
        """A block missing from the cache gets no entry; the cached one still does."""
        workflow_blocks = [
            {"block_type": "login", "label": "login_block", "task_id": "task_1"},
            {"block_type": "task", "label": "new_block", "task_id": "task_2"},
        ]
        recorded_actions = {
            "task_1": [{"action_type": "input_text", "text": "john", "intention": "Enter username"}],
            "task_2": [{"action_type": "input_text", "text": "new value", "intention": "Enter something"}],
        }
        # new_block is deliberately absent from the cache: it is brand new.
        cache = {"login_block": self._cached_source("login_block", ["username"])}
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            workflow_blocks, recorded_actions, cache, nothing_updated
        )

        # Only the login action is preserved; the new block's action has no entry.
        assert assignments == {1: "username"}

    def test_skips_non_custom_field_actions(self):
        """Actions other than INPUT_TEXT, UPLOAD_FILE, or SELECT_OPTION do not consume an index."""
        workflow_blocks = [{"block_type": "login", "label": "login_block", "task_id": "task_1"}]
        recorded_actions = {
            "task_1": [
                # A click is not a custom-field action.
                {"action_type": "click", "intention": "Click button"},
                {"action_type": "input_text", "text": "john", "intention": "Enter username"},
            ],
        }
        # The cached block exposes a single input field.
        cache = {"login_block": self._cached_source("login_block", ["username"])}
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            workflow_blocks, recorded_actions, cache, nothing_updated
        )

        # The click is skipped, which makes the input_text action number 1.
        assert assignments == {1: "username"}
|
|
|
|
|
|
class TestGenerateWorkflowParametersSchemaWithExistingFields:
    """Checks on the prompt sent to, and the result parsed from, the schema-generation LLM.

    The LLM handler on the stub app is replaced by a fake returning a canned
    response, so these tests never make a real LLM call.
    """

    @pytest.fixture(autouse=True)
    def setup_stub_app(self):
        """Start the stub app before each test and keep a handle to it on the instance."""
        self.stub_app = start_forge_stub_app()

    def _stub_llm(self, canned_response):
        """Install a fake script-generation LLM handler that returns ``canned_response``.

        Returns the dict that the fake fills with the ``prompt`` and
        ``prompt_name`` arguments it was called with.
        """
        seen = {}

        async def mock_llm_handler(prompt, prompt_name):
            seen["prompt"] = prompt
            seen["prompt_name"] = prompt_name
            return canned_response

        self.stub_app.SCRIPT_GENERATION_LLM_API_HANDLER = AsyncMock(side_effect=mock_llm_handler)
        return seen

    @pytest.mark.asyncio
    async def test_llm_receives_existing_field_names_in_prompt(self):
        """Existing field names must show up in the prompt and in the returned mappings."""
        actions_by_task = {
            "task_1": [
                {"action_type": "input_text", "text": "john", "intention": "Enter username", "action_id": "act_1"},
                {"action_type": "input_text", "text": "pass", "intention": "Enter password", "action_id": "act_2"},
            ],
            "task_2": [
                {"action_type": "input_text", "text": "new", "intention": "Enter new field", "action_id": "act_3"},
            ],
        }
        # Action 3 is intentionally missing: it has no existing name and needs a new one.
        existing_field_assignments = {1: "preserved_username", 2: "preserved_password"}

        seen = self._stub_llm(
            {
                "field_mappings": {
                    "action_index_1": "preserved_username",
                    "action_index_2": "preserved_password",
                    "action_index_3": "new_field_name",
                },
                "schema_fields": {
                    "preserved_username": {"type": "str", "description": "Username"},
                    "preserved_password": {"type": "str", "description": "Password"},
                    "new_field_name": {"type": "str", "description": "New field"},
                },
            }
        )

        _, field_mappings = await generate_workflow_parameters_schema(actions_by_task, existing_field_assignments)

        # The prompt has to name the fields to keep and carry a preservation marker.
        sent_prompt = seen["prompt"]
        assert "preserved_username" in sent_prompt
        assert "preserved_password" in sent_prompt
        assert "MUST PRESERVE" in sent_prompt or "EXISTING FIELD NAME" in sent_prompt

        # Mappings come back keyed by "<task_id>:<action_id>".
        assert field_mappings["task_1:act_1"] == "preserved_username"
        assert field_mappings["task_1:act_2"] == "preserved_password"
        assert field_mappings["task_2:act_3"] == "new_field_name"

    @pytest.mark.asyncio
    async def test_no_existing_fields_works_normally(self):
        """With nothing to preserve, schema generation still returns usable mappings."""
        actions_by_task = {
            "task_1": [
                {"action_type": "input_text", "text": "john", "intention": "Enter username", "action_id": "act_1"},
            ],
        }
        existing_field_assignments: dict[int, str] = {}

        seen = self._stub_llm(
            {
                "field_mappings": {"action_index_1": "username"},
                "schema_fields": {"username": {"type": "str", "description": "Username field"}},
            }
        )

        _, field_mappings = await generate_workflow_parameters_schema(actions_by_task, existing_field_assignments)

        # The "CRITICAL" preservation rule is expected in the prompt only when
        # existing fields are supplied, so it must be absent here.
        assert "CRITICAL" not in seen["prompt"]

        # The mapping for the single action is still produced.
        assert field_mappings["task_1:act_1"] == "username"

    @pytest.mark.asyncio
    async def test_schema_code_includes_preserved_field_names(self):
        """The generated schema source text must mention the preserved field name."""
        actions_by_task = {
            "task_1": [
                {"action_type": "input_text", "text": "john", "intention": "Enter username", "action_id": "act_1"},
            ],
        }
        existing_field_assignments = {1: "user_full_name"}

        self._stub_llm(
            {
                "field_mappings": {"action_index_1": "user_full_name"},
                "schema_fields": {
                    "user_full_name": {"type": "str", "description": "The user's full name"},
                },
            }
        )

        schema_code, _ = await generate_workflow_parameters_schema(actions_by_task, existing_field_assignments)

        # Both the preserved name and its declared type should appear in the schema text.
        assert "user_full_name" in schema_code
        assert "str" in schema_code
|
|
|
|
|
|
class TestEndToEndFieldPreservation:
    """
    End-to-end walk through the real-world scenario, with the LLM mocked:

    1. A workflow has a login block whose cached code uses specific field names
    2. The user adds a new block
    3. The schema is regenerated
    4. The login block's field names should come out unchanged
    """

    @pytest.fixture(autouse=True)
    def setup_stub_app(self):
        """Start the stub app before each test and keep a handle to it on the instance."""
        self.stub_app = start_forge_stub_app()

    @pytest.mark.asyncio
    async def test_adding_new_block_preserves_existing_block_field_names(self):
        """
        A workflow with a cached login block gains a new block.

        The regenerated schema must keep the login block's field names and
        assign a fresh name to the new block's action.
        """
        # Source of the cached login block; it reads both parameters by name.
        login_code = """
@skyvern.cached(cache_key='login_block')
async def login_block(page: SkyvernPage, context: RunContext):
    await page.fill(
        selector='xpath=//input[@id="email"]',
        value=context.parameters['user_email'],
    )
    await page.fill(
        selector='xpath=//input[@id="password"]',
        value=context.parameters['user_password'],
    )
"""
        # Only login_block is cached; new_block has no entry because it is brand new.
        cached_blocks = {
            "login_block": ScriptBlockSource(
                label="login_block",
                code=login_code,
                run_signature="await skyvern.login(...)",
                workflow_run_id="wr_123",
                workflow_run_block_id="wrb_123",
                input_fields=["user_email", "user_password"],  # the names that must survive
            ),
        }

        # The login block existed already; the task block was just added.
        blocks = [
            {"block_type": "login", "label": "login_block", "task_id": "task_1"},
            {"block_type": "task", "label": "new_block", "task_id": "task_2"},
        ]

        # Recorded actions for both blocks.
        login_actions = [
            {
                "action_type": "input_text",
                "text": "john@example.com",
                "intention": "Enter email",
                "action_id": "act_1",
            },
            {"action_type": "input_text", "text": "secret123", "intention": "Enter password", "action_id": "act_2"},
        ]
        new_block_actions = [
            {
                "action_type": "input_text",
                "text": "Acme Inc",
                "intention": "Enter company name",
                "action_id": "act_3",
            },
        ]
        actions_by_task = {"task_1": login_actions, "task_2": new_block_actions}

        # No label is marked as updated: login_block is unchanged, and new_block
        # is simply absent from the cache.
        updated_block_labels: set[str] = set()

        # Step 1: work out which field names have to be carried over.
        existing_field_assignments = _build_existing_field_assignments(
            blocks, actions_by_task, cached_blocks, updated_block_labels
        )

        # Only the two login actions are covered; action 3 belongs to the new block.
        assert existing_field_assignments == {1: "user_email", 2: "user_password"}

        # Step 2: a fake LLM that follows the preservation instructions.
        llm_reply = {
            "field_mappings": {
                "action_index_1": "user_email",  # carried over
                "action_index_2": "user_password",  # carried over
                "action_index_3": "company_name",  # new name for the new block
            },
            "schema_fields": {
                "user_email": {"type": "str", "description": "User's email address"},
                "user_password": {"type": "str", "description": "User's password"},
                "company_name": {"type": "str", "description": "Company name"},
            },
        }
        seen = {}

        async def mock_llm_handler(prompt, prompt_name):
            seen["prompt"] = prompt
            return llm_reply

        self.stub_app.SCRIPT_GENERATION_LLM_API_HANDLER = AsyncMock(side_effect=mock_llm_handler)

        schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # The prompt must name the existing fields and carry a preservation marker.
        sent_prompt = seen["prompt"]
        assert "user_email" in sent_prompt, "Prompt should contain existing field name 'user_email'"
        assert "user_password" in sent_prompt, "Prompt should contain existing field name 'user_password'"
        assert "MUST PRESERVE" in sent_prompt or "EXISTING FIELD NAME" in sent_prompt

        # The returned mappings keep the original names for the login block.
        assert field_mappings["task_1:act_1"] == "user_email", "Login block email field should be preserved"
        assert field_mappings["task_1:act_2"] == "user_password", "Login block password field should be preserved"
        assert field_mappings["task_2:act_3"] == "company_name", "New block should get new field name"

        # The schema source mentions all three field names.
        for expected_name in ("user_email", "user_password", "company_name"):
            assert expected_name in schema_code

        # The cached login code reads context.parameters['user_email'] and
        # context.parameters['user_password'], i.e. the same names as the schema.
        cached_code = cached_blocks["login_block"].code
        assert "user_email" in cached_code
        assert "user_password" in cached_code
|
|
|
|
|
|
@pytest.mark.skipif(SKIP_LLM_TESTS, reason="Real LLM test - set RUN_LLM_TESTS=1 to enable")
class TestRealLLMFieldPreservation:
    """
    Integration tests that make actual LLM calls to verify field preservation.

    These tests require environment variables to be set (via .env file):
    - SCRIPT_GENERATION_LLM_KEY or SECONDARY_LLM_KEY
    - Appropriate API keys for the LLM provider

    Run these tests with:
    RUN_LLM_TESTS=1 pytest tests/unit/test_workflow_schema_field_preservation.py::TestRealLLMFieldPreservation -v -s

    Note: Skipped by default since they make real LLM calls (costs money).
    """

    @pytest.fixture(scope="class", autouse=True)
    def setup_real_app(self):
        """Start the real Forge app once for the class so the tests can make LLM calls."""
        start_forge_app()
        # Nothing to tear down after the tests.
        yield

    @pytest.mark.asyncio
    async def test_llm_preserves_existing_field_names(self):
        """
        Test that a real LLM preserves field names when instructed to.

        Existing field names are passed for actions 1 and 2; the test verifies
        that the returned mappings use those exact names and that action 3
        receives a different, newly generated name.
        """
        actions_by_task = {
            "task_1": [
                {
                    "action_type": "input_text",
                    "text": "john.doe@example.com",
                    "intention": "Enter the user's email address for login",
                    "action_id": "act_1",
                },
                {
                    "action_type": "input_text",
                    "text": "secretpassword123",
                    "intention": "Enter the user's password",
                    "action_id": "act_2",
                },
            ],
            "task_2": [
                {
                    "action_type": "input_text",
                    "text": "Acme Corporation",
                    "intention": "Enter the company name",
                    "action_id": "act_3",
                },
            ],
        }

        # These are the existing field names that MUST be preserved.
        # The names are deliberately unusual so the LLM cannot produce them by chance.
        existing_field_assignments = {
            1: "preserved_login_email_xyz",
            2: "preserved_login_password_abc",
            # Action 3 has no existing field - LLM should generate a new name
        }

        schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # Read with .get() so that a missing mapping fails the assertion with the
        # diagnostic message below instead of raising a bare KeyError first.
        email_field = field_mappings.get("task_1:act_1")
        password_field = field_mappings.get("task_1:act_2")

        # Verify the LLM preserved the exact field names we specified
        assert email_field == "preserved_login_email_xyz", (
            f"LLM should have preserved 'preserved_login_email_xyz' but got '{email_field}'"
        )
        assert password_field == "preserved_login_password_abc", (
            f"LLM should have preserved 'preserved_login_password_abc' but got '{password_field}'"
        )

        # Verify action 3 got a new field name (not one of the preserved ones)
        action_3_field = field_mappings.get("task_2:act_3")
        assert action_3_field is not None, "LLM should have generated a field name for action 3"
        assert action_3_field not in ["preserved_login_email_xyz", "preserved_login_password_abc"], (
            f"Action 3 should have a new field name, not a preserved one. Got: {action_3_field}"
        )

        # Verify the schema code contains the preserved field names
        assert "preserved_login_email_xyz" in schema_code, "Schema should contain preserved email field"
        assert "preserved_login_password_abc" in schema_code, "Schema should contain preserved password field"
        assert action_3_field in schema_code, f"Schema should contain new field '{action_3_field}'"

        # Visible only when pytest runs with -s.
        print("\n✅ LLM preserved field names correctly!")
        print(" - Action 1: preserved_login_email_xyz ✓")
        print(" - Action 2: preserved_login_password_abc ✓")
        print(f" - Action 3: {action_3_field} (newly generated) ✓")

    @pytest.mark.asyncio
    async def test_llm_generates_all_new_names_when_no_existing_fields(self):
        """
        Test that when there are no existing fields, the LLM generates appropriate new names.

        This is a baseline test to ensure the LLM call works correctly.
        """
        actions_by_task = {
            "task_1": [
                {
                    "action_type": "input_text",
                    "text": "test@example.com",
                    "intention": "Enter email address",
                    "action_id": "act_1",
                },
            ],
        }

        # No existing field assignments
        existing_field_assignments: dict[int, str] = {}

        schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # Verify we got a non-empty field name and that the schema mentions it
        assert "task_1:act_1" in field_mappings, "Should have a field mapping for the action"
        field_name = field_mappings["task_1:act_1"]
        assert field_name, "Field name should not be empty"
        assert field_name in schema_code, f"Schema should contain the generated field name '{field_name}'"

        print(f"\n✅ LLM generated new field name: {field_name}")
|