Skyvern/tests/unit/test_workflow_schema_field_preservation.py
2026-02-12 20:43:27 -08:00

600 lines
24 KiB
Python

"""
Tests for workflow schema field name preservation (SKY-7434).
When a workflow is regenerated (e.g., after adding a new block), the LLM should
preserve field names for unchanged blocks to prevent schema mismatches with
cached block code.
"""
from __future__ import annotations
import os
from unittest.mock import AsyncMock
import pytest
from dotenv import load_dotenv
from skyvern.core.script_generations.generate_script import (
ScriptBlockSource,
_build_existing_field_assignments,
)
from skyvern.core.script_generations.generate_workflow_parameters import (
generate_workflow_parameters_schema,
)
from skyvern.forge.forge_app_initializer import start_forge_app
from tests.unit.force_stub_app import start_forge_stub_app
# Read a local .env file (if any) so the real-LLM tests can pick up their settings.
load_dotenv()

# Real-LLM tests are opt-in: they are skipped unless RUN_LLM_TESTS is exactly "1".
SKIP_LLM_TESTS = os.getenv("RUN_LLM_TESTS", "0") != "1"
class TestBuildExistingFieldAssignments:
    """Exercise ``_build_existing_field_assignments`` against hand-built inputs.

    Every test hands the helper four things -- a list of block dicts, a
    ``task_id -> actions`` mapping, the cached ``ScriptBlockSource`` objects
    keyed by label, and the set of updated block labels -- and compares the
    returned dict with the expected ``{action number: field name}`` mapping
    (the expected keys start at 1).
    """

    @staticmethod
    def _block(block_type: str, label: str, task_id: str) -> dict[str, str]:
        """Return a fresh workflow block dict in the shape these tests pass in."""
        return {"block_type": block_type, "label": label, "task_id": task_id}

    @staticmethod
    def _typed(text: str, intention: str) -> dict[str, str]:
        """Return a fresh ``input_text`` action dict."""
        return {"action_type": "input_text", "text": text, "intention": intention}

    @staticmethod
    def _cached(label: str, input_fields: list[str], code: str = "...") -> ScriptBlockSource:
        """Return a ``ScriptBlockSource`` with no run metadata and the given fields."""
        return ScriptBlockSource(
            label=label,
            code=code,
            run_signature=None,
            workflow_run_id=None,
            workflow_run_block_id=None,
            input_fields=input_fields,
        )

    def test_returns_empty_dict_when_no_cached_blocks(self):
        """An empty cache must produce an empty assignment dict."""
        no_cache: dict[str, ScriptBlockSource] = {}
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            [self._block("login", "login_block", "task_1")],
            {"task_1": [self._typed("john", "Enter username")]},
            no_cache,
            nothing_updated,
        )

        assert assignments == {}

    def test_returns_empty_dict_when_all_blocks_updated(self):
        """A cached block whose label is marked as updated contributes nothing."""
        cache = {
            "login_block": self._cached("login_block", ["username"], code="async def login_block(): ..."),
        }
        # The only block is flagged as updated, so its cached name is not reused.
        updated = {"login_block"}

        assignments = _build_existing_field_assignments(
            [self._block("login", "login_block", "task_1")],
            {"task_1": [self._typed("john", "Enter username")]},
            cache,
            updated,
        )

        assert assignments == {}

    def test_preserves_field_names_for_unchanged_blocks(self):
        """Cached field names of a non-updated block are returned in action order."""
        cache = {
            "login_block": self._cached(
                "login_block",
                ["user_full_name", "user_password"],
                code="async def login_block(): ...",
            ),
        }
        nothing_updated: set[str] = set()
        task_actions = {
            "task_1": [
                self._typed("john", "Enter username"),
                self._typed("pass123", "Enter password"),
            ]
        }

        assignments = _build_existing_field_assignments(
            [self._block("login", "login_block", "task_1")],
            task_actions,
            cache,
            nothing_updated,
        )

        # First action maps to the first cached field, second to the second.
        assert assignments == {1: "user_full_name", 2: "user_password"}

    def test_preserves_fields_for_multiple_unchanged_blocks(self):
        """Two cached, non-updated blocks both contribute their field names."""
        workflow_blocks = [
            self._block("login", "login_block", "task_1"),
            self._block("task", "form_block", "task_2"),
        ]
        task_actions = {
            "task_1": [self._typed("john", "Enter username")],
            "task_2": [self._typed("Acme Inc", "Enter company")],
        }
        cache = {
            "login_block": self._cached("login_block", ["username"]),
            "form_block": self._cached("form_block", ["company_name"]),
        }
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(workflow_blocks, task_actions, cache, nothing_updated)

        # task_1's action is number 1, task_2's action is number 2.
        assert assignments == {1: "username", 2: "company_name"}

    def test_mixed_updated_and_unchanged_blocks(self):
        """A block with no cache entry yields no assignment; the cached one does."""
        workflow_blocks = [
            self._block("login", "login_block", "task_1"),
            self._block("task", "new_block", "task_2"),
        ]
        task_actions = {
            "task_1": [self._typed("john", "Enter username")],
            "task_2": [self._typed("new value", "Enter something")],
        }
        # Deliberately no entry for "new_block": it has never been cached.
        cache = {"login_block": self._cached("login_block", ["username"])}
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(workflow_blocks, task_actions, cache, nothing_updated)

        # Only the login block's action (number 1) has a name to keep.
        assert assignments == {1: "username"}

    def test_skips_non_custom_field_actions(self):
        """A leading ``click`` action must not consume an action number."""
        task_actions = {
            "task_1": [
                {"action_type": "click", "intention": "Click button"},
                self._typed("john", "Enter username"),
            ]
        }
        # The cached block knows a single input field.
        cache = {"login_block": self._cached("login_block", ["username"])}
        nothing_updated: set[str] = set()

        assignments = _build_existing_field_assignments(
            [self._block("login", "login_block", "task_1")],
            task_actions,
            cache,
            nothing_updated,
        )

        # The click is not counted, so the input_text action is number 1.
        assert assignments == {1: "username"}
class TestGenerateWorkflowParametersSchemaWithExistingFields:
    """Check ``generate_workflow_parameters_schema`` with a stubbed LLM handler.

    The stub app's ``SCRIPT_GENERATION_LLM_API_HANDLER`` is replaced by an
    ``AsyncMock`` that records the prompt it receives and answers with a
    canned response, so the tests can inspect both the prompt text and the
    values returned by the function under test.
    """

    @pytest.fixture(autouse=True)
    def setup_stub_app(self):
        """Start the stub Forge app before each test and keep it on ``self``."""
        self.stub_app = start_forge_stub_app()

    @staticmethod
    def _action(text, intention, action_id):
        """Return a fresh ``input_text`` action dict carrying an ``action_id``."""
        return {"action_type": "input_text", "text": text, "intention": intention, "action_id": action_id}

    def _stub_llm(self, canned_response):
        """Install a recording LLM handler on the stub app.

        The handler stores the ``prompt`` and ``prompt_name`` it is called with
        in the returned dict and always answers with ``canned_response``.
        """
        seen = {}

        async def mock_llm_handler(prompt, prompt_name):
            seen["prompt"] = prompt
            seen["prompt_name"] = prompt_name
            return canned_response

        self.stub_app.SCRIPT_GENERATION_LLM_API_HANDLER = AsyncMock(side_effect=mock_llm_handler)
        return seen

    @pytest.mark.asyncio
    async def test_llm_receives_existing_field_names_in_prompt(self):
        """Existing field names must show up in the prompt and in the mappings."""
        seen = self._stub_llm(
            {
                "field_mappings": {
                    "action_index_1": "preserved_username",
                    "action_index_2": "preserved_password",
                    "action_index_3": "new_field_name",
                },
                "schema_fields": {
                    "preserved_username": {"type": "str", "description": "Username"},
                    "preserved_password": {"type": "str", "description": "Password"},
                    "new_field_name": {"type": "str", "description": "New field"},
                },
            }
        )
        actions_by_task = {
            "task_1": [
                self._action("john", "Enter username", "act_1"),
                self._action("pass", "Enter password", "act_2"),
            ],
            "task_2": [
                self._action("new", "Enter new field", "act_3"),
            ],
        }
        # Actions 1 and 2 already have names; action 3 is left for the LLM to name.
        existing_field_assignments = {1: "preserved_username", 2: "preserved_password"}

        _schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # The prompt has to mention both existing names plus a preservation marker.
        prompt_text = seen["prompt"]
        for kept_name in ("preserved_username", "preserved_password"):
            assert kept_name in prompt_text
        assert any(marker in prompt_text for marker in ("MUST PRESERVE", "EXISTING FIELD NAME"))

        # Returned mappings are keyed by "<task_id>:<action_id>".
        expected_mappings = {
            "task_1:act_1": "preserved_username",
            "task_1:act_2": "preserved_password",
            "task_2:act_3": "new_field_name",
        }
        for mapping_key, field_name in expected_mappings.items():
            assert field_mappings[mapping_key] == field_name

    @pytest.mark.asyncio
    async def test_no_existing_fields_works_normally(self):
        """With no existing names the prompt carries no ``CRITICAL`` rule."""
        seen = self._stub_llm(
            {
                "field_mappings": {
                    "action_index_1": "username",
                },
                "schema_fields": {
                    "username": {"type": "str", "description": "Username field"},
                },
            }
        )
        actions_by_task = {"task_1": [self._action("john", "Enter username", "act_1")]}
        existing_field_assignments: dict[int, str] = {}

        _schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # This test expects the word "CRITICAL" to be absent when nothing is preserved.
        assert "CRITICAL" not in seen["prompt"]
        # A usable mapping must still come back.
        assert field_mappings["task_1:act_1"] == "username"

    @pytest.mark.asyncio
    async def test_schema_code_includes_preserved_field_names(self):
        """The generated schema source must mention the preserved name and its type."""
        self._stub_llm(
            {
                "field_mappings": {
                    "action_index_1": "user_full_name",
                },
                "schema_fields": {
                    "user_full_name": {"type": "str", "description": "The user's full name"},
                },
            }
        )
        actions_by_task = {"task_1": [self._action("john", "Enter username", "act_1")]}
        existing_field_assignments = {1: "user_full_name"}

        schema_code, _field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        for expected_fragment in ("user_full_name", "str"):
            assert expected_fragment in schema_code
class TestEndToEndFieldPreservation:
    """Chain both steps of the regeneration flow in a single scenario.

    Scenario covered:
    1. A login block already has cached code that reads named parameters.
    2. A second, brand-new block is added to the workflow.
    3. The existing field assignments are built and the schema is regenerated
       against a stubbed LLM.
    4. The login block's field names must come back unchanged.
    """

    @pytest.fixture(autouse=True)
    def setup_stub_app(self):
        """Start the stub Forge app before each test and keep it on ``self``."""
        self.stub_app = start_forge_stub_app()

    @pytest.mark.asyncio
    async def test_adding_new_block_preserves_existing_block_field_names(self):
        """Adding a block must leave the cached login block's field names intact."""
        # Workflow layout: the pre-existing login block followed by the new one.
        blocks = [
            {"block_type": "login", "label": "login_block", "task_id": "task_1"},
            {"block_type": "task", "label": "new_block", "task_id": "task_2"},
        ]

        login_actions = [
            {
                "action_type": "input_text",
                "text": "john@example.com",
                "intention": "Enter email",
                "action_id": "act_1",
            },
            {"action_type": "input_text", "text": "secret123", "intention": "Enter password", "action_id": "act_2"},
        ]
        new_block_actions = [
            {
                "action_type": "input_text",
                "text": "Acme Inc",
                "intention": "Enter company name",
                "action_id": "act_3",
            },
        ]
        actions_by_task = {"task_1": login_actions, "task_2": new_block_actions}

        # Cached source for the login block; its code reads 'user_email' and
        # 'user_password' from context.parameters, so those names must survive.
        login_source = ScriptBlockSource(
            label="login_block",
            code="""
@skyvern.cached(cache_key='login_block')
async def login_block(page: SkyvernPage, context: RunContext):
await page.fill(
selector='xpath=//input[@id="email"]',
value=context.parameters['user_email'],
)
await page.fill(
selector='xpath=//input[@id="password"]',
value=context.parameters['user_password'],
)
""",
            run_signature="await skyvern.login(...)",
            workflow_run_id="wr_123",
            workflow_run_block_id="wrb_123",
            input_fields=["user_email", "user_password"],
        )
        # "new_block" has no cache entry because it has never been generated.
        cached_blocks = {"login_block": login_source}
        # No label is marked as updated; in particular login_block is not.
        updated_block_labels: set[str] = set()

        # Step 1: work out which action numbers already own a field name.
        existing_field_assignments = _build_existing_field_assignments(
            blocks, actions_by_task, cached_blocks, updated_block_labels
        )
        # Only the two login actions are expected; action 3 belongs to the new block.
        assert existing_field_assignments == {1: "user_email", 2: "user_password"}

        # Step 2: stub an LLM that honours the preservation request.
        llm_reply = {
            "field_mappings": {
                "action_index_1": "user_email",
                "action_index_2": "user_password",
                "action_index_3": "company_name",
            },
            "schema_fields": {
                "user_email": {"type": "str", "description": "User's email address"},
                "user_password": {"type": "str", "description": "User's password"},
                "company_name": {"type": "str", "description": "Company name"},
            },
        }
        seen = {}

        async def mock_llm_handler(prompt, prompt_name):
            seen["prompt"] = prompt
            return llm_reply

        self.stub_app.SCRIPT_GENERATION_LLM_API_HANDLER = AsyncMock(side_effect=mock_llm_handler)

        schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # The prompt must name both existing fields and carry a preservation marker.
        prompt_text = seen["prompt"]
        assert "user_email" in prompt_text, "Prompt should contain existing field name 'user_email'"
        assert "user_password" in prompt_text, "Prompt should contain existing field name 'user_password'"
        assert any(marker in prompt_text for marker in ("MUST PRESERVE", "EXISTING FIELD NAME"))

        # The mappings must keep the original names and add one for the new block.
        assert field_mappings["task_1:act_1"] == "user_email", "Login block email field should be preserved"
        assert field_mappings["task_1:act_2"] == "user_password", "Login block password field should be preserved"
        assert field_mappings["task_2:act_3"] == "company_name", "New block should get new field name"

        # The generated schema source must mention all three names.
        for schema_field in ("user_email", "user_password", "company_name"):
            assert schema_field in schema_code

        # Sanity check: the cached login code refers to the same two names that
        # the regenerated schema now exposes.
        cached_code = cached_blocks["login_block"].code
        for referenced_name in ("user_email", "user_password"):
            assert referenced_name in cached_code
@pytest.mark.skipif(SKIP_LLM_TESTS, reason="Real LLM test - set RUN_LLM_TESTS=1 to enable")
class TestRealLLMFieldPreservation:
    """
    Integration tests that call a real LLM to check field-name preservation.

    Required environment (typically supplied through a .env file):
    - SCRIPT_GENERATION_LLM_KEY or SECONDARY_LLM_KEY
    - the API keys of the chosen LLM provider

    How to run:
    RUN_LLM_TESTS=1 pytest tests/unit/test_workflow_schema_field_preservation.py::TestRealLLMFieldPreservation -v -s

    The whole class is skipped unless RUN_LLM_TESTS=1, because every test
    issues a real (billable) LLM request.
    """

    @pytest.fixture(scope="class", autouse=True)
    def setup_real_app(self):
        """Start the real Forge app once for the class; nothing is torn down."""
        start_forge_app()
        yield

    @pytest.mark.asyncio
    async def test_llm_preserves_existing_field_names(self):
        """
        A real LLM must hand back the exact field names it was told to keep.

        Two of the three actions come with an existing name; the test checks
        that both names are returned verbatim and that the third action
        receives a different, newly generated name.
        """
        login_actions = [
            {
                "action_type": "input_text",
                "text": "john.doe@example.com",
                "intention": "Enter the user's email address for login",
                "action_id": "act_1",
            },
            {
                "action_type": "input_text",
                "text": "secretpassword123",
                "intention": "Enter the user's password",
                "action_id": "act_2",
            },
        ]
        company_actions = [
            {
                "action_type": "input_text",
                "text": "Acme Corporation",
                "intention": "Enter the company name",
                "action_id": "act_3",
            },
        ]
        actions_by_task = {"task_1": login_actions, "task_2": company_actions}

        # Unusual names on purpose: the LLM is unlikely to come up with them on
        # its own, so seeing them in the result means they were preserved.
        # Action 3 is left out and should receive a fresh name.
        existing_field_assignments = {
            1: "preserved_login_email_xyz",
            2: "preserved_login_password_abc",
        }

        schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # Both pre-assigned names must be returned exactly as given.
        assert field_mappings["task_1:act_1"] == "preserved_login_email_xyz", (
            f"LLM should have preserved 'preserved_login_email_xyz' but got '{field_mappings.get('task_1:act_1')}'"
        )
        assert field_mappings["task_1:act_2"] == "preserved_login_password_abc", (
            f"LLM should have preserved 'preserved_login_password_abc' but got '{field_mappings.get('task_1:act_2')}'"
        )

        # The third action needs a name of its own, distinct from the kept ones.
        action_3_field = field_mappings.get("task_2:act_3")
        assert action_3_field is not None, "LLM should have generated a field name for action 3"
        assert action_3_field not in ("preserved_login_email_xyz", "preserved_login_password_abc"), (
            f"Action 3 should have a new field name, not a preserved one. Got: {action_3_field}"
        )

        # All three names must appear in the generated schema source.
        assert "preserved_login_email_xyz" in schema_code, "Schema should contain preserved email field"
        assert "preserved_login_password_abc" in schema_code, "Schema should contain preserved password field"
        assert action_3_field in schema_code, f"Schema should contain new field '{action_3_field}'"

        # Human-readable summary, visible when pytest runs with -s.
        print("\n✅ LLM preserved field names correctly!")
        print(" - Action 1: preserved_login_email_xyz ✓")
        print(" - Action 2: preserved_login_password_abc ✓")
        print(f" - Action 3: {action_3_field} (newly generated) ✓")

    @pytest.mark.asyncio
    async def test_llm_generates_all_new_names_when_no_existing_fields(self):
        """
        Baseline: with nothing to preserve, the LLM still names the action.

        Confirms that the real call path works end to end and that the
        generated name also appears in the schema source.
        """
        actions_by_task = {
            "task_1": [
                {
                    "action_type": "input_text",
                    "text": "test@example.com",
                    "intention": "Enter email address",
                    "action_id": "act_1",
                },
            ],
        }
        # Nothing is pre-assigned in this scenario.
        existing_field_assignments: dict[int, str] = {}

        schema_code, field_mappings = await generate_workflow_parameters_schema(
            actions_by_task, existing_field_assignments
        )

        # The single action must have received a non-empty name used by the schema.
        assert "task_1:act_1" in field_mappings, "Should have a field mapping for the action"
        field_name = field_mappings["task_1:act_1"]
        assert field_name, "Field name should not be empty"
        assert field_name in schema_code, f"Schema should contain the generated field name '{field_name}'"

        print(f"\n✅ LLM generated new field name: {field_name}")