Skyvern/tests/unit/test_script_block_dedup.py

192 lines
9.4 KiB
Python

"""
Tests for script block content deduplication (SKY-8684).
Verifies that:
1. create_or_update_script_block skips S3 upload when content matches (dedup hit)
2. create_or_update_script_block uploads to S3 when content differs (dedup miss)
3. create_or_update_script_block uploads to S3 when no previous block exists
4. Edge case: matching hash but no artifact_id falls through to full upload
"""
import hashlib
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from skyvern.core.script_generations.generate_script import create_or_update_script_block
class TestScriptBlockDedup:
"""Test content deduplication in create_or_update_script_block."""
@pytest.mark.asyncio
async def test_create_new_block_no_previous_file_uploads_to_s3(self) -> None:
"""When no ScriptFile with matching content_hash exists, full S3 upload occurs."""
mock_script_block = MagicMock()
mock_script_block.script_block_id = "sb_new"
mock_script_block.script_file_id = None
mock_script_file = MagicMock()
mock_script_file.file_id = "sf_new"
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=None)
mock_app.DATABASE.scripts.create_script_block = AsyncMock(return_value=mock_script_block)
mock_app.DATABASE.scripts.get_script_file_by_content_hash = AsyncMock(return_value=None)
mock_app.ARTIFACT_MANAGER.create_script_file_artifact = AsyncMock(return_value="artifact_new")
mock_app.DATABASE.scripts.create_script_file = AsyncMock(return_value=mock_script_file)
mock_app.DATABASE.scripts.update_script_block = AsyncMock(return_value=mock_script_block)
result = await create_or_update_script_block(
block_code="async def task_1(): pass",
script_revision_id="rev_new",
script_id="script_1",
organization_id="org_1",
block_label="task_1",
)
assert result is True
# S3 upload SHOULD happen — no dedup match
mock_app.ARTIFACT_MANAGER.create_script_file_artifact.assert_called_once()
mock_app.DATABASE.scripts.create_script_file.assert_called_once()
# Verify content_hash was passed to create_script_file
call_kwargs = mock_app.DATABASE.scripts.create_script_file.call_args.kwargs
assert call_kwargs["content_hash"].startswith("sha256:")
@pytest.mark.asyncio
async def test_create_new_block_matching_hash_skips_s3_upload(self) -> None:
"""When a ScriptFile with matching content_hash exists, S3 upload is skipped (cross-revision dedup)."""
mock_script_block = MagicMock()
mock_script_block.script_block_id = "sb_dedup"
mock_script_block.script_file_id = None
existing_script_file = MagicMock()
existing_script_file.artifact_id = "artifact_existing"
new_script_file = MagicMock()
new_script_file.file_id = "sf_dedup"
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=None)
mock_app.DATABASE.scripts.create_script_block = AsyncMock(return_value=mock_script_block)
mock_app.DATABASE.scripts.get_script_file_by_content_hash = AsyncMock(return_value=existing_script_file)
mock_app.DATABASE.scripts.create_script_file = AsyncMock(return_value=new_script_file)
mock_app.DATABASE.scripts.update_script_block = AsyncMock(return_value=mock_script_block)
result = await create_or_update_script_block(
block_code="async def task_1(): pass",
script_revision_id="rev_new",
script_id="script_1",
organization_id="org_1",
block_label="task_1",
)
assert result is True
# S3 upload should NOT happen — dedup hit
mock_app.ARTIFACT_MANAGER.create_script_file_artifact.assert_not_called()
# ScriptFile should still be created (for the new revision) with reused artifact_id
mock_app.DATABASE.scripts.create_script_file.assert_called_once()
call_kwargs = mock_app.DATABASE.scripts.create_script_file.call_args.kwargs
assert call_kwargs["artifact_id"] == "artifact_existing"
@pytest.mark.asyncio
async def test_update_existing_block_same_content_skips_s3(self) -> None:
"""When updating a block and content_hash matches, S3 upload is skipped entirely."""
block_code = "async def task_1(): pass"
content_hash = f"sha256:{hashlib.sha256(block_code.encode('utf-8')).hexdigest()}"
mock_script_block = MagicMock()
mock_script_block.script_block_id = "sb_existing"
mock_script_block.script_file_id = "sf_existing"
mock_script_file = MagicMock()
mock_script_file.content_hash = content_hash
mock_script_file.artifact_id = "artifact_existing"
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=mock_script_block)
mock_app.DATABASE.scripts.get_script_file_by_id = AsyncMock(return_value=mock_script_file)
result = await create_or_update_script_block(
block_code=block_code,
script_revision_id="rev_1",
script_id="script_1",
organization_id="org_1",
block_label="task_1",
update=True,
)
assert result is True
# S3 upload should NOT happen — content unchanged
mock_app.STORAGE.store_artifact.assert_not_called()
mock_app.DATABASE.artifacts.get_artifact_by_id.assert_not_called()
@pytest.mark.asyncio
async def test_update_existing_block_different_content_uploads_to_s3(self) -> None:
"""When updating a block and content_hash differs, S3 upload proceeds."""
mock_script_block = MagicMock()
mock_script_block.script_block_id = "sb_existing"
mock_script_block.script_file_id = "sf_existing"
mock_script_file = MagicMock()
mock_script_file.content_hash = "sha256:old_hash_value"
mock_script_file.artifact_id = "artifact_existing"
mock_artifact = MagicMock()
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=mock_script_block)
mock_app.DATABASE.scripts.get_script_file_by_id = AsyncMock(return_value=mock_script_file)
mock_app.DATABASE.artifacts.get_artifact_by_id = AsyncMock(return_value=mock_artifact)
mock_app.DATABASE.scripts.update_script_file = AsyncMock()
mock_app.STORAGE.store_artifact = AsyncMock()
result = await create_or_update_script_block(
block_code="async def task_1_v2(): pass",
script_revision_id="rev_1",
script_id="script_1",
organization_id="org_1",
block_label="task_1",
update=True,
)
assert result is True
# S3 upload SHOULD happen — content changed
mock_app.DATABASE.artifacts.get_artifact_by_id.assert_called_once()
mock_app.STORAGE.store_artifact.assert_called_once()
# content_hash should be updated on the ScriptFile record AFTER S3 upload
mock_app.DATABASE.scripts.update_script_file.assert_called_once()
assert "content_hash" in mock_app.DATABASE.scripts.update_script_file.call_args.kwargs
@pytest.mark.asyncio
async def test_create_block_matching_hash_but_no_artifact_falls_through(self) -> None:
"""When ScriptFile has matching hash but artifact_id is None, fall through to full upload."""
mock_script_block = MagicMock()
mock_script_block.script_block_id = "sb_edge"
mock_script_block.script_file_id = None
orphan_script_file = MagicMock()
orphan_script_file.artifact_id = None # No artifact — can't reuse
new_script_file = MagicMock()
new_script_file.file_id = "sf_edge"
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=None)
mock_app.DATABASE.scripts.create_script_block = AsyncMock(return_value=mock_script_block)
mock_app.DATABASE.scripts.get_script_file_by_content_hash = AsyncMock(return_value=orphan_script_file)
mock_app.ARTIFACT_MANAGER.create_script_file_artifact = AsyncMock(return_value="artifact_new")
mock_app.DATABASE.scripts.create_script_file = AsyncMock(return_value=new_script_file)
mock_app.DATABASE.scripts.update_script_block = AsyncMock(return_value=mock_script_block)
result = await create_or_update_script_block(
block_code="async def task_edge(): pass",
script_revision_id="rev_edge",
script_id="script_1",
organization_id="org_1",
block_label="task_edge",
)
assert result is True
# Should fall through to full upload since artifact_id is None
mock_app.ARTIFACT_MANAGER.create_script_file_artifact.assert_called_once()