mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-30 20:50:05 +00:00
Optimize caching: content dedup, recursive for-loop tracking, observability (SKY-8684) (#5371)
This commit is contained in:
parent
12b80f531a
commit
58bbc7dbb9
13 changed files with 1637 additions and 195 deletions
192 additions: tests/unit/test_script_block_dedup.py (new file)
@@ -0,0 +1,192 @@
"""
|
||||
Tests for script block content deduplication (SKY-8684).
|
||||
|
||||
Verifies that:
|
||||
1. create_or_update_script_block skips S3 upload when content matches (dedup hit)
|
||||
2. create_or_update_script_block uploads to S3 when content differs (dedup miss)
|
||||
3. create_or_update_script_block uploads to S3 when no previous block exists
|
||||
4. Edge case: matching hash but no artifact_id falls through to full upload
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from skyvern.core.script_generations.generate_script import create_or_update_script_block
|
||||
|
||||
|
||||
class TestScriptBlockDedup:
|
||||
"""Test content deduplication in create_or_update_script_block."""
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_new_block_no_previous_file_uploads_to_s3(self) -> None:
|
||||
"""When no ScriptFile with matching content_hash exists, full S3 upload occurs."""
|
||||
mock_script_block = MagicMock()
|
||||
mock_script_block.script_block_id = "sb_new"
|
||||
mock_script_block.script_file_id = None
|
||||
|
||||
mock_script_file = MagicMock()
|
||||
mock_script_file.file_id = "sf_new"
|
||||
|
||||
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
|
||||
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=None)
|
||||
mock_app.DATABASE.scripts.create_script_block = AsyncMock(return_value=mock_script_block)
|
||||
mock_app.DATABASE.scripts.get_script_file_by_content_hash = AsyncMock(return_value=None)
|
||||
mock_app.ARTIFACT_MANAGER.create_script_file_artifact = AsyncMock(return_value="artifact_new")
|
||||
mock_app.DATABASE.scripts.create_script_file = AsyncMock(return_value=mock_script_file)
|
||||
mock_app.DATABASE.scripts.update_script_block = AsyncMock(return_value=mock_script_block)
|
||||
|
||||
result = await create_or_update_script_block(
|
||||
block_code="async def task_1(): pass",
|
||||
script_revision_id="rev_new",
|
||||
script_id="script_1",
|
||||
organization_id="org_1",
|
||||
block_label="task_1",
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# S3 upload SHOULD happen — no dedup match
|
||||
mock_app.ARTIFACT_MANAGER.create_script_file_artifact.assert_called_once()
|
||||
mock_app.DATABASE.scripts.create_script_file.assert_called_once()
|
||||
# Verify content_hash was passed to create_script_file
|
||||
call_kwargs = mock_app.DATABASE.scripts.create_script_file.call_args.kwargs
|
||||
assert call_kwargs["content_hash"].startswith("sha256:")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_new_block_matching_hash_skips_s3_upload(self) -> None:
|
||||
"""When a ScriptFile with matching content_hash exists, S3 upload is skipped (cross-revision dedup)."""
|
||||
mock_script_block = MagicMock()
|
||||
mock_script_block.script_block_id = "sb_dedup"
|
||||
mock_script_block.script_file_id = None
|
||||
|
||||
existing_script_file = MagicMock()
|
||||
existing_script_file.artifact_id = "artifact_existing"
|
||||
|
||||
new_script_file = MagicMock()
|
||||
new_script_file.file_id = "sf_dedup"
|
||||
|
||||
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
|
||||
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=None)
|
||||
mock_app.DATABASE.scripts.create_script_block = AsyncMock(return_value=mock_script_block)
|
||||
mock_app.DATABASE.scripts.get_script_file_by_content_hash = AsyncMock(return_value=existing_script_file)
|
||||
mock_app.DATABASE.scripts.create_script_file = AsyncMock(return_value=new_script_file)
|
||||
mock_app.DATABASE.scripts.update_script_block = AsyncMock(return_value=mock_script_block)
|
||||
|
||||
result = await create_or_update_script_block(
|
||||
block_code="async def task_1(): pass",
|
||||
script_revision_id="rev_new",
|
||||
script_id="script_1",
|
||||
organization_id="org_1",
|
||||
block_label="task_1",
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# S3 upload should NOT happen — dedup hit
|
||||
mock_app.ARTIFACT_MANAGER.create_script_file_artifact.assert_not_called()
|
||||
# ScriptFile should still be created (for the new revision) with reused artifact_id
|
||||
mock_app.DATABASE.scripts.create_script_file.assert_called_once()
|
||||
call_kwargs = mock_app.DATABASE.scripts.create_script_file.call_args.kwargs
|
||||
assert call_kwargs["artifact_id"] == "artifact_existing"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_existing_block_same_content_skips_s3(self) -> None:
|
||||
"""When updating a block and content_hash matches, S3 upload is skipped entirely."""
|
||||
block_code = "async def task_1(): pass"
|
||||
content_hash = f"sha256:{hashlib.sha256(block_code.encode('utf-8')).hexdigest()}"
|
||||
|
||||
mock_script_block = MagicMock()
|
||||
mock_script_block.script_block_id = "sb_existing"
|
||||
mock_script_block.script_file_id = "sf_existing"
|
||||
|
||||
mock_script_file = MagicMock()
|
||||
mock_script_file.content_hash = content_hash
|
||||
mock_script_file.artifact_id = "artifact_existing"
|
||||
|
||||
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
|
||||
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=mock_script_block)
|
||||
mock_app.DATABASE.scripts.get_script_file_by_id = AsyncMock(return_value=mock_script_file)
|
||||
|
||||
result = await create_or_update_script_block(
|
||||
block_code=block_code,
|
||||
script_revision_id="rev_1",
|
||||
script_id="script_1",
|
||||
organization_id="org_1",
|
||||
block_label="task_1",
|
||||
update=True,
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# S3 upload should NOT happen — content unchanged
|
||||
mock_app.STORAGE.store_artifact.assert_not_called()
|
||||
mock_app.DATABASE.artifacts.get_artifact_by_id.assert_not_called()
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_update_existing_block_different_content_uploads_to_s3(self) -> None:
|
||||
"""When updating a block and content_hash differs, S3 upload proceeds."""
|
||||
mock_script_block = MagicMock()
|
||||
mock_script_block.script_block_id = "sb_existing"
|
||||
mock_script_block.script_file_id = "sf_existing"
|
||||
|
||||
mock_script_file = MagicMock()
|
||||
mock_script_file.content_hash = "sha256:old_hash_value"
|
||||
mock_script_file.artifact_id = "artifact_existing"
|
||||
|
||||
mock_artifact = MagicMock()
|
||||
|
||||
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
|
||||
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=mock_script_block)
|
||||
mock_app.DATABASE.scripts.get_script_file_by_id = AsyncMock(return_value=mock_script_file)
|
||||
mock_app.DATABASE.artifacts.get_artifact_by_id = AsyncMock(return_value=mock_artifact)
|
||||
mock_app.DATABASE.scripts.update_script_file = AsyncMock()
|
||||
mock_app.STORAGE.store_artifact = AsyncMock()
|
||||
|
||||
result = await create_or_update_script_block(
|
||||
block_code="async def task_1_v2(): pass",
|
||||
script_revision_id="rev_1",
|
||||
script_id="script_1",
|
||||
organization_id="org_1",
|
||||
block_label="task_1",
|
||||
update=True,
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# S3 upload SHOULD happen — content changed
|
||||
mock_app.DATABASE.artifacts.get_artifact_by_id.assert_called_once()
|
||||
mock_app.STORAGE.store_artifact.assert_called_once()
|
||||
# content_hash should be updated on the ScriptFile record AFTER S3 upload
|
||||
mock_app.DATABASE.scripts.update_script_file.assert_called_once()
|
||||
assert "content_hash" in mock_app.DATABASE.scripts.update_script_file.call_args.kwargs
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_create_block_matching_hash_but_no_artifact_falls_through(self) -> None:
|
||||
"""When ScriptFile has matching hash but artifact_id is None, fall through to full upload."""
|
||||
mock_script_block = MagicMock()
|
||||
mock_script_block.script_block_id = "sb_edge"
|
||||
mock_script_block.script_file_id = None
|
||||
|
||||
orphan_script_file = MagicMock()
|
||||
orphan_script_file.artifact_id = None # No artifact — can't reuse
|
||||
|
||||
new_script_file = MagicMock()
|
||||
new_script_file.file_id = "sf_edge"
|
||||
|
||||
with patch("skyvern.core.script_generations.generate_script.app") as mock_app:
|
||||
mock_app.DATABASE.scripts.get_script_block_by_label = AsyncMock(return_value=None)
|
||||
mock_app.DATABASE.scripts.create_script_block = AsyncMock(return_value=mock_script_block)
|
||||
mock_app.DATABASE.scripts.get_script_file_by_content_hash = AsyncMock(return_value=orphan_script_file)
|
||||
mock_app.ARTIFACT_MANAGER.create_script_file_artifact = AsyncMock(return_value="artifact_new")
|
||||
mock_app.DATABASE.scripts.create_script_file = AsyncMock(return_value=new_script_file)
|
||||
mock_app.DATABASE.scripts.update_script_block = AsyncMock(return_value=mock_script_block)
|
||||
|
||||
result = await create_or_update_script_block(
|
||||
block_code="async def task_edge(): pass",
|
||||
script_revision_id="rev_edge",
|
||||
script_id="script_1",
|
||||
organization_id="org_1",
|
||||
block_label="task_edge",
|
||||
)
|
||||
|
||||
assert result is True
|
||||
# Should fall through to full upload since artifact_id is None
|
||||
mock_app.ARTIFACT_MANAGER.create_script_file_artifact.assert_called_once()
|
||||
Loading…
Add table
Add a link
Reference in a new issue