mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
fix: populate workflow_run.script_run with script identity + complete ai_fallback_triggered coverage (#5635)
This commit is contained in:
parent
71316dbc01
commit
37f10704e9
11 changed files with 499 additions and 2 deletions
|
|
@ -9582,6 +9582,16 @@
|
|||
"type": "boolean",
|
||||
"title": "Ai Fallback Triggered",
|
||||
"default": false
|
||||
},
|
||||
"script_id": {
|
||||
"type": "string",
|
||||
"nullable": true,
|
||||
"title": "Script Id"
|
||||
},
|
||||
"script_revision_id": {
|
||||
"type": "string",
|
||||
"nullable": true,
|
||||
"title": "Script Revision Id"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
|
|
|
|||
|
|
@ -14353,6 +14353,14 @@
|
|||
"type": "boolean",
|
||||
"title": "Ai Fallback Triggered",
|
||||
"default": false
|
||||
},
|
||||
"script_id": {
|
||||
"anyOf": [{"type": "string"}, {"type": "null"}],
|
||||
"title": "Script Id"
|
||||
},
|
||||
"script_revision_id": {
|
||||
"anyOf": [{"type": "string"}, {"type": "null"}],
|
||||
"title": "Script Revision Id"
|
||||
}
|
||||
},
|
||||
"type": "object",
|
||||
|
|
|
|||
|
|
@ -8,6 +8,8 @@ from ..core.pydantic_utilities import IS_PYDANTIC_V2, UniversalBaseModel
|
|||
|
||||
class ScriptRunResponse(UniversalBaseModel):
|
||||
ai_fallback_triggered: typing.Optional[bool] = None
|
||||
script_id: typing.Optional[str] = None
|
||||
script_revision_id: typing.Optional[str] = None
|
||||
|
||||
if IS_PYDANTIC_V2:
|
||||
model_config: typing.ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict(extra="allow", frozen=True) # type: ignore # Pydantic v2
|
||||
|
|
|
|||
|
|
@ -51,6 +51,33 @@ from skyvern.schemas.runs import ProxyLocationInput, RunType
|
|||
LOG = structlog.get_logger()
|
||||
|
||||
|
||||
def _merge_script_run(
|
||||
existing: dict | None,
|
||||
ai_fallback_triggered: bool | None,
|
||||
script_id: str | None,
|
||||
script_revision_id: str | None,
|
||||
) -> dict:
|
||||
"""Merge-on-write semantics for `workflow_runs.script_run`.
|
||||
|
||||
Callers update different facets of `script_run` at different points in a
|
||||
run's lifecycle — setup time writes script identity, mid-execution fallback
|
||||
writes `ai_fallback_triggered=True`. A replace-based update would clobber
|
||||
whichever facet the caller didn't touch, so merge preserves the other.
|
||||
|
||||
Pure function for testability (see `tests/unit/db/
|
||||
test_workflow_runs_script_run_merge.py`). None-valued params are skipped;
|
||||
non-None params overwrite the corresponding key in the merged dict.
|
||||
"""
|
||||
merged = dict(existing or {})
|
||||
if ai_fallback_triggered is not None:
|
||||
merged["ai_fallback_triggered"] = ai_fallback_triggered
|
||||
if script_id is not None:
|
||||
merged["script_id"] = script_id
|
||||
if script_revision_id is not None:
|
||||
merged["script_revision_id"] = script_revision_id
|
||||
return merged
|
||||
|
||||
|
||||
class WorkflowRunsRepository(BaseRepository):
|
||||
"""Database operations for workflow runs."""
|
||||
|
||||
|
|
@ -178,6 +205,8 @@ class WorkflowRunsRepository(BaseRepository):
|
|||
failure_reason: str | None = None,
|
||||
webhook_failure_reason: str | None = None,
|
||||
ai_fallback_triggered: bool | None = None,
|
||||
script_id: str | None = None,
|
||||
script_revision_id: str | None = None,
|
||||
job_id: str | None = None,
|
||||
run_with: str | None = None,
|
||||
sequential_key: str | None = None,
|
||||
|
|
@ -212,8 +241,13 @@ class WorkflowRunsRepository(BaseRepository):
|
|||
workflow_run.failure_reason = failure_reason
|
||||
if webhook_failure_reason is not None:
|
||||
workflow_run.webhook_failure_reason = webhook_failure_reason
|
||||
if ai_fallback_triggered is not None:
|
||||
workflow_run.script_run = {"ai_fallback_triggered": ai_fallback_triggered}
|
||||
if ai_fallback_triggered is not None or script_id is not None or script_revision_id is not None:
|
||||
workflow_run.script_run = _merge_script_run(
|
||||
existing=workflow_run.script_run,
|
||||
ai_fallback_triggered=ai_fallback_triggered,
|
||||
script_id=script_id,
|
||||
script_revision_id=script_revision_id,
|
||||
)
|
||||
if job_id:
|
||||
workflow_run.job_id = job_id
|
||||
if run_with:
|
||||
|
|
|
|||
|
|
@ -1524,6 +1524,12 @@ class WorkflowService:
|
|||
empty_blocks_detected=script is not None and is_script_run and not script_blocks_by_label,
|
||||
)
|
||||
|
||||
if script_mode_active and script is not None:
|
||||
# Regression-locked by tests/unit/workflow/test_mark_script_run_loaded.py
|
||||
# ::test_mark_script_run_loaded_calls_update_with_script_identity.
|
||||
# If you modify this branch, update that test.
|
||||
await self._mark_script_run_loaded(workflow_run_id, script)
|
||||
|
||||
if block_labels and len(block_labels):
|
||||
blocks: list[BlockTypeVar] = []
|
||||
all_labels = {block.label: block for block in all_blocks}
|
||||
|
|
@ -2283,6 +2289,33 @@ class WorkflowService:
|
|||
organization_id=organization_id,
|
||||
browser_session_id=browser_session_id,
|
||||
)
|
||||
# Record that this run experienced a script → AI fallback if
|
||||
# the agent execution we just ran was a consequence of a failed
|
||||
# script attempt. The gate correctly excludes:
|
||||
# - ai_fallback=False kept-the-failure (execute_safe not reached)
|
||||
# - requires_agent / uncached / disable_cache / non-cacheable
|
||||
# routes (valid_to_run_code is False for these)
|
||||
# - agent-only workflows (is_script_run=False → valid_to_run_code=False)
|
||||
# and correctly covers all three script-failure modes: script-
|
||||
# block failed, script threw, and script-ran-but-no-block-found.
|
||||
# Complements the task-block AI-fallback writers in `services/script_service.py`
|
||||
# writers which handle a separate task-block AI-fallback surface.
|
||||
# Perf: a fallback-heavy run issues N writes for N fallbacks.
|
||||
# Typical runs have 0-3. `_merge_script_run` is idempotent at the
|
||||
# DB layer. If observed latency regresses, add a context-scoped
|
||||
# already-flipped cache (tracked separately).
|
||||
await self._mark_script_fallback_triggered(
|
||||
workflow_run_id=workflow_run_id,
|
||||
# `valid_to_run_code` is computed as a chain of `and`s that
|
||||
# includes `block.label` (str | None), so its static type is
|
||||
# `Literal[''] | bool` when block.label is an empty string.
|
||||
# The helper treats it as a pure truthiness gate; `bool(...)`
|
||||
# narrows the type cleanly for mypy without changing runtime
|
||||
# semantics.
|
||||
valid_to_run_code=bool(valid_to_run_code),
|
||||
block_executed_with_code=block_executed_with_code,
|
||||
block_label=block.label,
|
||||
)
|
||||
|
||||
# Update fallback episode with agent actions for both success and failure.
|
||||
# Failed fallbacks are kept for triage — the reviewer will determine
|
||||
|
|
@ -6019,3 +6052,88 @@ class WorkflowService:
|
|||
if workflow_run.run_with is not None:
|
||||
return workflow_run.run_with == "code"
|
||||
return workflow.run_with == "code"
|
||||
|
||||
async def _mark_script_run_loaded(self, workflow_run_id: str, script: Script) -> None:
|
||||
"""Record that a cached script was loaded for this workflow run.
|
||||
|
||||
Populates `workflow_run.script_run` with the script's identity at
|
||||
workflow setup time so API consumers can detect cache use. Sets
|
||||
`ai_fallback_triggered=False` as the initial state; if a fallback
|
||||
fires mid-execution, other writers (`services/script_service.py`
|
||||
and `_mark_script_fallback_triggered` below) merge the flipped
|
||||
`ai_fallback_triggered=True` on top without clobbering identity
|
||||
via the merge-on-write behavior in `update_workflow_run`.
|
||||
|
||||
Semantic: `script_run != null` after this runs means "a cached
|
||||
script was loaded for this run at setup time." It does NOT imply
|
||||
that every (or any) block actually executed from that cache —
|
||||
`block_labels` filtering, `requires_agent`, `disable_cache`, and
|
||||
non-cacheable block types can still route individual blocks to AI.
|
||||
See `ScriptRunResponse` docstrings for the full semantic.
|
||||
|
||||
Wrapped in try/except (matching `_mark_script_fallback_triggered`) so
|
||||
a transient DB error on the metadata write doesn't abort workflow
|
||||
setup. The `script_run` payload is informational — reporting state
|
||||
to API consumers — not load-bearing for the run's own execution.
|
||||
"""
|
||||
try:
|
||||
await app.DATABASE.workflow_runs.update_workflow_run(
|
||||
workflow_run_id=workflow_run_id,
|
||||
ai_fallback_triggered=False,
|
||||
script_id=script.script_id,
|
||||
script_revision_id=script.script_revision_id,
|
||||
)
|
||||
except Exception:
|
||||
LOG.warning(
|
||||
"Failed to mark script_run loaded at workflow setup",
|
||||
workflow_run_id=workflow_run_id,
|
||||
script_id=script.script_id,
|
||||
script_revision_id=script.script_revision_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
async def _mark_script_fallback_triggered(
|
||||
self,
|
||||
workflow_run_id: str,
|
||||
valid_to_run_code: bool,
|
||||
block_executed_with_code: bool,
|
||||
block_label: str | None,
|
||||
) -> None:
|
||||
"""Flip `ai_fallback_triggered=True` on the run iff the just-executed
|
||||
agent block was a script→AI fallback (not an always-agent route).
|
||||
|
||||
Gate semantics:
|
||||
- `valid_to_run_code=True` ⇒ we attempted script execution for this
|
||||
block. False rules out always-agent routes (requires_agent,
|
||||
disable_cache, uncached, non-cacheable block types, agent-only
|
||||
workflows).
|
||||
- `block_executed_with_code=False` ⇒ script didn't succeed. True means
|
||||
script ran cleanly; no fallback occurred; no flag flip.
|
||||
|
||||
Together, a True/False combination means "we tried script, it failed,
|
||||
we then ran agent." That's the precise definition of a mid-execution
|
||||
script→AI fallback.
|
||||
|
||||
Caller must only invoke this AFTER `block.execute_safe` actually ran
|
||||
(i.e., the fallback agent execution happened). Calling before would
|
||||
risk false positives in the `ai_fallback=False` kept-the-failure
|
||||
case where `execute_safe` is never reached.
|
||||
|
||||
Wrapped in try/except so a transient DB error on the flag flip can't
|
||||
abort downstream block post-processing. Regression-locked by
|
||||
`tests/unit/workflow/test_mark_script_fallback_triggered.py`.
|
||||
"""
|
||||
if not (valid_to_run_code and not block_executed_with_code):
|
||||
return
|
||||
try:
|
||||
await app.DATABASE.workflow_runs.update_workflow_run(
|
||||
workflow_run_id=workflow_run_id,
|
||||
ai_fallback_triggered=True,
|
||||
)
|
||||
except Exception:
|
||||
LOG.warning(
|
||||
"Failed to mark ai_fallback_triggered after script→AI fallback",
|
||||
workflow_run_id=workflow_run_id,
|
||||
block_label=block_label,
|
||||
exc_info=True,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -590,8 +590,30 @@ class BlockRunRequest(WorkflowRunRequest):
|
|||
|
||||
|
||||
class ScriptRunResponse(BaseModel):
    # True iff a fallback fired during this run, flipping at least one
    # block's execution from cached script to the agent. Writers: the two
    # `services/script_service.py` fallback paths (script-block failure +
    # conditional-agent episode) and the `_execute_single_block` script-
    # failure path. NOTE: `False` does NOT imply "no AI execution" — blocks
    # that were always-agent (via `requires_agent`, `disable_cache`, or
    # non-cacheable block types) never create a fallback episode and never
    # flip this flag. For per-block routing ground truth, consult the
    # `Block execution mode resolved` log emitted at per-block execution
    # time in `skyvern/forge/sdk/workflow/service.py`.
    ai_fallback_triggered: bool = False

    # Identity of the cached script loaded for this run at workflow setup
    # time; non-null iff a script was loaded. Does NOT imply that every (or
    # any) block actually executed from that cache — per-block `block_labels`
    # filtering, `requires_agent`, `disable_cache`, or non-cacheable block
    # types can still route individual blocks to AI. Populated by the
    # server-side execution path (workflow/service.py) and the local CLI
    # entrypoint (services/script_service.run_script). None on rows written
    # by older code paths that only recorded `ai_fallback_triggered`.
    script_id: str | None = None
    script_revision_id: str | None = None
|
||||
|
||||
|
||||
class UploadFileResponse(BaseModel):
|
||||
s3_uri: str = Field(description="S3 URI where the file was uploaded")
|
||||
|
|
|
|||
|
|
@ -2537,6 +2537,8 @@ async def run_script(
|
|||
workflow_run = await app.DATABASE.workflow_runs.update_workflow_run(
|
||||
workflow_run_id=workflow_run_id,
|
||||
ai_fallback_triggered=False,
|
||||
script_id=script_id,
|
||||
script_revision_id=script_revision_id,
|
||||
)
|
||||
context.workflow_run_id = workflow_run_id
|
||||
context.organization_id = organization_id
|
||||
|
|
|
|||
0
tests/unit/db/__init__.py
Normal file
0
tests/unit/db/__init__.py
Normal file
127
tests/unit/db/test_workflow_runs_script_run_merge.py
Normal file
127
tests/unit/db/test_workflow_runs_script_run_merge.py
Normal file
|
|
@ -0,0 +1,127 @@
|
|||
"""Regression guards for the `_merge_script_run` helper in
|
||||
`workflow_runs` repository.
|
||||
|
||||
Merge-on-write is load-bearing: callers update different facets of
|
||||
`workflow_run.script_run` at different points in a run's lifecycle
|
||||
(setup time writes script identity; mid-execution fallback writes
|
||||
`ai_fallback_triggered=True`). Without the merge, the second write
|
||||
clobbers whichever facet wasn't touched, and consumers that read the
|
||||
`script_run` API field see stale/missing data.
|
||||
|
||||
These tests lock in the three invariants:
|
||||
|
||||
1. First write into a null column produces the full populated dict.
|
||||
2. A subsequent partial update preserves unrelated keys.
|
||||
3. Calls with all nullable params as None are documented no-ops
|
||||
(enforced at the caller; this file documents the expectation).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from skyvern.forge.sdk.db.repositories.workflow_runs import _merge_script_run
|
||||
|
||||
|
||||
def test_merge_script_run_first_write_creates_full_dict() -> None:
    """First write into a null `script_run` column (server-side code-mode
    run): supplying all three fields yields a dict with all three keys."""
    merged = _merge_script_run(
        existing=None,
        ai_fallback_triggered=False,
        script_id="s_abc",
        script_revision_id="sr_xyz",
    )
    expected = {
        "ai_fallback_triggered": False,
        "script_id": "s_abc",
        "script_revision_id": "sr_xyz",
    }
    assert merged == expected
|
||||
|
||||
|
||||
def test_merge_script_run_fallback_flip_preserves_identity() -> None:
    """Regression guard for the original motivating bug: after
    `_mark_script_run_loaded` writes identity at setup, a later fallback
    flip to `ai_fallback_triggered=True` must NOT clobber `script_id` /
    `script_revision_id`. The pre-this-PR replace-based update would have
    emitted only `{"ai_fallback_triggered": True}`, breaking consumers that
    read script identity from the API."""
    before = {
        "ai_fallback_triggered": False,
        "script_id": "s_abc",
        "script_revision_id": "sr_xyz",
    }
    after = _merge_script_run(
        existing=before,
        ai_fallback_triggered=True,
        script_id=None,
        script_revision_id=None,
    )
    assert after == {**before, "ai_fallback_triggered": True}
|
||||
|
||||
|
||||
def test_merge_script_run_later_identity_write_preserves_fallback_flag() -> None:
    """Symmetric direction: when `ai_fallback_triggered` lands first (e.g. a
    writer that doesn't know about identity yet), a later identity write
    must not clobber the fallback bool."""
    after = _merge_script_run(
        existing={"ai_fallback_triggered": True},
        ai_fallback_triggered=None,
        script_id="s_abc",
        script_revision_id="sr_xyz",
    )
    assert after == {
        "ai_fallback_triggered": True,
        "script_id": "s_abc",
        "script_revision_id": "sr_xyz",
    }
|
||||
|
||||
|
||||
def test_merge_script_run_ignores_none_params() -> None:
    """All three params as None: the helper must hand back the existing dict
    unchanged. The gate in `update_workflow_run` already avoids this call,
    but the helper's own behavior must be a no-op for defense in depth."""
    before = {"ai_fallback_triggered": True, "script_id": "s_abc"}
    after = _merge_script_run(
        existing=before,
        ai_fallback_triggered=None,
        script_id=None,
        script_revision_id=None,
    )
    assert after == before
|
||||
|
||||
|
||||
def test_merge_script_run_false_fallback_is_written_not_skipped() -> None:
    """`False` is a meaningful value, not a skip signal — only `None` skips.
    Guards against an `if ai_fallback_triggered:` truthiness pitfall."""
    merged = _merge_script_run(
        existing={"ai_fallback_triggered": True},
        ai_fallback_triggered=False,
        script_id=None,
        script_revision_id=None,
    )
    assert merged == {"ai_fallback_triggered": False}
|
||||
|
||||
|
||||
def test_merge_script_run_empty_dict_existing_same_as_none() -> None:
    """Legacy rows may hold `script_run = {}` (empty dict) instead of null.
    The helper's `or {}` normalization must make both starting points merge
    identically."""
    shared_updates = {
        "ai_fallback_triggered": False,
        "script_id": "s_abc",
        "script_revision_id": None,
    }
    from_none = _merge_script_run(existing=None, **shared_updates)
    from_empty = _merge_script_run(existing={}, **shared_updates)
    assert from_none == from_empty
|
||||
113
tests/unit/workflow/test_mark_script_fallback_triggered.py
Normal file
113
tests/unit/workflow/test_mark_script_fallback_triggered.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
"""Regression guards for `_mark_script_fallback_triggered` — flips
|
||||
`ai_fallback_triggered=True` on the run iff a real script→AI fallback
|
||||
occurred.
|
||||
|
||||
Without this helper, several real fallback paths in `_execute_single_block`
would never flip the run-level flag (see findings CORR-5/CORR-6/CORR-7 in
the PR review discussion). The gate semantics are:
|
||||
|
||||
- `valid_to_run_code=True` ⇒ script execution was attempted for this
|
||||
block. False rules out always-agent routes.
|
||||
- `block_executed_with_code=False` ⇒ the script attempt didn't succeed.
|
||||
True means clean script execution; no fallback; no flip.
|
||||
|
||||
Together those two booleans are the exact condition under which the
|
||||
agent `execute_safe` call we just ran constituted a script→AI fallback.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import AsyncMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from skyvern.forge.sdk.workflow.service import WorkflowService
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_flips_flag_when_script_attempted_and_failed() -> None:
    """Happy-path fallback: script was attempted and did not succeed, agent
    ran — exactly one flag-flip update must hit the DB."""
    svc = WorkflowService()
    update_target = "skyvern.forge.sdk.workflow.service.app.DATABASE.workflow_runs.update_workflow_run"
    with patch(update_target, new_callable=AsyncMock) as update_mock:
        await svc._mark_script_fallback_triggered(
            workflow_run_id="wr_test",
            valid_to_run_code=True,
            block_executed_with_code=False,
            block_label="some_block",
        )
    update_mock.assert_awaited_once_with(
        workflow_run_id="wr_test",
        ai_fallback_triggered=True,
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_does_not_flip_when_script_was_not_attempted() -> None:
    """Always-agent route: `valid_to_run_code=False` means this block was
    never going to run as script (requires_agent, disable_cache, uncached,
    non-cacheable block type, or pure agent workflow). Agent execution is
    the intended mode, NOT a fallback, so the flag must stay untouched —
    otherwise always-agent blocks in otherwise code-mode runs would falsely
    claim 'a fallback happened'."""
    svc = WorkflowService()
    update_target = "skyvern.forge.sdk.workflow.service.app.DATABASE.workflow_runs.update_workflow_run"
    with patch(update_target, new_callable=AsyncMock) as update_mock:
        await svc._mark_script_fallback_triggered(
            workflow_run_id="wr_test",
            valid_to_run_code=False,
            block_executed_with_code=False,
            block_label="agent_only_block",
        )
    update_mock.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_does_not_flip_when_script_succeeded() -> None:
    """`block_executed_with_code=True`: the cached script ran cleanly, so no
    fallback occurred and the flag must not flip."""
    svc = WorkflowService()
    update_target = "skyvern.forge.sdk.workflow.service.app.DATABASE.workflow_runs.update_workflow_run"
    with patch(update_target, new_callable=AsyncMock) as update_mock:
        await svc._mark_script_fallback_triggered(
            workflow_run_id="wr_test",
            valid_to_run_code=True,
            block_executed_with_code=True,
            block_label="cached_block",
        )
    update_mock.assert_not_awaited()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_swallows_db_error_with_warning_log() -> None:
    """Dual contract: (a) a transient DB error on the flag flip must not
    raise, so the caller's downstream logic (fallback-episode enrichment,
    etc.) runs to completion; (b) the failure must still surface as a
    warning log."""
    svc = WorkflowService()
    update_target = "skyvern.forge.sdk.workflow.service.app.DATABASE.workflow_runs.update_workflow_run"
    failing_update = patch(
        update_target,
        new_callable=AsyncMock,
        side_effect=RuntimeError("db transient"),
    )
    with failing_update, patch("skyvern.forge.sdk.workflow.service.LOG.warning") as warn_mock:
        # Must not raise despite the DB error.
        await svc._mark_script_fallback_triggered(
            workflow_run_id="wr_test",
            valid_to_run_code=True,
            block_executed_with_code=False,
            block_label="some_block",
        )
    warn_mock.assert_called_once()
    # The first positional arg is the structlog event name.
    assert "ai_fallback_triggered" in warn_mock.call_args.args[0]
|
||||
61
tests/unit/workflow/test_mark_script_run_loaded.py
Normal file
61
tests/unit/workflow/test_mark_script_run_loaded.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
"""Regression guard for _mark_script_run_loaded — populates script_run on
|
||||
server-side code-mode runs so the API can detect cache use.
|
||||
|
||||
Before this helper existed, `workflow_run.script_run` was null on every
|
||||
Temporal run even when `execution_mode=code` was resolved at
|
||||
service.py:1508. See SKY-* / PR #10522 for context.
|
||||
"""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from skyvern.forge.sdk.workflow.service import WorkflowService
|
||||
from skyvern.schemas.scripts import Script
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_mark_script_run_loaded_calls_update_with_script_identity() -> None:
    """Exactly one update must carry the script identity plus the
    `ai_fallback_triggered=False` initialization. The `_execute_workflow_blocks`
    call site routes through this helper, so this pins the shape of what
    hits the DB on every cached-code run."""
    svc = WorkflowService()
    fake_script = MagicMock(spec=Script, script_id="s_abc", script_revision_id="sr_xyz")
    update_target = "skyvern.forge.sdk.workflow.service.app.DATABASE.workflow_runs.update_workflow_run"

    with patch(update_target, new_callable=AsyncMock) as update_mock:
        await svc._mark_script_run_loaded("wr_test", fake_script)

    update_mock.assert_awaited_once_with(
        workflow_run_id="wr_test",
        ai_fallback_triggered=False,
        script_id="s_abc",
        script_revision_id="sr_xyz",
    )
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_mark_script_run_loaded_swallows_db_error_with_warning() -> None:
    """The setup-time metadata write is informational for API consumers, not
    load-bearing for the run's own execution, so a transient DB error must
    degrade to a warning rather than abort workflow setup (mirrors the
    try/except in `_mark_script_fallback_triggered`)."""
    svc = WorkflowService()
    fake_script = MagicMock(spec=Script, script_id="s_abc", script_revision_id="sr_xyz")
    update_target = "skyvern.forge.sdk.workflow.service.app.DATABASE.workflow_runs.update_workflow_run"
    failing_update = patch(
        update_target,
        new_callable=AsyncMock,
        side_effect=RuntimeError("db transient"),
    )

    with failing_update, patch("skyvern.forge.sdk.workflow.service.LOG.warning") as warn_mock:
        # Must not raise.
        await svc._mark_script_run_loaded("wr_test", fake_script)
    warn_mock.assert_called_once()
    assert "script_run" in warn_mock.call_args.args[0]
|
||||
Loading…
Add table
Add a link
Reference in a new issue