fix: skip script_block entries for blocks with no actions (#SKY-8443) (#5141)

Co-authored-by: Aaron Perez <aperez0295@gmail.com>
This commit is contained in:
pedrohsdb 2026-03-18 23:24:57 -07:00 committed by GitHub
parent e321c89fa2
commit 416b6b41c5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 123 additions and 1 deletions

View file

@ -2648,9 +2648,27 @@ async def generate_workflow_script_python_code(
block_workflow_run_id = cached_source.workflow_run_id
block_workflow_run_block_id = cached_source.workflow_run_block_id
else:
task_id = task.get("task_id", "")
block_actions = actions_by_task.get(task_id, [])
# Skip blocks that have no actions AND no task_id — they haven't executed yet.
# Creating script_block entries for actionless blocks causes a permanent
# stuck state where generate_script_if_needed thinks they're cached but
# the Python file has no code for them. (SKY-8443)
# Note: a block WITH task_id but zero actions is valid — it means the block
# executed but completed immediately (e.g., page.complete() with no interaction).
# _build_block_fn handles this correctly by generating a minimal function.
if not block_actions and not task_id:
LOG.debug(
"Skipping block with no actions and no task_id — not yet executed",
block_label=block_name,
script_id=script_id,
)
continue
block_fn_def = _build_block_fn(
task,
actions_by_task.get(task.get("task_id", ""), []),
block_actions,
value_to_param=value_to_param,
use_semantic_selectors=use_semantic_selectors,
)
@ -2708,6 +2726,16 @@ async def generate_workflow_script_python_code(
block_workflow_run_id = cached_source.workflow_run_id
block_workflow_run_block_id = cached_source.workflow_run_block_id
else:
# Skip task_v2 blocks that haven't executed (no child workflow run).
# Same rationale as task_v1 guard — prevents phantom script_block entries. (SKY-8443)
if not child_blocks and not task_v2.get("block_workflow_run_id"):
LOG.debug(
"Skipping task_v2 block with no child blocks — not yet executed",
block_label=task_v2_label,
script_id=script_id,
)
continue
task_v2_fn_def = _build_task_v2_block_fn(task_v2, child_blocks)
task_v2_block_body: list[cst.CSTNode] = [task_v2_fn_def]

View file

@ -0,0 +1,94 @@
"""Tests for phantom script_block entry prevention (SKY-8443).
When _generate_pending_script_for_block fires after an early block completion,
generate_workflow_script_python_code must NOT create script_block DB entries
for blocks that haven't executed yet (no actions, no task_id). Creating such
entries causes a permanent stuck state where subsequent runs think all blocks
are cached but the Python file only has code for the first block.
"""
def test_unexecuted_block_detected_by_guard():
"""A block with no actions and no task_id should be identified as unexecuted."""
# Simulates a block in workflow_blocks that has a workflow_run_block entry
# (pre-created in queued state) but hasn't actually run yet
unexecuted_block = {
"block_type": "navigation",
"label": "block_1",
"task_id": "", # Empty — no task created yet
"workflow_run_block_id": "wrb_123",
}
actions_by_task: dict = {}
task_id = unexecuted_block.get("task_id", "")
block_actions = actions_by_task.get(task_id, [])
# This is the guard condition from generate_script.py
should_skip = not block_actions and not task_id
assert should_skip, "Block with no actions and no task_id should be skipped"
def test_executed_block_with_actions_passes_guard():
"""A block with a task_id and actions should NOT be skipped."""
executed_block = {
"block_type": "navigation",
"label": "block_1",
"task_id": "tsk_456",
"workflow_run_block_id": "wrb_123",
}
actions_by_task = {
"tsk_456": [{"action_type": "click", "element_id": "AAAB"}],
}
task_id = executed_block.get("task_id", "")
block_actions = actions_by_task.get(task_id, [])
should_skip = not block_actions and not task_id
assert not should_skip, "Block with task_id and actions should NOT be skipped"
def test_executed_block_with_task_id_but_no_actions_passes_guard():
"""A block with a task_id but zero actions should NOT be skipped.
This is a valid scenario the block executed but completed immediately."""
executed_empty_block = {
"block_type": "navigation",
"label": "block_1",
"task_id": "tsk_789",
"workflow_run_block_id": "wrb_123",
}
actions_by_task: dict = {}
task_id = executed_empty_block.get("task_id", "")
block_actions = actions_by_task.get(task_id, [])
should_skip = not block_actions and not task_id
assert not should_skip, "Block with task_id (even without actions) should NOT be skipped"
def test_progressive_caching_scenario():
"""Simulate the exact scenario from SKY-8443: login completes first,
block_1 and block_2 haven't executed yet. Only login should produce
a script_block entry."""
blocks = [
{"block_type": "login", "label": "login", "task_id": "tsk_001"},
{"block_type": "navigation", "label": "block_1", "task_id": ""}, # Not executed
{"block_type": "file_download", "label": "block_2", "task_id": ""}, # Not executed
]
actions_by_task = {
"tsk_001": [
{"action_type": "input_text", "element_id": "AAA1"},
{"action_type": "click", "element_id": "AAA2"},
],
}
blocks_that_should_get_entries = []
for block in blocks:
task_id = block.get("task_id", "")
block_actions = actions_by_task.get(task_id, [])
if not block_actions and not task_id:
continue # Skip — unexecuted
blocks_that_should_get_entries.append(block["label"])
assert blocks_that_should_get_entries == ["login"], (
"Only login should get a script_block entry; block_1 and block_2 haven't executed"
)