mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-30 04:30:19 +00:00
fix: nested for-loop script block persistence (SKY-8757) (#5404)
This commit is contained in:
parent
15f39b999d
commit
2e72c2380c
6 changed files with 1476 additions and 89 deletions
|
|
@@ -9,6 +9,7 @@ from __future__ import annotations
|
|||
import hashlib
|
||||
import keyword
|
||||
import re
|
||||
from collections import deque
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
|
@@ -537,7 +538,9 @@ def _render_value(
|
|||
) -> cst.BaseExpression:
|
||||
"""Create a prompt value with template rendering logic if needed."""
|
||||
if not prompt_text:
|
||||
return cst.SimpleString("")
|
||||
# Delegate to _value so empty/None inputs produce a valid CST node
|
||||
# (libcst rejects SimpleString("") because it lacks enclosing quotes).
|
||||
return _value(prompt_text)
|
||||
if "{{" in prompt_text and "}}" in prompt_text:
|
||||
args = [cst.Arg(value=_value(prompt_text))]
|
||||
if data_variable_name:
|
||||
|
|
@@ -1052,10 +1055,16 @@ def _build_form_filling_block_fn(
|
|||
|
||||
navigation_goal = block.get("navigation_goal") or "Fill out the form"
|
||||
|
||||
# Include page.goto(url) if the block has a URL, just like _build_block_fn does
|
||||
# Include page.goto(url) if the block has a URL, just like _build_block_fn does.
|
||||
# Templated URLs (e.g. {{ outer_loop.current_value.url }}) are wrapped in
|
||||
# skyvern.render_template() so they resolve at runtime.
|
||||
goto_line = ""
|
||||
if block.get("url"):
|
||||
goto_line = f" await page.goto({repr(block['url'])})\n"
|
||||
url_str = block["url"]
|
||||
if isinstance(url_str, str) and "{{" in url_str and "}}" in url_str:
|
||||
goto_line = f" await page.goto(skyvern.render_template({repr(url_str)}))\n"
|
||||
else:
|
||||
goto_line = f" await page.goto({repr(url_str)})\n"
|
||||
|
||||
func_code = (
|
||||
f"async def {name}(page: SkyvernPage, context: RunContext):\n"
|
||||
|
|
@@ -1135,7 +1144,14 @@ def _build_block_fn(
|
|||
actions = _annotate_multi_field_totp_sequence(actions)
|
||||
|
||||
if block.get("url"):
|
||||
body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})"))
|
||||
# Use skyvern.render_template() when the URL contains a Jinja expression
|
||||
# (e.g. {{ outer_page_loop.current_value.url }}) so it resolves at runtime
|
||||
# against workflow_run_context.values populated by skyvern.loop().
|
||||
url_str = block["url"]
|
||||
if isinstance(url_str, str) and "{{" in url_str and "}}" in url_str:
|
||||
body_stmts.append(cst.parse_statement(f"await page.goto(skyvern.render_template({repr(url_str)}))"))
|
||||
else:
|
||||
body_stmts.append(cst.parse_statement(f"await page.goto({repr(url_str)})"))
|
||||
|
||||
# For file_download blocks inside for-loops, generate a dynamic click that uses
|
||||
# per-iteration context instead of hardcoded xpath/prompt from iteration 0.
|
||||
|
|
@@ -1406,7 +1422,7 @@ def _build_extract_statement(
|
|||
args = [
|
||||
cst.Arg(
|
||||
keyword=cst.Name("prompt"),
|
||||
value=_value(block.get("data_extraction_goal", "")),
|
||||
value=_render_value(block.get("data_extraction_goal", ""), data_variable_name),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
|
|
@@ -1414,6 +1430,8 @@ def _build_extract_statement(
|
|||
),
|
||||
cst.Arg(
|
||||
keyword=cst.Name("schema"),
|
||||
# data_schema is a dict/object, not a string template — _render_value only
|
||||
# handles strings, so we intentionally keep _value here.
|
||||
value=_value(block.get("data_schema", "")),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
|
|
@@ -1421,6 +1439,20 @@ def _build_extract_statement(
|
|||
),
|
||||
),
|
||||
]
|
||||
# Emit url so the extraction block navigates to the right page on cache hit.
|
||||
# Uses _render_value so Jinja refs like {{ outer_page_loop.current_value.url }}
|
||||
# resolve at runtime from workflow_run_context.values (populated by skyvern.loop()).
|
||||
if block.get("url"):
|
||||
args.append(
|
||||
cst.Arg(
|
||||
keyword=cst.Name("url"),
|
||||
value=_render_value(block.get("url", ""), data_variable_name),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
),
|
||||
)
|
||||
)
|
||||
if block.get("model"):
|
||||
args.append(
|
||||
cst.Arg(
|
||||
|
|
@@ -2440,7 +2472,14 @@ def __build_base_task_statement(
|
|||
if value_to_param and prompt:
|
||||
prompt_value = _build_parameterized_prompt_cst(prompt, value_to_param)
|
||||
if prompt_value is None:
|
||||
prompt_value = _value(prompt)
|
||||
if prompt:
|
||||
# Use _render_value so Jinja refs (e.g. {{ current_value }},
|
||||
# {{ outer_loop.current_value.url }}) are resolved at runtime via
|
||||
# skyvern.render_template() instead of emitted as Python literals.
|
||||
prompt_value = _render_value(prompt, data_variable_name)
|
||||
else:
|
||||
# Preserve old behavior for None/empty prompts (emits `None` vs `""`).
|
||||
prompt_value = _value(prompt)
|
||||
|
||||
args = [
|
||||
cst.Arg(
|
||||
|
|
@@ -2456,7 +2495,10 @@ def __build_base_task_statement(
|
|||
args.append(
|
||||
cst.Arg(
|
||||
keyword=cst.Name("url"),
|
||||
value=_value(block.get("url", "")),
|
||||
# Use _render_value so Jinja refs (e.g. {{ current_value }},
|
||||
# {{ outer_loop.current_value.url }}) resolve at runtime via
|
||||
# skyvern.render_template() instead of being emitted as literals.
|
||||
value=_render_value(block.get("url", ""), data_variable_name),
|
||||
whitespace_after_arg=cst.ParenthesizedWhitespace(
|
||||
indent=True,
|
||||
last_line=cst.SimpleWhitespace(INDENT),
|
||||
|
|
@@ -3030,8 +3072,86 @@ async def generate_workflow_script_python_code(
|
|||
# Inner blocks (e.g. extraction inside a loop) are nested in loop_blocks and
|
||||
# are NOT in the top-level blocks list, so they need separate processing here.
|
||||
# This follows the same pattern as task_v2 child block handling (lines 2704-2714).
|
||||
for loop_block in for_loop_block.get("loop_blocks", []):
|
||||
if loop_block.get("block_type") not in SCRIPT_TASK_BLOCKS:
|
||||
# Uses a BFS queue to recursively handle nested for-loops (SKY-8757).
|
||||
# Each queue entry is (block_dict, parent_forloop_label) so cache
|
||||
# invalidation propagates from the correct parent at any depth.
|
||||
loop_block_queue: deque[tuple[dict[str, Any], str]] = deque(
|
||||
(lb, for_loop_label) for lb in for_loop_block.get("loop_blocks", [])
|
||||
)
|
||||
while loop_block_queue:
|
||||
loop_block, parent_fl_label = loop_block_queue.popleft()
|
||||
loop_block_type = loop_block.get("block_type")
|
||||
|
||||
# block_type is a string here (from dict.get on model_dump output),
|
||||
# not a BlockType enum — unlike transform_workflow_run.py which
|
||||
# works with ORM objects. Both compare correctly to string literals.
|
||||
#
|
||||
# Nested for-loop: create script_block for the inner for-loop itself,
|
||||
# then push its children onto the queue for processing.
|
||||
# NOTE: Do NOT call append_block_code() for nested for_loop blocks
|
||||
# (same as top-level for_loops) — they produce bare `async for`
|
||||
# statements that cause SyntaxError at module level.
|
||||
if loop_block_type == "for_loop":
|
||||
nested_label = loop_block.get("label") or f"for_loop_{loop_block.get('workflow_run_block_id')}"
|
||||
|
||||
cached_nested = cached_blocks.get(nested_label)
|
||||
# Force rebuild when the nested label OR its immediate parent
|
||||
# for-loop is marked for regeneration (invalidation propagates
|
||||
# down at every nesting depth, not just from the top-level).
|
||||
use_nested_cached = (
|
||||
cached_nested is not None
|
||||
and nested_label not in updated_block_labels
|
||||
and parent_fl_label not in updated_block_labels
|
||||
)
|
||||
|
||||
nested_wrbi = loop_block.get("workflow_run_block_id")
|
||||
nested_wri = loop_block.get("workflow_run_id") or run_id
|
||||
|
||||
# use_nested_cached already guarantees cached_nested is not None;
|
||||
# the explicit check is retained only for mypy type narrowing.
|
||||
if (
|
||||
use_nested_cached
|
||||
and cached_nested is not None
|
||||
and cached_nested.code
|
||||
and cached_nested.run_signature
|
||||
):
|
||||
nested_code = cached_nested.code
|
||||
nested_sig = cached_nested.run_signature
|
||||
nested_wrbi = cached_nested.workflow_run_block_id
|
||||
nested_wri = cached_nested.workflow_run_id
|
||||
else:
|
||||
# No usable cache entry (missing, incomplete, or needs update)
|
||||
# — rebuild from current run data. Mark this label as updated
|
||||
# so invalidation cascades to deeper descendants.
|
||||
updated_block_labels.add(nested_label)
|
||||
nested_stmt = _build_for_loop_statement(nested_label, loop_block)
|
||||
temp_mod = cst.Module(body=[nested_stmt])
|
||||
nested_code = temp_mod.code
|
||||
nested_sig = nested_code.strip()
|
||||
|
||||
if script_id and script_revision_id and organization_id:
|
||||
ok = await create_or_update_script_block(
|
||||
block_code=nested_code,
|
||||
script_revision_id=script_revision_id,
|
||||
script_id=script_id,
|
||||
organization_id=organization_id,
|
||||
block_label=nested_label,
|
||||
update=pending,
|
||||
run_signature=nested_sig,
|
||||
workflow_run_id=nested_wri,
|
||||
workflow_run_block_id=nested_wrbi,
|
||||
input_fields=None,
|
||||
)
|
||||
if ok:
|
||||
blocks_created += 1
|
||||
else:
|
||||
blocks_failed += 1
|
||||
|
||||
# Push nested for-loop's children with this loop as their parent
|
||||
loop_block_queue.extend((child, nested_label) for child in loop_block.get("loop_blocks", []))
|
||||
continue
|
||||
|
||||
if loop_block_type not in SCRIPT_TASK_BLOCKS:
|
||||
continue
|
||||
|
||||
inner_label = (
|
||||
|
|
@@ -3051,7 +3171,11 @@ async def generate_workflow_script_python_code(
|
|||
else:
|
||||
inner_actions = actions_by_task.get(loop_block.get("task_id", ""), [])
|
||||
if not inner_actions:
|
||||
continue # No actions from agent run = can't generate cached function
|
||||
# No actions from agent run = can't generate cached function.
|
||||
# No script_block row is created; the block will be cached on
|
||||
# a future run when actions become available. This is intentional
|
||||
# — generating a stub would produce broken code.
|
||||
continue
|
||||
|
||||
inner_fn_def = _build_block_fn(
|
||||
loop_block,
|
||||
|
|
@@ -3160,10 +3284,22 @@ async def generate_workflow_script_python_code(
|
|||
for flb in for_loop_blocks:
|
||||
label = flb.get("label") or f"for_loop_{flb.get('workflow_run_block_id')}"
|
||||
processed_labels.add(label)
|
||||
# Also track inner block labels to prevent duplication in the
|
||||
# "preserve unexecuted branch" section below
|
||||
for lb in flb.get("loop_blocks", []):
|
||||
inner_lbl = lb.get("label") or lb.get("title")
|
||||
# Recursively track all inner block labels (including nested for-loops)
|
||||
# to prevent duplication in the "preserve unexecuted branch" section below.
|
||||
# Use the same label derivation as the main code generation loop to ensure
|
||||
# labels match (e.g., for_loop blocks without explicit labels get the
|
||||
# "for_loop_{workflow_run_block_id}" fallback).
|
||||
inner_queue: deque[dict[str, Any]] = deque(flb.get("loop_blocks", []))
|
||||
while inner_queue:
|
||||
lb = inner_queue.popleft()
|
||||
lb_type = lb.get("block_type")
|
||||
if lb_type == "for_loop":
|
||||
inner_lbl = lb.get("label") or f"for_loop_{lb.get('workflow_run_block_id')}"
|
||||
inner_queue.extend(lb.get("loop_blocks", []))
|
||||
else:
|
||||
# Use the same 3-fallback chain as the main generation loop
|
||||
# (label → title → block_{wrb_id}) so labels always match.
|
||||
inner_lbl = lb.get("label") or lb.get("title") or f"block_{lb.get('workflow_run_block_id')}"
|
||||
if inner_lbl:
|
||||
processed_labels.add(inner_lbl)
|
||||
if adaptive_caching:
|
||||
|
|
|
|||
|
|
@@ -47,6 +47,182 @@ def _process_action_for_block(
|
|||
return action_dump
|
||||
|
||||
|
||||
def _build_children_by_parent(workflow_run_blocks: list[Any]) -> dict[str | None, list[Any]]:
|
||||
"""Build a parent_id -> [child_blocks] mapping for O(1) lookups."""
|
||||
result: dict[str | None, list[Any]] = defaultdict(list)
|
||||
for block in workflow_run_blocks:
|
||||
result[block.parent_workflow_run_block_id].append(block)
|
||||
return result
|
||||
|
||||
|
||||
def _count_descendant_actions(
|
||||
root_wrb_id: str,
|
||||
children_by_parent: dict[str | None, list[Any]],
|
||||
actions_by_task_id: dict[str, list[Action]],
|
||||
) -> int:
|
||||
"""Count total actions across all descendants of a run block (DFS)."""
|
||||
total = 0
|
||||
stack = list(children_by_parent.get(root_wrb_id, []))
|
||||
while stack:
|
||||
node = stack.pop()
|
||||
if node.task_id:
|
||||
total += len(actions_by_task_id.get(node.task_id, []))
|
||||
stack.extend(children_by_parent.get(node.workflow_run_block_id, []))
|
||||
return total
|
||||
|
||||
|
||||
def _process_forloop_children(
|
||||
forloop_run_block: Any,
|
||||
loop_blocks_def: list[dict[str, Any]],
|
||||
children_by_parent: dict[str | None, list[Any]],
|
||||
tasks_by_id: dict[str, Any],
|
||||
actions_by_task_id: dict[str, list[Action]],
|
||||
actions_by_task: dict[str, list[dict[str, Any]]],
|
||||
) -> list[dict[str, Any]]:
|
||||
"""Process ForLoop child blocks, merging run data into definition blocks.
|
||||
|
||||
Recursively handles nested for-loops so deeply nested blocks (e.g., extraction
|
||||
inside a double-nested for-loop) get their task_id and actions merged.
|
||||
"""
|
||||
# Child blocks have parent_workflow_run_block_id pointing to the ForLoop's workflow_run_block_id.
|
||||
# When the outer loop iterates N times, there are N child run blocks per label.
|
||||
# Pick the best candidate per label: prefer the block that has a task_id (for task
|
||||
# blocks) or the most grandchildren (for nested for-loops), so we don't lose data
|
||||
# if the last iteration happened to be empty.
|
||||
child_run_blocks = children_by_parent.get(forloop_run_block.workflow_run_block_id, [])
|
||||
child_run_blocks_by_label: dict[str, Any] = {}
|
||||
for b in child_run_blocks:
|
||||
if not b.label:
|
||||
continue
|
||||
existing = child_run_blocks_by_label.get(b.label)
|
||||
if existing is None:
|
||||
child_run_blocks_by_label[b.label] = b
|
||||
elif b.block_type in SCRIPT_TASK_BLOCKS:
|
||||
# Prefer the iteration with the richest execution evidence:
|
||||
# 1. has task_id beats no task_id
|
||||
# 2. when both have task_id, prefer more actions
|
||||
# 3. on action tie, prefer completed status over failed/other
|
||||
if b.task_id and not existing.task_id:
|
||||
child_run_blocks_by_label[b.label] = b
|
||||
elif b.task_id and existing.task_id:
|
||||
b_actions = len(actions_by_task_id.get(b.task_id, []))
|
||||
existing_actions = len(actions_by_task_id.get(existing.task_id, []))
|
||||
if b_actions > existing_actions:
|
||||
child_run_blocks_by_label[b.label] = b
|
||||
elif b_actions == existing_actions:
|
||||
# Break tie by status: completed > everything else.
|
||||
# Use str() in case the ORM returns a Status enum.
|
||||
b_completed = str(b.status) == "completed"
|
||||
existing_completed = str(existing.status) == "completed"
|
||||
if b_completed and not existing_completed:
|
||||
child_run_blocks_by_label[b.label] = b
|
||||
elif b.block_type == BlockType.FOR_LOOP:
|
||||
# Prefer the nested for-loop iteration that produced grandchildren.
|
||||
# On ties, break by total deep-descendant action count so the
|
||||
# iteration with usable actions wins even at 3+ nesting levels.
|
||||
existing_children = children_by_parent.get(existing.workflow_run_block_id, [])
|
||||
b_children_list = children_by_parent.get(b.workflow_run_block_id, [])
|
||||
if len(b_children_list) > len(existing_children):
|
||||
child_run_blocks_by_label[b.label] = b
|
||||
elif len(b_children_list) == len(existing_children) and b_children_list:
|
||||
b_desc = _count_descendant_actions(b.workflow_run_block_id, children_by_parent, actions_by_task_id)
|
||||
existing_desc = _count_descendant_actions(
|
||||
existing.workflow_run_block_id, children_by_parent, actions_by_task_id
|
||||
)
|
||||
if b_desc > existing_desc:
|
||||
child_run_blocks_by_label[b.label] = b
|
||||
|
||||
unlabeled_children = [b for b in child_run_blocks if not b.label]
|
||||
if unlabeled_children:
|
||||
LOG.warning(
|
||||
"ForLoop has child blocks without labels - these will not be matched to loop_blocks definitions",
|
||||
forloop_label=forloop_run_block.label,
|
||||
unlabeled_count=len(unlabeled_children),
|
||||
)
|
||||
|
||||
if loop_blocks_def and not child_run_blocks:
|
||||
LOG.warning(
|
||||
"ForLoop block has loop_blocks definitions but no child run blocks found",
|
||||
forloop_label=forloop_run_block.label,
|
||||
workflow_run_block_id=forloop_run_block.workflow_run_block_id,
|
||||
loop_blocks_count=len(loop_blocks_def),
|
||||
)
|
||||
|
||||
updated_loop_blocks: list[dict[str, Any]] = []
|
||||
matched_count = 0
|
||||
nested_forloop_count = 0
|
||||
for loop_block_def in loop_blocks_def:
|
||||
# Shallow copy: safe because we only replace top-level keys (update(),
|
||||
# ["loop_blocks"] = ...). Do not mutate nested dicts in-place.
|
||||
if isinstance(loop_block_def, dict):
|
||||
loop_block_dump = loop_block_def.copy()
|
||||
elif hasattr(loop_block_def, "model_dump"):
|
||||
loop_block_dump = loop_block_def.model_dump()
|
||||
else:
|
||||
loop_block_dump = dict(loop_block_def)
|
||||
loop_block_label = loop_block_dump.get("label")
|
||||
|
||||
child_run_block = child_run_blocks_by_label.get(loop_block_label) if loop_block_label else None
|
||||
|
||||
if child_run_block and child_run_block.block_type in SCRIPT_TASK_BLOCKS and child_run_block.task_id:
|
||||
matched_count += 1
|
||||
task = tasks_by_id.get(child_run_block.task_id)
|
||||
if task:
|
||||
task_dump = task.model_dump()
|
||||
loop_block_dump.update({k: v for k, v in task_dump.items() if k not in loop_block_dump})
|
||||
loop_block_dump.update(
|
||||
{
|
||||
"task_id": child_run_block.task_id,
|
||||
"status": child_run_block.status,
|
||||
"output": child_run_block.output,
|
||||
}
|
||||
)
|
||||
|
||||
actions = actions_by_task_id.get(child_run_block.task_id, [])
|
||||
action_dumps = [_process_action_for_block(action, loop_block_dump) for action in actions]
|
||||
actions_by_task[child_run_block.task_id] = action_dumps
|
||||
else:
|
||||
LOG.warning(
|
||||
"Task not found for ForLoop child block",
|
||||
task_id=child_run_block.task_id,
|
||||
forloop_label=forloop_run_block.label,
|
||||
)
|
||||
|
||||
# Recursively process nested for-loops so their inner blocks
|
||||
# also get task_id and actions merged (SKY-8757).
|
||||
if child_run_block and child_run_block.block_type == BlockType.FOR_LOOP:
|
||||
nested_forloop_count += 1
|
||||
inner_loop_blocks = loop_block_dump.get("loop_blocks", [])
|
||||
if inner_loop_blocks:
|
||||
loop_block_dump["loop_blocks"] = _process_forloop_children(
|
||||
forloop_run_block=child_run_block,
|
||||
loop_blocks_def=inner_loop_blocks,
|
||||
children_by_parent=children_by_parent,
|
||||
tasks_by_id=tasks_by_id,
|
||||
actions_by_task_id=actions_by_task_id,
|
||||
actions_by_task=actions_by_task,
|
||||
)
|
||||
# Always set run block metadata for the inner for-loop, even when
|
||||
# loop_blocks is empty. generate_script.py needs workflow_run_block_id
|
||||
# for script_block creation and label fallback derivation.
|
||||
loop_block_dump["workflow_run_block_id"] = child_run_block.workflow_run_block_id
|
||||
loop_block_dump["workflow_run_id"] = child_run_block.workflow_run_id
|
||||
|
||||
updated_loop_blocks.append(loop_block_dump)
|
||||
|
||||
if matched_count or nested_forloop_count:
|
||||
LOG.info(
|
||||
"ForLoop child block processing summary",
|
||||
forloop_label=forloop_run_block.label,
|
||||
definition_count=len(loop_blocks_def),
|
||||
run_block_count=len(child_run_blocks),
|
||||
task_blocks_matched=matched_count,
|
||||
nested_forloops=nested_forloop_count,
|
||||
)
|
||||
|
||||
return updated_loop_blocks
|
||||
|
||||
|
||||
async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organization_id: str) -> CodeGenInput:
|
||||
# get the workflow run request
|
||||
workflow_run_resp = await workflow_service.get_workflow_run_response(
|
||||
|
|
@@ -70,7 +246,10 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz
|
|||
# get the original workflow definition blocks (with templated information)
|
||||
workflow_definition_blocks = workflow.workflow_definition.blocks
|
||||
|
||||
# get workflow run blocks for task execution data
|
||||
# Get workflow run blocks for task execution data.
|
||||
# IMPORTANT: This returns ALL descendant blocks (including for-loop children
|
||||
# and deeply nested blocks), not just top-level blocks. The batch task_id
|
||||
# collection below and _process_forloop_children both depend on this.
|
||||
workflow_run_blocks = await app.DATABASE.observer.get_workflow_run_blocks(
|
||||
workflow_run_id=workflow_run_id, organization_id=organization_id
|
||||
)
|
||||
|
|
@@ -119,6 +298,9 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz
|
|||
actions_by_task: dict[str, list[dict[str, Any]]] = {}
|
||||
task_v2_child_blocks = {}
|
||||
|
||||
# Pre-build parent -> children mapping for O(1) lookups in for-loop processing
|
||||
children_by_parent = _build_children_by_parent(workflow_run_blocks)
|
||||
|
||||
# Loop through workflow run blocks and match to original definition blocks by label
|
||||
for definition_block in workflow_definition_blocks:
|
||||
# if definition_block.block_type == BlockType.TaskV2:
|
||||
|
|
@@ -202,71 +384,14 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz
|
|||
)
|
||||
|
||||
if run_block.block_type == BlockType.FOR_LOOP:
|
||||
# Process ForLoop child blocks to get actions for task blocks inside the loop
|
||||
# Child blocks have parent_workflow_run_block_id pointing to the ForLoop's workflow_run_block_id
|
||||
child_run_blocks = [
|
||||
b for b in workflow_run_blocks if b.parent_workflow_run_block_id == run_block.workflow_run_block_id
|
||||
]
|
||||
# Create mapping of child run blocks by label
|
||||
child_run_blocks_by_label = {b.label: b for b in child_run_blocks if b.label}
|
||||
|
||||
# Warn about any unlabeled child blocks that won't be matched
|
||||
unlabeled_children = [b for b in child_run_blocks if not b.label]
|
||||
if unlabeled_children:
|
||||
LOG.warning(
|
||||
"ForLoop has child blocks without labels - these will not be matched to loop_blocks definitions",
|
||||
forloop_label=run_block.label,
|
||||
unlabeled_count=len(unlabeled_children),
|
||||
)
|
||||
|
||||
# Get loop_blocks from the definition block
|
||||
loop_blocks = final_dump.get("loop_blocks", [])
|
||||
|
||||
if loop_blocks and not child_run_blocks:
|
||||
LOG.warning(
|
||||
"ForLoop block has loop_blocks definitions but no child run blocks found",
|
||||
forloop_label=run_block.label,
|
||||
workflow_run_block_id=run_block.workflow_run_block_id,
|
||||
loop_blocks_count=len(loop_blocks),
|
||||
)
|
||||
updated_loop_blocks = []
|
||||
|
||||
for loop_block_def in loop_blocks:
|
||||
loop_block_dump = loop_block_def.copy() if isinstance(loop_block_def, dict) else loop_block_def
|
||||
loop_block_label = loop_block_dump.get("label")
|
||||
|
||||
# Find matching child run block
|
||||
child_run_block = child_run_blocks_by_label.get(loop_block_label) if loop_block_label else None
|
||||
|
||||
if child_run_block and child_run_block.block_type in SCRIPT_TASK_BLOCKS and child_run_block.task_id:
|
||||
# Use pre-fetched task data (batch fetched)
|
||||
task = tasks_by_id.get(child_run_block.task_id)
|
||||
if task:
|
||||
task_dump = task.model_dump()
|
||||
loop_block_dump.update({k: v for k, v in task_dump.items() if k not in loop_block_dump})
|
||||
loop_block_dump.update(
|
||||
{
|
||||
"task_id": child_run_block.task_id,
|
||||
"status": child_run_block.status,
|
||||
"output": child_run_block.output,
|
||||
}
|
||||
)
|
||||
|
||||
# Use pre-fetched actions (batch fetched)
|
||||
actions = actions_by_task_id.get(child_run_block.task_id, [])
|
||||
action_dumps = [_process_action_for_block(action, loop_block_dump) for action in actions]
|
||||
actions_by_task[child_run_block.task_id] = action_dumps
|
||||
else:
|
||||
LOG.warning(
|
||||
"Task not found for ForLoop child block",
|
||||
task_id=child_run_block.task_id,
|
||||
forloop_label=run_block.label,
|
||||
)
|
||||
|
||||
updated_loop_blocks.append(loop_block_dump)
|
||||
|
||||
# Update final_dump with the processed loop_blocks
|
||||
final_dump["loop_blocks"] = updated_loop_blocks
|
||||
final_dump["loop_blocks"] = _process_forloop_children(
|
||||
forloop_run_block=run_block,
|
||||
loop_blocks_def=final_dump.get("loop_blocks", []),
|
||||
children_by_parent=children_by_parent,
|
||||
tasks_by_id=tasks_by_id,
|
||||
actions_by_task_id=actions_by_task_id,
|
||||
actions_by_task=actions_by_task,
|
||||
)
|
||||
|
||||
final_dump["workflow_run_id"] = workflow_run_id
|
||||
if run_block:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue