Skyvern/tests/unit/test_nested_forloop_script_generation.py

"""Tests for nested for-loop script generation and transformation (SKY-8757).

Covers:
1. _process_forloop_children in transform_workflow_run.py — recursive merging
   of task data for nested for-loop children.
2. generate_workflow_script_python_code in generate_script.py — code generation
   for nested for-loop inner blocks (script_block creation + function bodies).
"""

import ast
from typing import Any
from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from skyvern.core.script_generations.transform_workflow_run import (
    _build_children_by_parent,
    _process_forloop_children,
)
from skyvern.schemas.workflows import BlockType
from skyvern.webeye.actions.actions import Action


# ---------------------------------------------------------------------------
# Part 1: _process_forloop_children tests
# ---------------------------------------------------------------------------
class TestProcessForloopChildren:
    """Test recursive merging of for-loop children in transform_workflow_run.

    Each test builds a small tree of mocked workflow run blocks, passes it to
    ``_process_forloop_children`` together with the loop's block definitions,
    and inspects the returned definitions plus the ``actions_by_task`` dict
    handed to the call.

    The ``_make_*`` / ``_merge`` helpers only remove fixture boilerplate. They
    pin exactly the attributes a test names; every other attribute stays an
    auto-created ``MagicMock`` attribute.
    """

    # ------------------------------------------------------------------
    # Fixture helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _make_run_block(block_id: str, label: str, **attrs: Any) -> MagicMock:
        """Return a mocked run block with the given id, label and extra attributes.

        Args:
            block_id: Value for ``workflow_run_block_id``.
            label: Value for ``label``.
            **attrs: Additional attributes to set verbatim on the mock.
        """
        run_block = MagicMock()
        run_block.workflow_run_block_id = block_id
        run_block.label = label
        for attr_name, attr_value in attrs.items():
            setattr(run_block, attr_name, attr_value)
        return run_block

    @classmethod
    def _make_task_child(
        cls,
        block_id: str,
        parent_id: str,
        label: str,
        *,
        task_id: Any,
        status: Any,
        output: Any,
    ) -> MagicMock:
        """Return a mocked ``extraction`` run block that is a child of ``parent_id``."""
        return cls._make_run_block(
            block_id,
            label,
            parent_workflow_run_block_id=parent_id,
            block_type="extraction",
            task_id=task_id,
            status=status,
            output=output,
            workflow_run_id="wr_1",
        )

    @classmethod
    def _make_loop_child(cls, block_id: str, parent_id: str, label: str = "inner_loop") -> MagicMock:
        """Return a mocked nested for-loop run block (``task_id`` is ``None``).

        ``status`` and ``output`` are not set here, so they remain
        auto-created mock attributes.
        """
        return cls._make_run_block(
            block_id,
            label,
            parent_workflow_run_block_id=parent_id,
            block_type=BlockType.FOR_LOOP,
            task_id=None,
            workflow_run_id="wr_1",
        )

    @staticmethod
    def _make_task(task_id: str, **extra: Any) -> MagicMock:
        """Return a mocked task whose ``model_dump()`` yields ``task_id`` plus ``extra``."""
        task = MagicMock()
        task.model_dump.return_value = {"task_id": task_id, **extra}
        return task

    @staticmethod
    def _make_action(task_id: str, xpath: str = "//div", dump: Any = None) -> MagicMock:
        """Return an ``Action``-spec'd mock of type ``extract`` bound to ``task_id``.

        Args:
            task_id: Value for the action's ``task_id``.
            xpath: Return value of ``get_xpath()``.
            dump: Return value of ``model_dump()``; defaults to
                ``{"action_type": "extract"}``.
        """
        action = MagicMock(spec=Action)
        action.model_dump.return_value = {"action_type": "extract"} if dump is None else dump
        action.get_xpath.return_value = xpath
        action.has_mini_agent = False
        action.action_type = "extract"
        action.task_id = task_id
        return action

    @staticmethod
    def _inner_loop_def(goal: str) -> list[dict[str, Any]]:
        """Return loop definitions holding one ``inner_loop`` with one ``deep_extract`` child."""
        return [
            {
                "block_type": "for_loop",
                "label": "inner_loop",
                "loop_blocks": [
                    {"block_type": "extraction", "label": "deep_extract", "data_extraction_goal": goal},
                ],
            },
        ]

    @staticmethod
    def _merge(
        forloop_run_block: MagicMock,
        loop_blocks_def: list[dict[str, Any]],
        run_blocks: list[MagicMock],
        *,
        tasks_by_id: dict[str, Any],
        actions_by_task_id: dict[str, Any],
    ) -> tuple[Any, dict[str, list[dict[str, Any]]]]:
        """Call ``_process_forloop_children`` and return ``(result, actions_by_task)``.

        ``run_blocks`` is indexed with ``_build_children_by_parent`` and a fresh
        empty ``actions_by_task`` dict is passed in, so the caller can inspect
        what the function recorded in it.
        """
        actions_by_task: dict[str, list[dict[str, Any]]] = {}
        result = _process_forloop_children(
            forloop_run_block=forloop_run_block,
            loop_blocks_def=loop_blocks_def,
            children_by_parent=_build_children_by_parent(run_blocks),
            tasks_by_id=tasks_by_id,
            actions_by_task_id=actions_by_task_id,
            actions_by_task=actions_by_task,
        )
        return result, actions_by_task

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------
    def test_single_level_merges_task_data(self) -> None:
        """Direct task children get task_id and actions merged."""
        # The loop block's parent id is not set, so it stays an auto-created mock attribute.
        forloop_run_block = self._make_run_block("wfrb_outer", "outer_loop")
        child_run_block = self._make_task_child(
            "wfrb_child",
            "wfrb_outer",
            "extract_data",
            task_id="task_1",
            status="completed",
            output={"data": "extracted"},
        )
        mock_task = self._make_task("task_1", navigation_goal="Extract")
        mock_action = self._make_action("task_1", dump={"action_type": "extract", "action_id": "a1"})

        result, actions_by_task = self._merge(
            forloop_run_block,
            [{"block_type": "extraction", "label": "extract_data", "data_extraction_goal": "Extract"}],
            [forloop_run_block, child_run_block],
            tasks_by_id={"task_1": mock_task},
            actions_by_task_id={"task_1": [mock_action]},
        )

        assert len(result) == 1
        assert result[0]["task_id"] == "task_1"
        assert result[0]["status"] == "completed"
        assert "task_1" in actions_by_task

    def test_nested_forloop_recurses_into_children(self) -> None:
        """Nested for-loop's children should get task data merged recursively."""
        # The outer block's parent id is not set, so it stays an auto-created mock attribute.
        outer_run = self._make_run_block("wfrb_outer", "outer_loop")
        inner_run = self._make_loop_child("wfrb_inner", "wfrb_outer")
        grandchild_run = self._make_task_child(
            "wfrb_grandchild",
            "wfrb_inner",
            "deep_extract",
            task_id="task_deep",
            status="completed",
            output={"deep": True},
        )
        mock_task = self._make_task("task_deep")
        mock_action = self._make_action("task_deep", xpath="//span")

        result, actions_by_task = self._merge(
            outer_run,
            self._inner_loop_def("Deep extract"),
            [outer_run, inner_run, grandchild_run],
            tasks_by_id={"task_deep": mock_task},
            actions_by_task_id={"task_deep": [mock_action]},
        )

        assert len(result) == 1
        inner_loop = result[0]
        assert inner_loop["block_type"] == "for_loop"
        assert inner_loop["workflow_run_block_id"] == "wfrb_inner"

        # Verify the grandchild got task data merged
        inner_children = inner_loop.get("loop_blocks", [])
        assert len(inner_children) == 1
        assert inner_children[0]["task_id"] == "task_deep"
        assert "task_deep" in actions_by_task

    def test_multi_iteration_picks_best_task_block(self) -> None:
        """When the outer loop iterates multiple times, the run block with task_id should win."""
        outer_run = self._make_run_block("wfrb_outer", "outer_loop", parent_workflow_run_block_id=None)

        # Iteration 1: has task_id (good)
        child_iter1 = self._make_task_child(
            "wfrb_child_iter1",
            "wfrb_outer",
            "extract_data",
            task_id="task_good",
            status="completed",
            output={"data": True},
        )
        # Iteration 2: no task_id (empty iteration)
        child_iter2 = self._make_task_child(
            "wfrb_child_iter2",
            "wfrb_outer",
            "extract_data",
            task_id=None,
            status=None,
            output=None,
        )
        mock_task = self._make_task("task_good")
        mock_action = self._make_action("task_good")

        # child_iter2 comes after child_iter1 — old code would keep iter2 (no task_id)
        result, actions_by_task = self._merge(
            outer_run,
            [{"block_type": "extraction", "label": "extract_data", "data_extraction_goal": "Extract"}],
            [outer_run, child_iter1, child_iter2],
            tasks_by_id={"task_good": mock_task},
            actions_by_task_id={"task_good": [mock_action]},
        )

        assert len(result) == 1
        # Should pick the block with task_id, not the empty one
        assert result[0]["task_id"] == "task_good"
        assert "task_good" in actions_by_task

    def test_multi_iteration_prefers_richer_actions(self) -> None:
        """When both iterations have task_id, prefer the one with more actions."""
        outer_run = self._make_run_block("wfrb_outer", "outer_loop", parent_workflow_run_block_id=None)

        # Iteration 1: has task_id but only 1 action (partial)
        child_iter1 = self._make_task_child(
            "wfrb_child_iter1",
            "wfrb_outer",
            "extract_data",
            task_id="task_partial",
            status="completed",
            output={},
        )
        # Iteration 2: has task_id with 3 actions (richer)
        child_iter2 = self._make_task_child(
            "wfrb_child_iter2",
            "wfrb_outer",
            "extract_data",
            task_id="task_rich",
            status="completed",
            output={"data": "full"},
        )
        mock_task_partial = self._make_task("task_partial")
        mock_task_rich = self._make_task("task_rich")

        result, actions_by_task = self._merge(
            outer_run,
            [{"block_type": "extraction", "label": "extract_data", "data_extraction_goal": "Extract"}],
            [outer_run, child_iter1, child_iter2],
            tasks_by_id={"task_partial": mock_task_partial, "task_rich": mock_task_rich},
            actions_by_task_id={
                "task_partial": [self._make_action("task_partial")],
                "task_rich": [
                    self._make_action("task_rich"),
                    self._make_action("task_rich"),
                    self._make_action("task_rich"),
                ],
            },
        )

        assert len(result) == 1
        # Should pick task_rich (3 actions) over task_partial (1 action)
        assert result[0]["task_id"] == "task_rich"
        assert "task_rich" in actions_by_task

    def test_multi_iteration_nested_forloop_picks_richest(self) -> None:
        """When a nested for-loop has multiple iterations, pick the one with most grandchildren."""
        outer_run = self._make_run_block("wfrb_outer", "outer_loop", parent_workflow_run_block_id=None)

        # Iteration 1 of inner loop: has grandchildren
        inner_iter1 = self._make_loop_child("wfrb_inner_iter1", "wfrb_outer")
        grandchild = self._make_task_child(
            "wfrb_grandchild",
            "wfrb_inner_iter1",
            "deep_extract",
            task_id="task_deep",
            status="completed",
            output={"deep": True},
        )
        # Iteration 2 of inner loop: empty (no grandchildren)
        inner_iter2 = self._make_loop_child("wfrb_inner_iter2", "wfrb_outer")

        mock_task = self._make_task("task_deep")
        mock_action = self._make_action("task_deep", xpath="//span")

        # inner_iter2 comes last but has no grandchildren — should pick inner_iter1
        result, actions_by_task = self._merge(
            outer_run,
            self._inner_loop_def("Deep"),
            [outer_run, inner_iter1, grandchild, inner_iter2],
            tasks_by_id={"task_deep": mock_task},
            actions_by_task_id={"task_deep": [mock_action]},
        )

        assert len(result) == 1
        inner = result[0]
        # Should have picked inner_iter1 (has grandchildren)
        assert inner["workflow_run_block_id"] == "wfrb_inner_iter1"
        # Grandchild should have task data merged
        inner_children = inner.get("loop_blocks", [])
        assert len(inner_children) == 1
        assert inner_children[0]["task_id"] == "task_deep"
        assert "task_deep" in actions_by_task

    def test_nested_forloop_tie_broken_by_descendant_actions(self) -> None:
        """When two nested for-loop iterations have the same child count,
        prefer the one whose descendants have more actions."""
        outer_run = self._make_run_block("wfrb_outer", "outer_loop", parent_workflow_run_block_id=None)

        # Iteration 1: 1 grandchild, no actions
        inner_iter1 = self._make_loop_child("wfrb_inner_iter1", "wfrb_outer")
        gc1 = self._make_task_child(
            "wfrb_gc1",
            "wfrb_inner_iter1",
            "deep_extract",
            task_id="task_empty",
            status="completed",
            output={},
        )
        # Iteration 2: 1 grandchild, with actions (richer)
        inner_iter2 = self._make_loop_child("wfrb_inner_iter2", "wfrb_outer")
        gc2 = self._make_task_child(
            "wfrb_gc2",
            "wfrb_inner_iter2",
            "deep_extract",
            task_id="task_rich",
            status="completed",
            output={"data": True},
        )

        mock_task = self._make_task("task_rich")
        mock_action = self._make_action("task_rich")

        # Both iterations have 1 grandchild, but only iter2's grandchild has actions
        result, _ = self._merge(
            outer_run,
            self._inner_loop_def("Extract"),
            [outer_run, inner_iter1, gc1, inner_iter2, gc2],
            tasks_by_id={"task_rich": mock_task},
            actions_by_task_id={"task_rich": [mock_action]},  # only task_rich has actions
        )

        assert len(result) == 1
        inner = result[0]
        # Should pick iter2 (descendant has actions) over iter1 (no actions)
        assert inner["workflow_run_block_id"] == "wfrb_inner_iter2"

    def test_no_matching_run_block_preserves_definition(self) -> None:
        """If no run block matches a definition child, the definition is preserved unchanged."""
        forloop_run = self._make_run_block("wfrb_loop", "loop")

        result, _ = self._merge(
            forloop_run,
            [{"block_type": "extraction", "label": "unexecuted_block"}],
            [forloop_run],
            tasks_by_id={},
            actions_by_task_id={},
        )

        assert len(result) == 1
        assert result[0]["label"] == "unexecuted_block"
        assert "task_id" not in result[0]


# ---------------------------------------------------------------------------
# Part 2: generate_workflow_script_python_code tests for nested for-loops
# ---------------------------------------------------------------------------
class TestNestedForloopCodeGeneration:
    """Test that nested for-loops generate correct script blocks and code."""

    @pytest.mark.asyncio
    async def test_nested_forloop_creates_script_blocks_for_all_levels(self) -> None:
        """Generate a script for a double-nested for-loop and check what gets persisted.

        The generated source must parse, and the patched
        ``create_or_update_script_block`` must have been called with a
        ``block_label`` for each of:
        1. Outer for-loop
        2. Inner for-loop
        3. Inner task block
        """
        from skyvern.core.script_generations.generate_script import generate_workflow_script_python_code

        # Build the block tree inside-out so each nesting level has a name.
        extraction_block = {
            "block_type": "extraction",
            "label": "extract_data",
            "data_extraction_goal": "Get content",
            "task_id": "task_extract",
            "workflow_run_block_id": "wfrb_extract",
        }
        inner_loop_block = {
            "block_type": "for_loop",
            "label": "inner_loop",
            "loop_variable_reference": "{{ documents }}",
            "workflow_run_block_id": "wfrb_inner",
            "loop_blocks": [extraction_block],
        }
        outer_loop_block = {
            "block_type": "for_loop",
            "label": "outer_loop",
            "loop_variable_reference": "{{ urls }}",
            "workflow_run_block_id": "wfrb_outer",
            "loop_blocks": [inner_loop_block],
        }

        extract_action = {
            "action_type": "extract",
            "action_id": "action_1",
            "xpath": "//div[@id='content']",
            "element_id": "elem_1",
            "text": None,
            "data_extraction_goal": "Get content",
        }
        recorded_actions = {"task_extract": [extract_action]}

        workflow_payload = {
            "workflow_id": "wf_test",
            "title": "Nested ForLoop Test",
            "workflow_definition": {"parameters": []},
        }

        script_block_mock = AsyncMock(return_value=True)

        # The patchers are created here and only become active inside the ``with``.
        schema_patcher = patch(
            "skyvern.core.script_generations.generate_script.generate_workflow_parameters_schema",
            new_callable=AsyncMock,
            return_value=("", {}),
        )
        script_block_patcher = patch(
            "skyvern.core.script_generations.generate_script.create_or_update_script_block",
            script_block_mock,
        )

        with schema_patcher, script_block_patcher:
            generated = await generate_workflow_script_python_code(
                file_name="test_nested.py",
                workflow_run_request={"workflow_id": "wpid_test"},
                workflow=workflow_payload,
                blocks=[outer_loop_block],
                actions_by_task=recorded_actions,
                script_id="script_1",
                script_revision_id="rev_1",
                organization_id="org_1",
            )

            # The output has to be syntactically valid Python.
            try:
                ast.parse(generated.source_code)
            except SyntaxError as exc:
                pytest.fail(f"Generated script has SyntaxError: {exc}\n\n{generated.source_code}")

            # One persisted script block per nesting level.
            call_labels = []
            for recorded_call in script_block_mock.call_args_list:
                call_labels.append(recorded_call.kwargs.get("block_label"))

            expectations = (
                ("outer_loop", "outer for-loop"),
                ("inner_loop", "inner for-loop"),
                ("extract_data", "inner extraction"),
            )
            for expected_label, description in expectations:
                assert expected_label in call_labels, f"{description} missing. Labels: {call_labels}"

    @pytest.mark.asyncio
    async def test_nested_forloop_inner_block_gets_cached_function(self) -> None:
        """The task block inside a nested for-loop should get a @skyvern.cached function.

        The fixture nests a ``file_download`` block two loops deep. The
        generated source must parse and contain both the ``@skyvern.cached``
        decorator and the inner block's label.
        """
        from skyvern.core.script_generations.generate_script import generate_workflow_script_python_code

        # Build the block tree inside-out so each nesting level has a name.
        download_block = {
            "block_type": "file_download",
            "label": "download_file",
            "url": "https://example.com",
            "navigation_goal": "Download the file",
            "task_id": "task_download",
            "workflow_run_block_id": "wfrb_download",
        }
        doc_loop_block = {
            "block_type": "for_loop",
            "label": "doc_loop",
            "loop_variable_reference": "{{ docs }}",
            "workflow_run_block_id": "wfrb_doc",
            "loop_blocks": [download_block],
        }
        page_loop_block = {
            "block_type": "for_loop",
            "label": "page_loop",
            "loop_variable_reference": "{{ pages }}",
            "workflow_run_block_id": "wfrb_page",
            "loop_blocks": [doc_loop_block],
        }

        click_action = {
            "action_type": "click",
            "action_id": "act_1",
            "xpath": "//a[@class='download']",
            "element_id": "elem_dl",
            "text": None,
        }
        recorded_actions = {"task_download": [click_action]}

        workflow_payload = {
            "workflow_id": "wf_test",
            "title": "Nested Download Test",
            "workflow_definition": {"parameters": []},
        }

        script_block_mock = AsyncMock(return_value=True)

        # The patchers are created here and only become active inside the ``with``.
        schema_patcher = patch(
            "skyvern.core.script_generations.generate_script.generate_workflow_parameters_schema",
            new_callable=AsyncMock,
            return_value=("", {}),
        )
        script_block_patcher = patch(
            "skyvern.core.script_generations.generate_script.create_or_update_script_block",
            script_block_mock,
        )

        with schema_patcher, script_block_patcher:
            generated = await generate_workflow_script_python_code(
                file_name="test_nested_download.py",
                workflow_run_request={"workflow_id": "wpid_test"},
                workflow=workflow_payload,
                blocks=[page_loop_block],
                actions_by_task=recorded_actions,
                script_id="script_1",
                script_revision_id="rev_1",
                organization_id="org_1",
            )

            # The output has to be syntactically valid Python.
            try:
                ast.parse(generated.source_code)
            except SyntaxError as exc:
                pytest.fail(f"SyntaxError: {exc}\n\n{generated.source_code}")

            # Inner block should have a @skyvern.cached function
            source = generated.source_code
            assert "@skyvern.cached" in source
            assert "download_file" in source

    @pytest.mark.asyncio
    async def test_nested_forloop_labels_tracked_in_processed_labels(self) -> None:
        """Nested for-loop labels should be tracked so cached blocks are not emitted twice.

        The same labels that appear in the executed block tree are also passed
        in as ``cached_blocks``. The generated source must parse and contain
        ``@skyvern.cached`` exactly once; a second occurrence would mean the
        "preserve unexecuted branch" step re-emitted the cached copy.
        """
        from skyvern.core.script_generations.generate_script import generate_workflow_script_python_code

        # Build the block tree inside-out so each nesting level has a name.
        deep_extract_block = {
            "block_type": "extraction",
            "label": "deep_extract",
            "data_extraction_goal": "Extract",
            "task_id": "task_deep",
            "workflow_run_block_id": "wfrb_deep",
        }
        inner_loop_block = {
            "block_type": "for_loop",
            "label": "inner_loop",
            "loop_variable_reference": "{{ items }}",
            "workflow_run_block_id": "wfrb_inner",
            "loop_blocks": [deep_extract_block],
        }
        outer_loop_block = {
            "block_type": "for_loop",
            "label": "outer_loop",
            "loop_variable_reference": "{{ urls }}",
            "workflow_run_block_id": "wfrb_outer",
            "loop_blocks": [inner_loop_block],
        }

        extract_action = {
            "action_type": "extract",
            "action_id": "a1",
            "xpath": "//div",
            "element_id": "e1",
            "text": None,
            "data_extraction_goal": "Extract",
        }
        recorded_actions = {"task_deep": [extract_action]}

        # Also provide the same labels as cached_blocks to test dedup
        stale_extract = MagicMock(
            code="@skyvern.cached(cache_key='deep_extract')\nasync def deep_extract_fn(): pass",
            run_signature="await skyvern.extract(prompt='Extract', label='deep_extract')",
            workflow_run_id="wr_old",
            workflow_run_block_id="wfrb_old",
            input_fields=None,
        )

        # The cached loop uses the same text for its code and its run signature.
        loop_statement = "async for current_value in skyvern.loop(values='{{ items }}', label='inner_loop'): pass"
        stale_loop = MagicMock(
            code=loop_statement,
            run_signature=loop_statement,
            workflow_run_id="wr_old",
            workflow_run_block_id="wfrb_old",
            input_fields=None,
        )

        workflow_payload = {
            "workflow_id": "wf_test",
            "title": "Dedup Test",
            "workflow_definition": {"parameters": []},
        }

        script_block_mock = AsyncMock(return_value=True)

        # The patchers are created here and only become active inside the ``with``.
        schema_patcher = patch(
            "skyvern.core.script_generations.generate_script.generate_workflow_parameters_schema",
            new_callable=AsyncMock,
            return_value=("", {}),
        )
        script_block_patcher = patch(
            "skyvern.core.script_generations.generate_script.create_or_update_script_block",
            script_block_mock,
        )

        with schema_patcher, script_block_patcher:
            generated = await generate_workflow_script_python_code(
                file_name="test_dedup.py",
                workflow_run_request={"workflow_id": "wpid_test"},
                workflow=workflow_payload,
                blocks=[outer_loop_block],
                actions_by_task=recorded_actions,
                script_id="s1",
                script_revision_id="r1",
                organization_id="o1",
                cached_blocks={
                    "deep_extract": stale_extract,
                    "inner_loop": stale_loop,
                },
            )

            # The output has to be syntactically valid Python.
            try:
                ast.parse(generated.source_code)
            except SyntaxError as exc:
                pytest.fail(f"SyntaxError: {exc}\n\n{generated.source_code}")

            # @skyvern.cached should appear exactly once (not duplicated by
            # the "preserve unexecuted branch" section)
            decorator_hits = generated.source_code.count("@skyvern.cached")
            failure_message = (
                f"Expected 1 @skyvern.cached but found {decorator_hits}. "
                f"Nested labels may not be tracked in processed_labels.\n\n{generated.source_code}"
            )
            assert decorator_hits == 1, failure_message

    @pytest.mark.asyncio
    async def test_nested_forloop_uses_cached_entry_when_valid(self) -> None:
        """Cache-hit path for a for-loop nested inside another for-loop.

        ``inner_loop`` and ``deep_extract`` are both supplied via
        ``cached_blocks`` and neither label is listed in
        ``updated_block_labels``. create_or_update_script_block must still be
        called for both labels (so their metadata is persisted), and the
        ``inner_loop`` call must carry the cached code and the cached
        workflow_run_id instead of freshly built values.
        """
        from skyvern.core.script_generations.generate_script import generate_workflow_script_python_code

        # Stand-ins for previously persisted script blocks.
        inner_loop_code = (
            "async for current_value in skyvern.loop(values='{{ docs }}', label='inner_loop'):\n"
            "    pass"
        )
        inner_loop_entry = MagicMock(
            code=inner_loop_code,
            run_signature=inner_loop_code.strip(),
            workflow_run_id="wr_cached",
            workflow_run_block_id="wfrb_cached",
            input_fields=None,
        )
        deep_extract_entry = MagicMock(
            code=(
                "@skyvern.cached(cache_key='deep_extract')\n"
                "async def deep_extract_fn(page, context):\n"
                "    await skyvern.extract(prompt='Get data', label='deep_extract')"
            ),
            run_signature="await skyvern.extract(prompt='Get data', label='deep_extract')",
            workflow_run_id="wr_cached",
            workflow_run_block_id="wfrb_cached_deep",
            input_fields=None,
        )

        # Workflow shape: outer_loop -> inner_loop -> deep_extract, assembled
        # from the innermost block outwards.
        deep_extract_block = {
            "block_type": "extraction",
            "label": "deep_extract",
            "data_extraction_goal": "Get data",
            "task_id": "task_deep",
            "workflow_run_block_id": "wfrb_deep",
        }
        inner_loop_block = {
            "block_type": "for_loop",
            "label": "inner_loop",
            "loop_variable_reference": "{{ docs }}",
            "workflow_run_block_id": "wfrb_inner",
            "loop_blocks": [deep_extract_block],
        }
        outer_loop_block = {
            "block_type": "for_loop",
            "label": "outer_loop",
            "loop_variable_reference": "{{ urls }}",
            "workflow_run_block_id": "wfrb_outer",
            "loop_blocks": [inner_loop_block],
        }

        workflow_stub = {
            "workflow_id": "wf_test",
            "title": "Cache Hit Test",
            "workflow_definition": {"parameters": []},
        }

        script_block_writer = AsyncMock(return_value=True)
        schema_patch = patch(
            "skyvern.core.script_generations.generate_script.generate_workflow_parameters_schema",
            new_callable=AsyncMock,
            return_value=("", {}),
        )
        writer_patch = patch(
            "skyvern.core.script_generations.generate_script.create_or_update_script_block",
            script_block_writer,
        )

        with schema_patch, writer_patch:
            generated = await generate_workflow_script_python_code(
                file_name="test_cache_hit.py",
                workflow_run_request={"workflow_id": "wpid_test"},
                workflow=workflow_stub,
                blocks=[outer_loop_block],
                # No recorded actions at all: everything must come from the cache.
                actions_by_task={},
                script_id="s1",
                script_revision_id="r1",
                organization_id="o1",
                cached_blocks={
                    "inner_loop": inner_loop_entry,
                    "deep_extract": deep_extract_entry,
                },
                # inner_loop and its parent outer_loop are both absent here,
                # so the cached entries are expected to be reused.
                updated_block_labels={"__start_block__"},
            )

        # The emitted script has to be syntactically valid Python.
        try:
            ast.parse(generated.source_code)
        except SyntaxError as syntax_err:
            pytest.fail(f"SyntaxError: {syntax_err}\n\n{generated.source_code}")

        # Every nested label must have produced a script_block write.
        recorded_calls = script_block_writer.call_args_list
        seen_labels = [recorded.kwargs.get("block_label") for recorded in recorded_calls]
        assert "inner_loop" in seen_labels, f"inner_loop missing from calls: {seen_labels}"
        assert "deep_extract" in seen_labels, f"deep_extract missing from calls: {seen_labels}"

        # Inspect the first write for inner_loop: it must reuse the cached
        # code and provenance rather than a rebuilt version.
        inner_loop_kwargs = recorded_calls[seen_labels.index("inner_loop")].kwargs
        assert inner_loop_kwargs["block_code"] == inner_loop_entry.code
        assert inner_loop_kwargs["workflow_run_id"] == "wr_cached"

    @pytest.mark.asyncio
    async def test_nested_forloop_extraction_url_uses_render_template(self) -> None:
        """Regression: a templated URL on a nested extraction block is rendered at runtime.

        An extraction block two for-loops deep has the URL
        `{{ outer_page_loop.current_value.url }}`. The stored block code must pass
        that template through `skyvern.render_template("...")` rather than
        embedding it as a plain Python string literal.

        Follow-up to SKY-8757, surfaced by the second-run cache-hit check in
        tests/manual/test_nested_forloop_workflow.py: before the fix the cached
        code called `await skyvern.extract(...)` with no `url=` argument, and the
        fallback `ExtractionBlock(url=None)` raised `InvalidWorkflowTaskURLState`
        at agent.py:200 on every cache hit.
        """
        from skyvern.core.script_generations.generate_script import generate_workflow_script_python_code

        # Workflow shape: outer_page_loop -> inner_field_loop -> extract_field_data,
        # assembled from the innermost block outwards.
        extraction_block = {
            "block_type": "extraction",
            "label": "extract_field_data",
            "url": "{{ outer_page_loop.current_value.url }}",
            "data_extraction_goal": "Extract {{ current_value }}.",
            "data_schema": {"type": "object", "properties": {}},
            "task_id": "task_extract",
            "workflow_run_block_id": "wfrb_extract",
        }
        field_loop_block = {
            "block_type": "for_loop",
            "label": "inner_field_loop",
            "loop_variable_reference": "{{ current_value.fields }}",
            "workflow_run_block_id": "wfrb_inner",
            "loop_blocks": [extraction_block],
        }
        page_loop_block = {
            "block_type": "for_loop",
            "label": "outer_page_loop",
            "loop_variable_reference": "{{ pages }}",
            "workflow_run_block_id": "wfrb_outer",
            "loop_blocks": [field_loop_block],
        }

        recorded_extract_action = {
            "action_type": "extract",
            "action_id": "action_1",
            "xpath": "//div",
            "element_id": "elem_1",
            "text": None,
            "data_extraction_goal": "Extract",
        }

        workflow_stub = {
            "workflow_id": "wf_test",
            "title": "Nested Templated URL Test",
            "workflow_definition": {"parameters": []},
        }

        script_block_writer = AsyncMock(return_value=True)
        schema_patch = patch(
            "skyvern.core.script_generations.generate_script.generate_workflow_parameters_schema",
            new_callable=AsyncMock,
            return_value=("", {}),
        )
        writer_patch = patch(
            "skyvern.core.script_generations.generate_script.create_or_update_script_block",
            script_block_writer,
        )

        with schema_patch, writer_patch:
            generated = await generate_workflow_script_python_code(
                file_name="test_templated_url.py",
                workflow_run_request={"workflow_id": "wpid_test"},
                workflow=workflow_stub,
                blocks=[page_loop_block],
                actions_by_task={"task_extract": [recorded_extract_action]},
                script_id="script_1",
                script_revision_id="rev_1",
                organization_id="org_1",
            )

        # The whole generated script has to parse.
        try:
            ast.parse(generated.source_code)
        except SyntaxError as syntax_err:
            pytest.fail(f"Generated script has SyntaxError: {syntax_err}\n\n{generated.source_code}")

        # Locate the first script_block write made for the extraction block.
        recorded_calls = script_block_writer.call_args_list
        extract_call = None
        for recorded in recorded_calls:
            if recorded.kwargs.get("block_label") == "extract_field_data":
                extract_call = recorded
                break
        labels_seen = [recorded.kwargs.get("block_label") for recorded in recorded_calls]
        assert extract_call is not None, (
            "No create_or_update_script_block call for extract_field_data. "
            f"Labels seen: {labels_seen}"
        )
        block_code: str = extract_call.kwargs["block_code"]

        # Positive checks: the template text is kept and wrapped in a
        # skyvern.render_template() call, so it is resolved at runtime from
        # the workflow_run_context.values populated by skyvern.loop().
        assert "skyvern.render_template" in block_code, (
            f"extract_field_data block_code does not contain skyvern.render_template:\n\n{block_code}"
        )
        assert "{{ outer_page_loop.current_value.url }}" in block_code, (
            f"Template string not present in block_code:\n\n{block_code}"
        )

        # Negative checks: neither quoting style of a raw `url=<literal>`
        # keyword argument may appear.
        raw_literal_forms = (
            "url='{{ outer_page_loop.current_value.url }}'",
            'url="{{ outer_page_loop.current_value.url }}"',
        )
        for literal_form in raw_literal_forms:
            assert literal_form not in block_code, (
                f"URL was emitted as a literal, not a render_template call:\n\n{block_code}"
            )


# ---------------------------------------------------------------------------
# Part 3: _render_value unit tests
# ---------------------------------------------------------------------------
class TestRenderValue:
    """Exercise _render_value, the CST-building helper from generate_script.py."""

    def test_empty_or_none_returns_valid_cst_node(self) -> None:
        """An empty string or None must produce a node that libcst can serialize.

        Regression guard: an earlier version of the helper returned
        `cst.SimpleString("")`, which libcst rejects because the value has no
        enclosing quotes. The helper now hands the empty/None case to `_value`,
        so `""` becomes `SimpleString("''")` and `None` becomes `Name("None")`.
        """
        import libcst as cst

        from skyvern.core.script_generations.generate_script import _render_value

        printer = cst.Module(body=[])

        empty_node = _render_value("")
        assert isinstance(empty_node, cst.SimpleString)
        assert printer.code_for_node(empty_node) == "''"

        # For None only the rendered source is checked: per the note above it
        # comes back as a Name node, not a SimpleString, which is still a
        # valid expression.
        none_node = _render_value(None)
        assert printer.code_for_node(none_node) == "None"

    def test_plain_string_returns_literal_simple_string(self) -> None:
        """A string with no template markers is emitted as an ordinary Python literal."""
        import libcst as cst

        from skyvern.core.script_generations.generate_script import _render_value

        literal_node = _render_value("https://example.com")
        # Expected value is the repr() form of the input, single quotes included.
        assert isinstance(literal_node, cst.SimpleString)
        assert literal_node.value == "'https://example.com'"

    def test_template_string_emits_render_template_call(self) -> None:
        """A string holding a {{...}} placeholder becomes a skyvern.render_template(...) call."""
        import libcst as cst

        from skyvern.core.script_generations.generate_script import _render_value

        call_node = _render_value("{{ outer_page_loop.current_value.url }}")
        assert isinstance(call_node, cst.Call)

        # Serialize the node and check the call target and its template argument.
        source_text = cst.Module(body=[]).code_for_node(call_node)
        assert source_text.startswith("skyvern.render_template(")
        # Either quoting style is acceptable for the template argument.
        accepted_arguments = (
            '"{{ outer_page_loop.current_value.url }}"',
            "'{{ outer_page_loop.current_value.url }}'",
        )
        assert any(argument in source_text for argument in accepted_arguments)

    def test_template_string_with_data_variable_appends_kwarg(self) -> None:
        """Supplying data_variable_name adds a data=<name> keyword to the emitted call."""
        import libcst as cst

        from skyvern.core.script_generations.generate_script import _render_value

        call_node = _render_value("{{ current_value }}", data_variable_name="context_params")
        assert isinstance(call_node, cst.Call)

        source_text = cst.Module(body=[]).code_for_node(call_node)
        # Spacing around "=" depends on how libcst prints the kwarg; allow both.
        accepted_kwargs = ("data=context_params", "data = context_params")
        assert any(kwarg in source_text for kwarg in accepted_kwargs)