# Skyvern/skyvern/forge/sdk/copilot/agent.py
"""Copilot agent — multi-turn tool-use agent for workflow building.
Uses the OpenAI Agents SDK with LiteLLM for multi-provider LLM support.
"""
from __future__ import annotations
import asyncio
import contextlib
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from agents.result import RunResultStreaming
from skyvern.forge.sdk.experimentation.llm_prompt_config import LLMAPIHandler
from skyvern.forge.sdk.schemas.workflow_copilot import WorkflowCopilotChatRequest
import structlog
import yaml
from pydantic import ValidationError
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.copilot.block_goal_wrapping import wrap_block_goals
from skyvern.forge.sdk.copilot.context import AgentResult, CopilotContext, StructuredContext
from skyvern.forge.sdk.copilot.output_utils import extract_final_text, parse_final_response
from skyvern.forge.sdk.copilot.tracing_setup import _copilot_model_name, ensure_tracing_initialized, is_tracing_enabled
from skyvern.forge.sdk.routes.event_source_stream import EventSourceStream
from skyvern.forge.sdk.schemas.workflow_copilot import (
WorkflowCopilotChatHistoryMessage,
)
from skyvern.forge.sdk.workflow.exceptions import BaseWorkflowHTTPException
from skyvern.utils.strings import escape_code_fences
LOG = structlog.get_logger()
# Static knowledge base text injected into the system prompt. Resolved
# relative to this file: three levels up into the package's prompts directory.
WORKFLOW_KNOWLEDGE_BASE_PATH = (
    Path(__file__).resolve().parents[2] / "prompts" / "skyvern" / "workflow_knowledge_base.txt"
)
# Hard cap on agent-loop turns; exceeding it raises MaxTurnsExceeded, which
# run_copilot_agent converts into a graceful "here's what I have" reply.
MAX_TURNS = 25
def _format_chat_history(chat_history: list[WorkflowCopilotChatHistoryMessage]) -> str:
if not chat_history:
return ""
lines = [f"{msg.sender}: {msg.content}" for msg in chat_history]
return "\n".join(lines)
def _build_system_prompt(
    tool_usage_guide: str,
    security_rules: str = "",
) -> str:
    """Render the agent's system prompt.

    Reads the static workflow knowledge base from disk and interpolates it,
    the current UTC timestamp, the tool usage guide, and any security rules
    into the ``workflow-copilot-agent`` template.
    """
    knowledge_base_text = WORKFLOW_KNOWLEDGE_BASE_PATH.read_text(encoding="utf-8")
    return prompt_engine.load_prompt(
        template="workflow-copilot-agent",
        workflow_knowledge_base=knowledge_base_text,
        current_datetime=datetime.now(timezone.utc).isoformat(),
        tool_usage_guide=tool_usage_guide,
        security_rules=security_rules,
    )
def _build_user_context(
    workflow_yaml: str,
    chat_history_text: str,
    global_llm_context: str,
    debug_run_info_text: str,
    user_message: str,
) -> str:
    """Render untrusted context into the user message with code fencing.

    Every argument is treated as untrusted and passed through
    ``escape_code_fences`` before the template interpolates it into a
    triple-backtick block. Without this, a value containing a literal
    ``` would close the fence early and let the model see the rest as
    system-level content (the classic code-fence breakout). The old
    copilot path in ``workflow_copilot.py`` and ``feasibility_gate.py``
    both apply the same guard.
    """
    sanitized = {
        "workflow_yaml": escape_code_fences(workflow_yaml or ""),
        "chat_history": escape_code_fences(chat_history_text),
        "global_llm_context": escape_code_fences(global_llm_context or ""),
        "debug_run_info": escape_code_fences(debug_run_info_text),
        "user_message": escape_code_fences(user_message),
    }
    return prompt_engine.load_prompt(template="workflow-copilot-user", **sanitized)
def _build_tool_usage_guide(tool_names_and_descriptions: list[tuple[str, str]]) -> str:
if not tool_names_and_descriptions:
return ""
return "\n".join(
f"- **{name}** — {description or 'No description provided.'}"
for name, description in tool_names_and_descriptions
)
def _normalize_failure_reason(failure_reason: str | None) -> str:
if not failure_reason:
return "The workflow test run failed."
normalized = failure_reason.split("Call log:", 1)[0].strip()
normalized = " ".join(normalized.split())
if len(normalized) > 240:
normalized = normalized[:237].rstrip() + "..."
return normalized or "The workflow test run failed."
# Maps the top failure category of the last test run to a short follow-up
# question appended to the failure reply. Values begin with a space on
# purpose: they are concatenated directly after a sentence-ending period
# in _rewrite_failed_test_response.
_FAILURE_FOLLOW_UP = {
    "NAVIGATION_FAILURE": " Can you confirm the URL is correct?",
    "PROXY_ERROR": " Want me to retry with a different proxy location?",
    "PAGE_LOAD_TIMEOUT": " Can you confirm the URL and try again in a moment?",
    "ANTI_BOT_DETECTION": " Want me to retry with a different proxy location?",
    "AUTH_FAILURE": " The site rejected the login — is the stored password still valid?",
    "CREDENTIAL_ERROR": " I couldn't find a credential to use — can you link one in Settings?",
}
def _rewrite_failed_test_response(user_response: str, ctx: CopilotContext) -> str:
    """Ground the reply when this turn produced no test-verified proposal.

    Reshapes the model's reply when we cannot ship a proposal so the user
    sees why nothing is being offered rather than an un-grounded claim:
    a tested-and-failed draft gets a failure summary plus a category-specific
    follow-up; an edited-but-unverified draft gets an honest "couldn't
    verify" message. Otherwise the original reply passes through unchanged.
    """
    block_count = ctx.last_update_block_count
    if ctx.last_test_ok is False and block_count is not None:
        if block_count > 0:
            unit = "block" if block_count == 1 else "blocks"
            draft_phrase = f"a draft workflow with {block_count} {unit}"
        else:
            draft_phrase = "a draft workflow"
        failure_summary = _normalize_failure_reason(ctx.last_test_failure_reason)
        follow_up = _FAILURE_FOLLOW_UP.get(ctx.last_failure_category_top or "", "")
        return f"I created {draft_phrase} and tested it, but the test failed. Failure: {failure_summary}.{follow_up}"
    if ctx.last_test_ok is None and block_count is not None and ctx.last_workflow is not None:
        # Agent edited the YAML but didn't verify it this turn; don't promise
        # a re-run we can't durably execute (the restore helper rolls the
        # mid-turn DB write back and there's no durable draft to re-test).
        return (
            "I drafted an update but wasn't able to verify it this turn. "
            "Could you share more context about what you'd like me to do?"
        )
    return user_response
def _verified_workflow_or_none(ctx: CopilotContext) -> tuple[Any, str | None]:
"""Only surface a workflow proposal when it passed a test this turn.
SKY-9143: the Accept/Reject UI must never reflect a workflow we haven't
proven works. Every agent exit path that builds an AgentResult directly
(cancel, max-turns, timeout, non-retriable nav error, catch-all Exception)
funnels through this so the strict invariant holds regardless of which
branch the run took.
"""
if ctx.last_workflow is not None and ctx.last_test_ok is True:
return ctx.last_workflow, ctx.last_workflow_yaml
return None, None
def _build_exit_result(ctx: CopilotContext, user_response: str, global_llm_context: str | None) -> AgentResult:
    """Build the AgentResult for agent-loop exits that bypass ``_translate_to_agent_result``.

    Only a test-verified workflow (if any) is surfaced as the proposal;
    token usage and the persisted flag are carried over from the run context.
    """
    workflow, workflow_yaml = _verified_workflow_or_none(ctx)
    return AgentResult(
        user_response=user_response,
        updated_workflow=workflow,
        global_llm_context=global_llm_context,
        workflow_yaml=workflow_yaml,
        workflow_was_persisted=ctx.workflow_persisted,
        total_tokens=ctx.total_tokens_used,
    )
def _translate_to_agent_result(
    result: RunResultStreaming,
    ctx: CopilotContext,
    global_llm_context: str | None,
    chat_request: WorkflowCopilotChatRequest,
    organization_id: str,
) -> AgentResult:
    """Convert a finished agent run into the AgentResult returned to the caller.

    Parses the model's final JSON action, handles the discouraged inline
    REPLACE_WORKFLOW path (banned-block rejection, block-goal wrapping,
    YAML validation), grounds the reply against this turn's test outcome,
    and only surfaces a workflow proposal that passed a test this turn
    (via ``_verified_workflow_or_none``).
    """
    # Deferred tools.py imports here and below: tools.py -> routes.workflow_copilot -> this module (circular at import time).
    from skyvern.forge.sdk.copilot.tools import _process_workflow_yaml

    text = extract_final_text(result)
    if not text:
        # No final text at all — substitute a canned REPLY so parsing below succeeds.
        text = '{"type": "REPLY", "user_response": "I\'m not sure how to help with that. Could you rephrase?"}'
    action_data = parse_final_response(text)
    user_response = action_data.get("user_response") or "Done."
    resp_type = action_data.get("type", "REPLY")
    # Unknown response types degrade to a plain REPLY.
    if resp_type not in ("REPLY", "ASK_QUESTION", "REPLACE_WORKFLOW"):
        resp_type = "REPLY"
    last_workflow = ctx.last_workflow
    last_workflow_yaml = ctx.last_workflow_yaml
    if resp_type == "REPLACE_WORKFLOW":
        LOG.warning("Agent used inline REPLACE_WORKFLOW instead of update_workflow tool")
        workflow_yaml = action_data.get("workflow_yaml", "")
        if workflow_yaml:
            # REPLACE_WORKFLOW bypasses _update_workflow, so the post-emission
            # reject has to run here too. Skip processing on detection; leave
            # last_workflow / last_workflow_yaml at their pre-REPLACE values so
            # the rejected YAML does not latch onto ctx.
            from skyvern.forge.sdk.copilot.tools import (
                _banned_block_reject_message,
                _detect_new_banned_blocks,
                _record_banned_block_reject_span,
            )

            banned_items = _detect_new_banned_blocks(workflow_yaml, ctx.last_workflow_yaml)
            if banned_items:
                _record_banned_block_reject_span("replace_workflow_inline", banned_items)
                user_response = f"{user_response}\n\n(Note: {_banned_block_reject_message(banned_items)})"
                # Clearing the YAML short-circuits the processing branch below.
                workflow_yaml = ""
        if workflow_yaml:
            if ctx.user_message:
                workflow_yaml = wrap_block_goals(workflow_yaml, ctx.user_message)
            else:
                LOG.warning("REPLACE_WORKFLOW inline path missing ctx.user_message; skipping block-goal wrap")
            try:
                last_workflow = _process_workflow_yaml(
                    workflow_id=chat_request.workflow_id,
                    workflow_permanent_id=chat_request.workflow_permanent_id,
                    organization_id=organization_id,
                    workflow_yaml=workflow_yaml,
                )
                last_workflow_yaml = workflow_yaml
            except (yaml.YAMLError, ValidationError, BaseWorkflowHTTPException) as e:
                # Validation failure: keep the reply but annotate it so the
                # user knows the proposed YAML was rejected.
                LOG.warning("Failed to process final workflow YAML", error=str(e))
                user_response = (
                    f"{user_response}\n\n"
                    f"(Note: The proposed workflow had a validation error: {str(e)[:200]}. "
                    f"Please ask me to fix it.)"
                )
    # Inline REPLACE_WORKFLOW bypasses _update_workflow, so ctx.last_workflow
    # is whatever the tool layer last saw. Write the REPLACE candidate onto
    # ctx and invalidate any prior passing test: the REPLACE yaml itself was
    # never run, so a leftover ``last_test_ok is True`` from an earlier tested
    # (but different) yaml must not promote this untested one.
    if resp_type == "REPLACE_WORKFLOW" and last_workflow is not ctx.last_workflow:
        ctx.last_workflow = last_workflow
        ctx.last_workflow_yaml = last_workflow_yaml
        ctx.last_test_ok = None
    # ASK_QUESTION replies carry a specific clarifying question — often the
    # "stop and ask" unblocker the system prompt now requires when the agent
    # cannot test. The generic rewrite would replace it with a vague
    # "Could you share more context", so skip it for ASK_QUESTION.
    if resp_type != "ASK_QUESTION":
        user_response = _rewrite_failed_test_response(str(user_response), ctx)
    # Strict invariant (SKY-9143): only a test-verified workflow is proposed.
    last_workflow, last_workflow_yaml = _verified_workflow_or_none(ctx)
    # Merge model-provided cross-turn context (dict or JSON string) over the
    # prior context, then fold in this turn's tool activity summary.
    llm_context_raw = action_data.get("global_llm_context")
    structured = StructuredContext.from_json_str(global_llm_context)
    if isinstance(llm_context_raw, dict):
        try:
            structured = StructuredContext.model_validate(llm_context_raw)
        except Exception:
            # Malformed model output: keep the prior context instead of failing the turn.
            pass
    elif isinstance(llm_context_raw, str):
        structured = StructuredContext.from_json_str(llm_context_raw)
    structured.merge_turn_summary(ctx.tool_activity)
    enriched_context = structured.to_json_str()
    return AgentResult(
        user_response=str(user_response),
        updated_workflow=last_workflow,
        global_llm_context=enriched_context or None,
        response_type=resp_type,
        workflow_yaml=last_workflow_yaml,
        workflow_was_persisted=ctx.workflow_persisted,
        total_tokens=ctx.total_tokens_used,
    )
def _build_feasibility_clarification_result(
    question: str,
    rationale: str | None,
    user_message: str,
    prior_global_llm_context: str | None,
    prior_workflow_yaml: str | None,
) -> AgentResult:
    """Construct an AgentResult for the feasibility-gate fast-path.

    Preserves structured cross-turn context, sets user_goal from the
    classifier's rationale (or the raw user message as a fallback), and
    appends a decisions_made entry so a follow-up turn can see that a
    clarification was already asked and return ``proceed`` instead of
    re-asking.
    """
    structured = StructuredContext.from_json_str(prior_global_llm_context)
    if not structured.user_goal:
        # Cap the goal at 300 chars so the stored context stays compact.
        structured.user_goal = (rationale or user_message)[:300]
    structured.decisions_made.append(f"feasibility-gate clarification asked: {question}")
    return AgentResult(
        user_response=question,
        updated_workflow=None,
        global_llm_context=structured.to_json_str(),
        response_type="ASK_QUESTION",
        workflow_yaml=prior_workflow_yaml or None,
        workflow_was_persisted=False,
        clear_proposed_workflow=True,
    )
async def run_copilot_agent(
    stream: EventSourceStream,
    organization_id: str,
    chat_request: WorkflowCopilotChatRequest,
    chat_history: list[WorkflowCopilotChatHistoryMessage],
    global_llm_context: str | None,
    debug_run_info_text: str,
    llm_api_handler: LLMAPIHandler | None,
    api_key: str | None = None,
    security_rules: str = "",
) -> AgentResult:
    """Run one multi-turn copilot agent loop for a single chat request.

    Order of operations: feasibility-gate fast path, Agents SDK availability
    check, agent/MCP setup, enforced streaming run, then translation of the
    outcome into an AgentResult. Every handled failure path returns an
    AgentResult rather than raising; only a non-"agents"
    ModuleNotFoundError is re-raised.
    """
    # Preflight feasibility classifier. Never raises (errors fall through to
    # proceed). Off by default; enable via settings.ENABLE_COPILOT_FEASIBILITY_GATE.
    from skyvern.forge.sdk.copilot.feasibility_gate import run_feasibility_gate

    feasibility_verdict = await run_feasibility_gate(
        user_message=chat_request.message,
        workflow_yaml=chat_request.workflow_yaml or "",
        chat_history=_format_chat_history(chat_history),
        global_llm_context=global_llm_context or "",
        distinct_id=chat_request.workflow_permanent_id,
        organization_id=organization_id,
    )
    if feasibility_verdict.verdict == "ask_clarification" and feasibility_verdict.question:
        # Fast path: answer with a clarifying question without starting the agent loop.
        return _build_feasibility_clarification_result(
            question=feasibility_verdict.question,
            rationale=feasibility_verdict.rationale,
            user_message=chat_request.message,
            prior_global_llm_context=global_llm_context,
            prior_workflow_yaml=chat_request.workflow_yaml,
        )
    try:
        from agents import Agent, trace
        from agents.exceptions import MaxTurnsExceeded
        from agents.mcp import MCPServerManager
    except ModuleNotFoundError as e:
        if e.name == "agents":
            # Missing optional dependency: degrade to an explanatory reply so
            # the operator sees the remediation instead of a server error.
            LOG.error(
                "OpenAI Agents SDK dependency missing",
                error=str(e),
                workflow_permanent_id=chat_request.workflow_permanent_id,
            )
            return AgentResult(
                user_response=(
                    "Copilot backend is missing the OpenAI Agents SDK dependency. "
                    "Rebuild or redeploy the backend image so `openai-agents` is installed."
                ),
                updated_workflow=None,
                global_llm_context=global_llm_context,
                workflow_yaml=chat_request.workflow_yaml or None,
            )
        raise
    # Deferred imports: these modules (directly or transitively) import this one.
    from skyvern.cli.mcp_tools import mcp as skyvern_mcp
    from skyvern.forge.sdk.copilot.enforcement import (
        CopilotNonRetriableNavError,
        CopilotTotalTimeoutError,
        run_with_enforcement,
    )
    from skyvern.forge.sdk.copilot.hooks import CopilotRunHooks
    from skyvern.forge.sdk.copilot.mcp_adapter import SkyvernOverlayMCPServer
    from skyvern.forge.sdk.copilot.model_resolver import resolve_model_config
    from skyvern.forge.sdk.copilot.session_factory import create_copilot_session
    from skyvern.forge.sdk.copilot.tools import (
        NATIVE_TOOLS,
        _build_skyvern_mcp_overlays,
        get_skyvern_mcp_alias_map,
    )

    # Mutable per-run state shared with tools and hooks (via the context provider below).
    ctx = CopilotContext(
        organization_id=organization_id,
        workflow_id=chat_request.workflow_id,
        workflow_permanent_id=chat_request.workflow_permanent_id,
        workflow_yaml=chat_request.workflow_yaml or "",
        browser_session_id=None,
        stream=stream,
        api_key=api_key,
        user_message=chat_request.message,
    )
    model_name, run_config, llm_key, supports_vision = resolve_model_config(llm_api_handler)
    ctx.supports_vision = supports_vision
    ensure_tracing_initialized()
    # Expose Skyvern MCP tools through an overlay server restricted to the alias allowlist.
    alias_map = get_skyvern_mcp_alias_map()
    overlays = _build_skyvern_mcp_overlays()
    mcp_server = SkyvernOverlayMCPServer(
        transport=skyvern_mcp,
        overlays=overlays,
        alias_map=alias_map,
        allowlist=frozenset(alias_map.values()),
        context_provider=lambda: ctx,
    )
    # Names/descriptions of native tools + MCP overlays feed the tool usage guide.
    tool_info: list[tuple[str, str]] = [(tool.name, tool.description or "") for tool in NATIVE_TOOLS]
    tool_info.extend((name, overlay.description or "") for name, overlay in overlays.items())
    chat_history_text = _format_chat_history(chat_history)
    tool_usage_guide = _build_tool_usage_guide(tool_info)
    system_prompt = _build_system_prompt(
        tool_usage_guide=tool_usage_guide,
        security_rules=security_rules,
    )
    agent = Agent(
        name="workflow-copilot",
        instructions=system_prompt,
        tools=list(NATIVE_TOOLS),
        mcp_servers=[mcp_server],
        model=model_name,
    )
    # All untrusted context is fenced/escaped inside _build_user_context.
    user_message = _build_user_context(
        workflow_yaml=chat_request.workflow_yaml or "",
        chat_history_text=chat_history_text,
        global_llm_context=global_llm_context or "",
        debug_run_info_text=debug_run_info_text,
        user_message=chat_request.message,
    )
    LOG.info(
        "Starting copilot agent loop",
        workflow_permanent_id=chat_request.workflow_permanent_id,
        user_message_len=len(user_message),
        llm_key=llm_key,
    )
    # Tracing is optional: nullcontext keeps the with-block shape when disabled.
    trace_context: Any = contextlib.nullcontext()
    if is_tracing_enabled():
        trace_context = trace(
            workflow_name="Copilot workflow",
            group_id=chat_request.workflow_copilot_chat_id,
            metadata={
                "workflow_permanent_id": chat_request.workflow_permanent_id,
                "organization_id": organization_id,
                "llm_key": llm_key,
                "user_message_len": str(len(user_message)),
            },
        )
    chat_id = chat_request.workflow_copilot_chat_id or chat_request.workflow_permanent_id
    session = create_copilot_session(chat_id)
    # Record the model name in a contextvar for tracing; reset in finally.
    model_token = _copilot_model_name.set(model_name)
    try:
        with trace_context:
            try:
                async with MCPServerManager([mcp_server]) as manager:
                    agent.mcp_servers = list(manager.active_servers)
                    result = await run_with_enforcement(
                        agent=agent,
                        initial_input=user_message,
                        ctx=ctx,
                        stream=stream,
                        max_turns=MAX_TURNS,
                        hooks=CopilotRunHooks(ctx),
                        run_config=run_config,
                        session=session,
                    )
                    return _translate_to_agent_result(
                        result,
                        ctx,
                        global_llm_context,
                        chat_request,
                        organization_id,
                    )
            except asyncio.CancelledError:
                LOG.info("Copilot run cancelled")
                return _build_exit_result(ctx, "Request cancelled.", global_llm_context)
            except MaxTurnsExceeded:
                return _build_exit_result(
                    ctx,
                    "I've reached the maximum number of steps. Here's what I have so far.",
                    global_llm_context,
                )
            except CopilotTotalTimeoutError:
                return _build_exit_result(
                    ctx,
                    "I ran out of time processing your request. Here's what I have so far.",
                    global_llm_context,
                )
            except CopilotNonRetriableNavError as exc:
                LOG.warning(
                    "Copilot run halted on non-retriable navigation error",
                    url=exc.url,
                    error_message=exc.error_message,
                    organization_id=organization_id,
                )
                # Non-retriable nav errors prove the current workflow doesn't
                # work; zero the proposal even if other tools succeeded.
                return AgentResult(
                    user_response=(
                        f"The target URL could not be reached. Error: {exc.error_message}. "
                        "Please verify the URL and try again."
                    ),
                    updated_workflow=None,
                    global_llm_context=global_llm_context,
                    workflow_yaml=None,
                    workflow_was_persisted=ctx.workflow_persisted,
                    total_tokens=ctx.total_tokens_used,
                )
            except Exception as e:
                # Boundary catch-all: log with traceback and return a safe reply.
                LOG.error("Copilot agent error", error=str(e), exc_info=True)
                return _build_exit_result(ctx, "An unexpected error occurred. Please try again.", global_llm_context)
    finally:
        _copilot_model_name.reset(model_token)
        # NOTE(review): session.close() is called synchronously — confirm the
        # session type returned by create_copilot_session has a sync close.
        session.close()