# Skyvern/skyvern/forge/sdk/copilot/agent.py
"""Copilot agent — multi-turn tool-use agent for workflow building.
Uses the OpenAI Agents SDK with LiteLLM for multi-provider LLM support.
"""
from __future__ import annotations
import asyncio
import contextlib
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Any
if TYPE_CHECKING:
from agents.result import RunResultStreaming
from skyvern.forge.sdk.experimentation.llm_prompt_config import LLMAPIHandler
from skyvern.forge.sdk.schemas.workflow_copilot import WorkflowCopilotChatRequest
import structlog
import yaml
from pydantic import ValidationError
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.copilot.block_goal_wrapping import wrap_block_goals
from skyvern.forge.sdk.copilot.context import AgentResult, CopilotContext, StructuredContext
from skyvern.forge.sdk.copilot.output_utils import extract_final_text, parse_final_response
from skyvern.forge.sdk.copilot.tracing_setup import _copilot_model_name, ensure_tracing_initialized, is_tracing_enabled
from skyvern.forge.sdk.routes.event_source_stream import EventSourceStream
from skyvern.forge.sdk.schemas.workflow_copilot import (
WorkflowCopilotChatHistoryMessage,
)
from skyvern.forge.sdk.workflow.exceptions import BaseWorkflowHTTPException
from skyvern.utils.strings import escape_code_fences
LOG = structlog.get_logger()
# Static knowledge base text injected into the system prompt. Resolved
# relative to this file: three levels up into the package's prompts directory.
WORKFLOW_KNOWLEDGE_BASE_PATH = (
    Path(__file__).resolve().parents[2] / "prompts" / "skyvern" / "workflow_knowledge_base.txt"
)
# Hard cap on agent-loop turns; exceeding it raises MaxTurnsExceeded, which
# run_copilot_agent converts into a graceful "here's what I have" reply.
MAX_TURNS = 25
def _format_chat_history(chat_history: list[WorkflowCopilotChatHistoryMessage]) -> str:
if not chat_history:
return ""
lines = [f"{msg.sender}: {msg.content}" for msg in chat_history]
return "\n".join(lines)
def _build_system_prompt(
    tool_usage_guide: str,
    security_rules: str = "",
) -> str:
    """Render the agent's system prompt.

    Reads the static workflow knowledge base from disk and interpolates it,
    the current UTC timestamp, the tool usage guide, and any security rules
    into the ``workflow-copilot-agent`` template.
    """
    knowledge_base_text = WORKFLOW_KNOWLEDGE_BASE_PATH.read_text(encoding="utf-8")
    return prompt_engine.load_prompt(
        template="workflow-copilot-agent",
        workflow_knowledge_base=knowledge_base_text,
        current_datetime=datetime.now(timezone.utc).isoformat(),
        tool_usage_guide=tool_usage_guide,
        security_rules=security_rules,
    )
def _build_user_context(
    workflow_yaml: str,
    chat_history_text: str,
    global_llm_context: str,
    debug_run_info_text: str,
    user_message: str,
) -> str:
    """Render untrusted context into the user message with code fencing.

    Every argument is treated as untrusted and passed through
    ``escape_code_fences`` before the template interpolates it into a
    triple-backtick block. Without this, a value containing a literal
    ``` would close the fence early and let the model see the rest as
    system-level content (the classic code-fence breakout). The old
    copilot path in ``workflow_copilot.py`` and ``feasibility_gate.py``
    both apply the same guard.
    """
    sanitized = {
        "workflow_yaml": escape_code_fences(workflow_yaml or ""),
        "chat_history": escape_code_fences(chat_history_text),
        "global_llm_context": escape_code_fences(global_llm_context or ""),
        "debug_run_info": escape_code_fences(debug_run_info_text),
        "user_message": escape_code_fences(user_message),
    }
    return prompt_engine.load_prompt(template="workflow-copilot-user", **sanitized)
def _build_tool_usage_guide(tool_names_and_descriptions: list[tuple[str, str]]) -> str:
if not tool_names_and_descriptions:
return ""
return "\n".join(
f"- **{name}** — {description or 'No description provided.'}"
for name, description in tool_names_and_descriptions
)
def _normalize_failure_reason(failure_reason: str | None) -> str:
if not failure_reason:
return "The workflow test run failed."
normalized = failure_reason.split("Call log:", 1)[0].strip()
normalized = " ".join(normalized.split())
if len(normalized) > 240:
normalized = normalized[:237].rstrip() + "..."
return normalized or "The workflow test run failed."
# Maps the top failure category of the last test run to a short follow-up
# question appended to the failure reply. Values begin with a space on
# purpose: they are concatenated directly after a sentence-ending period
# in _rewrite_failed_test_response.
_FAILURE_FOLLOW_UP = {
    "NAVIGATION_FAILURE": " Can you confirm the URL is correct?",
    "PROXY_ERROR": " Want me to retry with a different proxy location?",
    "PAGE_LOAD_TIMEOUT": " Can you confirm the URL and try again in a moment?",
    "ANTI_BOT_DETECTION": " Want me to retry with a different proxy location?",
    "AUTH_FAILURE": " The site rejected the login — is the stored password still valid?",
    "CREDENTIAL_ERROR": " I couldn't find a credential to use — can you link one in Settings?",
}
def _rewrite_failed_test_response(user_response: str, ctx: CopilotContext) -> str:
    """Ground the reply when this turn produced no test-verified proposal.

    Reshapes the model's reply when we cannot ship a proposal so the user
    sees why nothing is being offered rather than an un-grounded claim:
    a tested-and-failed draft gets a failure summary plus a category-specific
    follow-up; an edited-but-unverified draft gets an honest "couldn't
    verify" message. Otherwise the original reply passes through unchanged.
    """
    block_count = ctx.last_update_block_count
    if ctx.last_test_ok is False and block_count is not None:
        if block_count > 0:
            unit = "block" if block_count == 1 else "blocks"
            draft_phrase = f"a draft workflow with {block_count} {unit}"
        else:
            draft_phrase = "a draft workflow"
        failure_summary = _normalize_failure_reason(ctx.last_test_failure_reason)
        follow_up = _FAILURE_FOLLOW_UP.get(ctx.last_failure_category_top or "", "")
        return f"I created {draft_phrase} and tested it, but the test failed. Failure: {failure_summary}.{follow_up}"
    if ctx.last_test_ok is None and block_count is not None and ctx.last_workflow is not None:
        # Agent edited the YAML but didn't verify it this turn; don't promise
        # a re-run we can't durably execute (the restore helper rolls the
        # mid-turn DB write back and there's no durable draft to re-test).
        return (
            "I drafted an update but wasn't able to verify it this turn. "
            "Could you share more context about what you'd like me to do?"
        )
    return user_response
def _verified_workflow_or_none(ctx: CopilotContext) -> tuple[Any, str | None]:
"""Only surface a workflow proposal when it passed a test this turn.
SKY-9143: the Accept/Reject UI must never reflect a workflow we haven't
proven works. Every agent exit path that builds an AgentResult directly
(cancel, max-turns, timeout, non-retriable nav error, catch-all Exception)
funnels through this so the strict invariant holds regardless of which
branch the run took.
"""
if ctx.last_workflow is not None and ctx.last_test_ok is True:
return ctx.last_workflow, ctx.last_workflow_yaml
return None, None
def _build_exit_result(ctx: CopilotContext, user_response: str, global_llm_context: str | None) -> AgentResult:
    """Build the AgentResult for agent-loop exits that bypass ``_translate_to_agent_result``.

    Only a test-verified workflow (if any) is surfaced as the proposal;
    token usage and the persisted flag are carried over from the run context.
    """
    workflow, workflow_yaml = _verified_workflow_or_none(ctx)
    return AgentResult(
        user_response=user_response,
        updated_workflow=workflow,
        global_llm_context=global_llm_context,
        workflow_yaml=workflow_yaml,
        workflow_was_persisted=ctx.workflow_persisted,
        total_tokens=ctx.total_tokens_used,
    )
def _translate_to_agent_result(
    result: RunResultStreaming,
    ctx: CopilotContext,
    global_llm_context: str | None,
    chat_request: WorkflowCopilotChatRequest,
    organization_id: str,
) -> AgentResult:
    """Convert a finished agent run into the AgentResult returned to the caller.

    Parses the model's final JSON action, handles the discouraged inline
    REPLACE_WORKFLOW path (banned-block rejection, block-goal wrapping,
    YAML validation), grounds the reply against this turn's test outcome,
    and only surfaces a workflow proposal that passed a test this turn
    (via ``_verified_workflow_or_none``).
    """
    # Deferred tools.py imports here and below: tools.py -> routes.workflow_copilot -> this module (circular at import time).
    from skyvern.forge.sdk.copilot.tools import _process_workflow_yaml

    text = extract_final_text(result)
    if not text:
        # No final text at all — substitute a canned REPLY so parsing below succeeds.
        text = '{"type": "REPLY", "user_response": "I\'m not sure how to help with that. Could you rephrase?"}'
    action_data = parse_final_response(text)
    user_response = action_data.get("user_response") or "Done."
    resp_type = action_data.get("type", "REPLY")
    # Unknown response types degrade to a plain REPLY.
    if resp_type not in ("REPLY", "ASK_QUESTION", "REPLACE_WORKFLOW"):
        resp_type = "REPLY"
    last_workflow = ctx.last_workflow
    last_workflow_yaml = ctx.last_workflow_yaml
    if resp_type == "REPLACE_WORKFLOW":
        LOG.warning("Agent used inline REPLACE_WORKFLOW instead of update_workflow tool")
        workflow_yaml = action_data.get("workflow_yaml", "")
        if workflow_yaml:
            # REPLACE_WORKFLOW bypasses _update_workflow, so the post-emission
            # reject has to run here too. Skip processing on detection; leave
            # last_workflow / last_workflow_yaml at their pre-REPLACE values so
            # the rejected YAML does not latch onto ctx.
            from skyvern.forge.sdk.copilot.tools import (
                _banned_block_reject_message,
                _detect_new_banned_blocks,
                _record_banned_block_reject_span,
            )

            banned_items = _detect_new_banned_blocks(workflow_yaml, ctx.last_workflow_yaml)
            if banned_items:
                _record_banned_block_reject_span("replace_workflow_inline", banned_items)
                user_response = f"{user_response}\n\n(Note: {_banned_block_reject_message(banned_items)})"
                # Clearing the YAML short-circuits the processing branch below.
                workflow_yaml = ""
        if workflow_yaml:
            if ctx.user_message:
                workflow_yaml = wrap_block_goals(workflow_yaml, ctx.user_message)
            else:
                LOG.warning("REPLACE_WORKFLOW inline path missing ctx.user_message; skipping block-goal wrap")
            try:
                last_workflow = _process_workflow_yaml(
                    workflow_id=chat_request.workflow_id,
                    workflow_permanent_id=chat_request.workflow_permanent_id,
                    organization_id=organization_id,
                    workflow_yaml=workflow_yaml,
                )
                last_workflow_yaml = workflow_yaml
            except (yaml.YAMLError, ValidationError, BaseWorkflowHTTPException) as e:
                # Validation failure: keep the reply but annotate it so the
                # user knows the proposed YAML was rejected.
                LOG.warning("Failed to process final workflow YAML", error=str(e))
                user_response = (
                    f"{user_response}\n\n"
                    f"(Note: The proposed workflow had a validation error: {str(e)[:200]}. "
                    f"Please ask me to fix it.)"
                )
    # Inline REPLACE_WORKFLOW bypasses _update_workflow, so ctx.last_workflow
    # is whatever the tool layer last saw. Write the REPLACE candidate onto
    # ctx and invalidate any prior passing test: the REPLACE yaml itself was
    # never run, so a leftover ``last_test_ok is True`` from an earlier tested
    # (but different) yaml must not promote this untested one.
    if resp_type == "REPLACE_WORKFLOW" and last_workflow is not ctx.last_workflow:
        ctx.last_workflow = last_workflow
        ctx.last_workflow_yaml = last_workflow_yaml
        ctx.last_test_ok = None
    # ASK_QUESTION replies carry a specific clarifying question — often the
    # "stop and ask" unblocker the system prompt now requires when the agent
    # cannot test. The generic rewrite would replace it with a vague
    # "Could you share more context", so skip it for ASK_QUESTION.
    if resp_type != "ASK_QUESTION":
        user_response = _rewrite_failed_test_response(str(user_response), ctx)
    # Strict invariant (SKY-9143): only a test-verified workflow is proposed.
    last_workflow, last_workflow_yaml = _verified_workflow_or_none(ctx)
    # Merge model-provided cross-turn context (dict or JSON string) over the
    # prior context, then fold in this turn's tool activity summary.
    llm_context_raw = action_data.get("global_llm_context")
    structured = StructuredContext.from_json_str(global_llm_context)
    if isinstance(llm_context_raw, dict):
        try:
            structured = StructuredContext.model_validate(llm_context_raw)
        except Exception:
            # Malformed model output: keep the prior context instead of failing the turn.
            pass
    elif isinstance(llm_context_raw, str):
        structured = StructuredContext.from_json_str(llm_context_raw)
    structured.merge_turn_summary(ctx.tool_activity)
    enriched_context = structured.to_json_str()
    return AgentResult(
        user_response=str(user_response),
        updated_workflow=last_workflow,
        global_llm_context=enriched_context or None,
        response_type=resp_type,
        workflow_yaml=last_workflow_yaml,
        workflow_was_persisted=ctx.workflow_persisted,
        total_tokens=ctx.total_tokens_used,
    )
def _build_feasibility_clarification_result(
    question: str,
    rationale: str | None,
    user_message: str,
    prior_global_llm_context: str | None,
    prior_workflow_yaml: str | None,
) -> AgentResult:
    """Construct an AgentResult for the feasibility-gate fast-path.

    Preserves structured cross-turn context, sets user_goal from the
    classifier's rationale (or the raw user message as a fallback), and
    appends a decisions_made entry so a follow-up turn can see that a
    clarification was already asked and return ``proceed`` instead of
    re-asking.
    """
    structured = StructuredContext.from_json_str(prior_global_llm_context)
    if not structured.user_goal:
        # Cap the goal at 300 chars so the stored context stays compact.
        structured.user_goal = (rationale or user_message)[:300]
    structured.decisions_made.append(f"feasibility-gate clarification asked: {question}")
    return AgentResult(
        user_response=question,
        updated_workflow=None,
        global_llm_context=structured.to_json_str(),
        response_type="ASK_QUESTION",
        workflow_yaml=prior_workflow_yaml or None,
        workflow_was_persisted=False,
        clear_proposed_workflow=True,
    )
async def run_copilot_agent(
    stream: EventSourceStream,
    organization_id: str,
    chat_request: WorkflowCopilotChatRequest,
    chat_history: list[WorkflowCopilotChatHistoryMessage],
    global_llm_context: str | None,
    debug_run_info_text: str,
    llm_api_handler: LLMAPIHandler | None,
    api_key: str | None = None,
    security_rules: str = "",
) -> AgentResult:
    """Run one multi-turn copilot agent loop for a single chat request.

    Order of operations: feasibility-gate fast path, Agents SDK availability
    check, agent/MCP setup, enforced streaming run, then translation of the
    outcome into an AgentResult. Every handled failure path returns an
    AgentResult rather than raising; only a non-"agents"
    ModuleNotFoundError is re-raised.
    """
    # Preflight feasibility classifier. Never raises (errors fall through to
    # proceed). Off by default; enable via settings.ENABLE_COPILOT_FEASIBILITY_GATE.
    from skyvern.forge.sdk.copilot.feasibility_gate import run_feasibility_gate

    feasibility_verdict = await run_feasibility_gate(
        user_message=chat_request.message,
        workflow_yaml=chat_request.workflow_yaml or "",
        chat_history=_format_chat_history(chat_history),
        global_llm_context=global_llm_context or "",
        distinct_id=chat_request.workflow_permanent_id,
        organization_id=organization_id,
    )
    if feasibility_verdict.verdict == "ask_clarification" and feasibility_verdict.question:
        # Fast path: answer with a clarifying question without starting the agent loop.
        return _build_feasibility_clarification_result(
            question=feasibility_verdict.question,
            rationale=feasibility_verdict.rationale,
            user_message=chat_request.message,
            prior_global_llm_context=global_llm_context,
            prior_workflow_yaml=chat_request.workflow_yaml,
        )
    try:
        from agents import Agent, trace
        from agents.exceptions import MaxTurnsExceeded
        from agents.mcp import MCPServerManager
    except ModuleNotFoundError as e:
        if e.name == "agents":
            # Missing optional dependency: degrade to an explanatory reply so
            # the operator sees the remediation instead of a server error.
            LOG.error(
                "OpenAI Agents SDK dependency missing",
                error=str(e),
                workflow_permanent_id=chat_request.workflow_permanent_id,
            )
            return AgentResult(
                user_response=(
                    "Copilot backend is missing the OpenAI Agents SDK dependency. "
                    "Rebuild or redeploy the backend image so `openai-agents` is installed."
                ),
                updated_workflow=None,
                global_llm_context=global_llm_context,
                workflow_yaml=chat_request.workflow_yaml or None,
            )
        raise
    # Deferred imports: these modules (directly or transitively) import this one.
    from skyvern.cli.mcp_tools import mcp as skyvern_mcp
    from skyvern.forge.sdk.copilot.enforcement import (
        CopilotNonRetriableNavError,
        CopilotTotalTimeoutError,
        run_with_enforcement,
    )
    from skyvern.forge.sdk.copilot.hooks import CopilotRunHooks
    from skyvern.forge.sdk.copilot.mcp_adapter import SkyvernOverlayMCPServer
    from skyvern.forge.sdk.copilot.model_resolver import resolve_model_config
    from skyvern.forge.sdk.copilot.session_factory import create_copilot_session
    from skyvern.forge.sdk.copilot.tools import (
        NATIVE_TOOLS,
        _build_skyvern_mcp_overlays,
        get_skyvern_mcp_alias_map,
    )

    # Mutable per-run state shared with tools and hooks (via the context provider below).
    ctx = CopilotContext(
        organization_id=organization_id,
        workflow_id=chat_request.workflow_id,
        workflow_permanent_id=chat_request.workflow_permanent_id,
        workflow_yaml=chat_request.workflow_yaml or "",
        browser_session_id=None,
        stream=stream,
        api_key=api_key,
        user_message=chat_request.message,
    )
    model_name, run_config, llm_key, supports_vision = resolve_model_config(llm_api_handler)
    ctx.supports_vision = supports_vision
    ensure_tracing_initialized()
    # Expose Skyvern MCP tools through an overlay server restricted to the alias allowlist.
    alias_map = get_skyvern_mcp_alias_map()
    overlays = _build_skyvern_mcp_overlays()
    mcp_server = SkyvernOverlayMCPServer(
        transport=skyvern_mcp,
        overlays=overlays,
        alias_map=alias_map,
        allowlist=frozenset(alias_map.values()),
        context_provider=lambda: ctx,
    )
    # Names/descriptions of native tools + MCP overlays feed the tool usage guide.
    tool_info: list[tuple[str, str]] = [(tool.name, tool.description or "") for tool in NATIVE_TOOLS]
    tool_info.extend((name, overlay.description or "") for name, overlay in overlays.items())
    chat_history_text = _format_chat_history(chat_history)
    tool_usage_guide = _build_tool_usage_guide(tool_info)
    system_prompt = _build_system_prompt(
        tool_usage_guide=tool_usage_guide,
        security_rules=security_rules,
    )
    agent = Agent(
        name="workflow-copilot",
        instructions=system_prompt,
        tools=list(NATIVE_TOOLS),
        mcp_servers=[mcp_server],
        model=model_name,
    )
    # All untrusted context is fenced/escaped inside _build_user_context.
    user_message = _build_user_context(
        workflow_yaml=chat_request.workflow_yaml or "",
        chat_history_text=chat_history_text,
        global_llm_context=global_llm_context or "",
        debug_run_info_text=debug_run_info_text,
        user_message=chat_request.message,
    )
    LOG.info(
        "Starting copilot agent loop",
        workflow_permanent_id=chat_request.workflow_permanent_id,
        user_message_len=len(user_message),
        llm_key=llm_key,
    )
    # Tracing is optional: nullcontext keeps the with-block shape when disabled.
    trace_context: Any = contextlib.nullcontext()
    if is_tracing_enabled():
        trace_context = trace(
            workflow_name="Copilot workflow",
            group_id=chat_request.workflow_copilot_chat_id,
            metadata={
                "workflow_permanent_id": chat_request.workflow_permanent_id,
                "organization_id": organization_id,
                "llm_key": llm_key,
                "user_message_len": str(len(user_message)),
            },
        )
    chat_id = chat_request.workflow_copilot_chat_id or chat_request.workflow_permanent_id
    session = create_copilot_session(chat_id)
    # Record the model name in a contextvar for tracing; reset in finally.
    model_token = _copilot_model_name.set(model_name)
    try:
        with trace_context:
            try:
                async with MCPServerManager([mcp_server]) as manager:
                    agent.mcp_servers = list(manager.active_servers)
                    result = await run_with_enforcement(
                        agent=agent,
                        initial_input=user_message,
                        ctx=ctx,
                        stream=stream,
                        max_turns=MAX_TURNS,
                        hooks=CopilotRunHooks(ctx),
                        run_config=run_config,
                        session=session,
                    )
                    return _translate_to_agent_result(
                        result,
                        ctx,
                        global_llm_context,
                        chat_request,
                        organization_id,
                    )
            except asyncio.CancelledError:
                LOG.info("Copilot run cancelled")
                return _build_exit_result(ctx, "Request cancelled.", global_llm_context)
            except MaxTurnsExceeded:
                return _build_exit_result(
                    ctx,
                    "I've reached the maximum number of steps. Here's what I have so far.",
                    global_llm_context,
                )
            except CopilotTotalTimeoutError:
                return _build_exit_result(
                    ctx,
                    "I ran out of time processing your request. Here's what I have so far.",
                    global_llm_context,
                )
            except CopilotNonRetriableNavError as exc:
                LOG.warning(
                    "Copilot run halted on non-retriable navigation error",
                    url=exc.url,
                    error_message=exc.error_message,
                    organization_id=organization_id,
                )
                # Non-retriable nav errors prove the current workflow doesn't
                # work; zero the proposal even if other tools succeeded.
                return AgentResult(
                    user_response=(
                        f"The target URL could not be reached. Error: {exc.error_message}. "
                        "Please verify the URL and try again."
                    ),
                    updated_workflow=None,
                    global_llm_context=global_llm_context,
                    workflow_yaml=None,
                    workflow_was_persisted=ctx.workflow_persisted,
                    total_tokens=ctx.total_tokens_used,
                )
            except Exception as e:
                # Boundary catch-all: log with traceback and return a safe reply.
                LOG.error("Copilot agent error", error=str(e), exc_info=True)
                return _build_exit_result(ctx, "An unexpected error occurred. Please try again.", global_llm_context)
    finally:
        _copilot_model_name.reset(model_token)
        # NOTE(review): session.close() is called synchronously — confirm the
        # session type returned by create_copilot_session has a sync close.
        session.close()