mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-26 10:41:14 +00:00
520 lines
21 KiB
Python
520 lines
21 KiB
Python
"""Copilot agent — multi-turn tool-use agent for workflow building.
|
|
|
|
Uses the OpenAI Agents SDK with LiteLLM for multi-provider LLM support.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import contextlib
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import TYPE_CHECKING, Any
|
|
|
|
if TYPE_CHECKING:
|
|
from agents.result import RunResultStreaming
|
|
|
|
from skyvern.forge.sdk.experimentation.llm_prompt_config import LLMAPIHandler
|
|
from skyvern.forge.sdk.schemas.workflow_copilot import WorkflowCopilotChatRequest
|
|
|
|
import structlog
|
|
import yaml
|
|
from pydantic import ValidationError
|
|
|
|
from skyvern.forge.prompts import prompt_engine
|
|
from skyvern.forge.sdk.copilot.block_goal_wrapping import wrap_block_goals
|
|
from skyvern.forge.sdk.copilot.context import AgentResult, CopilotContext, StructuredContext
|
|
from skyvern.forge.sdk.copilot.output_utils import extract_final_text, parse_final_response
|
|
from skyvern.forge.sdk.copilot.tracing_setup import _copilot_model_name, ensure_tracing_initialized, is_tracing_enabled
|
|
from skyvern.forge.sdk.routes.event_source_stream import EventSourceStream
|
|
from skyvern.forge.sdk.schemas.workflow_copilot import (
|
|
WorkflowCopilotChatHistoryMessage,
|
|
)
|
|
from skyvern.forge.sdk.workflow.exceptions import BaseWorkflowHTTPException
|
|
from skyvern.utils.strings import escape_code_fences
|
|
|
|
LOG = structlog.get_logger()

# Knowledge-base text injected verbatim into the system prompt template.
# Resolved relative to this module: two parents up, then
# prompts/skyvern/workflow_knowledge_base.txt.
WORKFLOW_KNOWLEDGE_BASE_PATH = (
    Path(__file__).resolve().parents[2] / "prompts" / "skyvern" / "workflow_knowledge_base.txt"
)

# Upper bound on agent-loop turns; passed to run_with_enforcement, which
# raises MaxTurnsExceeded when the agent exceeds it.
MAX_TURNS = 25
|
|
|
|
|
|
def _format_chat_history(chat_history: list[WorkflowCopilotChatHistoryMessage]) -> str:
|
|
if not chat_history:
|
|
return ""
|
|
lines = [f"{msg.sender}: {msg.content}" for msg in chat_history]
|
|
return "\n".join(lines)
|
|
|
|
|
|
def _build_system_prompt(
    tool_usage_guide: str,
    security_rules: str = "",
) -> str:
    """Render the copilot system prompt from the workflow-copilot-agent template.

    Reads the on-disk workflow knowledge base on every call and stamps the
    prompt with the current UTC time plus the supplied tool guide and
    security rules.
    """
    knowledge_base_text = WORKFLOW_KNOWLEDGE_BASE_PATH.read_text(encoding="utf-8")
    now_utc_iso = datetime.now(timezone.utc).isoformat()
    return prompt_engine.load_prompt(
        template="workflow-copilot-agent",
        workflow_knowledge_base=knowledge_base_text,
        current_datetime=now_utc_iso,
        tool_usage_guide=tool_usage_guide,
        security_rules=security_rules,
    )
|
|
|
|
|
|
def _build_user_context(
    workflow_yaml: str,
    chat_history_text: str,
    global_llm_context: str,
    debug_run_info_text: str,
    user_message: str,
) -> str:
    """Render untrusted context into the user message with code fencing.

    Every argument is untrusted input, so each one is run through
    ``escape_code_fences`` before the template drops it inside a
    triple-backtick block. A raw value containing a literal ``` would
    otherwise terminate the fence early and let the model treat the
    remainder as system-level content (the classic code-fence breakout).
    The older copilot path in ``workflow_copilot.py`` and
    ``feasibility_gate.py`` apply the same guard.
    """
    fenced_fields = {
        "workflow_yaml": escape_code_fences(workflow_yaml or ""),
        "chat_history": escape_code_fences(chat_history_text),
        "global_llm_context": escape_code_fences(global_llm_context or ""),
        "debug_run_info": escape_code_fences(debug_run_info_text),
        "user_message": escape_code_fences(user_message),
    }
    return prompt_engine.load_prompt(template="workflow-copilot-user", **fenced_fields)
|
|
|
|
|
|
def _build_tool_usage_guide(tool_names_and_descriptions: list[tuple[str, str]]) -> str:
|
|
if not tool_names_and_descriptions:
|
|
return ""
|
|
return "\n".join(
|
|
f"- **{name}** — {description or 'No description provided.'}"
|
|
for name, description in tool_names_and_descriptions
|
|
)
|
|
|
|
|
|
def _normalize_failure_reason(failure_reason: str | None) -> str:
|
|
if not failure_reason:
|
|
return "The workflow test run failed."
|
|
|
|
normalized = failure_reason.split("Call log:", 1)[0].strip()
|
|
normalized = " ".join(normalized.split())
|
|
if len(normalized) > 240:
|
|
normalized = normalized[:237].rstrip() + "..."
|
|
return normalized or "The workflow test run failed."
|
|
|
|
|
|
# Per-failure-category follow-up sentences appended to the failed-test reply.
# Keys are matched against ctx.last_failure_category_top in
# _rewrite_failed_test_response; unrecognized categories append nothing.
# Each value starts with a leading space so it concatenates cleanly after
# the failure sentence.
_FAILURE_FOLLOW_UP = {
    "NAVIGATION_FAILURE": " Can you confirm the URL is correct?",
    "PROXY_ERROR": " Want me to retry with a different proxy location?",
    "PAGE_LOAD_TIMEOUT": " Can you confirm the URL and try again in a moment?",
    "ANTI_BOT_DETECTION": " Want me to retry with a different proxy location?",
    "AUTH_FAILURE": " The site rejected the login — is the stored password still valid?",
    "CREDENTIAL_ERROR": " I couldn't find a credential to use — can you link one in Settings?",
}
|
|
|
|
|
|
def _rewrite_failed_test_response(user_response: str, ctx: CopilotContext) -> str:
    """Ground the final reply in the turn's actual test outcome.

    When no proposal can ship this turn, reshape the reply so the user sees
    why nothing is being offered instead of an un-grounded claim. Replies
    are rewritten in two cases: the test failed outright, or the agent
    edited the YAML without verifying it.
    """
    edited_this_turn = ctx.last_update_block_count is not None

    # Case 1: a test ran and failed — summarize the failure with an
    # optional category-specific follow-up question.
    if edited_this_turn and ctx.last_test_ok is False:
        count = ctx.last_update_block_count
        if count <= 0:
            draft_phrase = "a draft workflow"
        else:
            draft_phrase = f"a draft workflow with {count} {'block' if count == 1 else 'blocks'}"
        failure_summary = _normalize_failure_reason(ctx.last_test_failure_reason)
        follow_up = _FAILURE_FOLLOW_UP.get(ctx.last_failure_category_top or "", "")
        return f"I created {draft_phrase} and tested it, but the test failed. Failure: {failure_summary}.{follow_up}"

    # Case 2: YAML was edited but never verified this turn. Don't promise a
    # re-run we can't durably execute (the restore helper rolls the mid-turn
    # DB write back and there's no durable draft to re-test).
    if edited_this_turn and ctx.last_test_ok is None and ctx.last_workflow is not None:
        return (
            "I drafted an update but wasn't able to verify it this turn. "
            "Could you share more context about what you'd like me to do?"
        )

    return user_response
|
|
|
|
|
|
def _verified_workflow_or_none(ctx: CopilotContext) -> tuple[Any, str | None]:
|
|
"""Only surface a workflow proposal when it passed a test this turn.
|
|
|
|
SKY-9143: the Accept/Reject UI must never reflect a workflow we haven't
|
|
proven works. Every agent exit path that builds an AgentResult directly
|
|
(cancel, max-turns, timeout, non-retriable nav error, catch-all Exception)
|
|
funnels through this so the strict invariant holds regardless of which
|
|
branch the run took.
|
|
"""
|
|
if ctx.last_workflow is not None and ctx.last_test_ok is True:
|
|
return ctx.last_workflow, ctx.last_workflow_yaml
|
|
return None, None
|
|
|
|
|
|
def _build_exit_result(ctx: CopilotContext, user_response: str, global_llm_context: str | None) -> AgentResult:
    """Package an AgentResult for exits that bypass ``_translate_to_agent_result``.

    Only a workflow that passed a test this turn is surfaced (see
    ``_verified_workflow_or_none``); persistence and token accounting come
    straight off the context.
    """
    workflow, workflow_yaml = _verified_workflow_or_none(ctx)
    exit_result = AgentResult(
        user_response=user_response,
        updated_workflow=workflow,
        global_llm_context=global_llm_context,
        workflow_yaml=workflow_yaml,
        workflow_was_persisted=ctx.workflow_persisted,
        total_tokens=ctx.total_tokens_used,
    )
    return exit_result
|
|
|
|
|
|
def _translate_to_agent_result(
    result: RunResultStreaming,
    ctx: CopilotContext,
    global_llm_context: str | None,
    chat_request: WorkflowCopilotChatRequest,
    organization_id: str,
) -> AgentResult:
    """Convert a finished agent run into the turn's final AgentResult.

    Parses the agent's final text into an action (REPLY / ASK_QUESTION /
    REPLACE_WORKFLOW), handles the discouraged inline REPLACE_WORKFLOW path
    (banned-block rejection, goal wrapping, YAML validation), enforces the
    tested-workflow-only invariant via ``_verified_workflow_or_none``, and
    merges this turn's tool activity into the structured cross-turn context.
    """
    # Deferred tools.py imports here and below: tools.py -> routes.workflow_copilot -> this module (circular at import time).
    from skyvern.forge.sdk.copilot.tools import _process_workflow_yaml

    # No final text at all degrades to a canned REPLY asking to rephrase.
    text = extract_final_text(result)
    if not text:
        text = '{"type": "REPLY", "user_response": "I\'m not sure how to help with that. Could you rephrase?"}'

    action_data = parse_final_response(text)
    user_response = action_data.get("user_response") or "Done."

    # Unknown response types degrade to a plain REPLY.
    resp_type = action_data.get("type", "REPLY")
    if resp_type not in ("REPLY", "ASK_QUESTION", "REPLACE_WORKFLOW"):
        resp_type = "REPLY"

    # Start from whatever the tool layer last recorded on ctx.
    last_workflow = ctx.last_workflow
    last_workflow_yaml = ctx.last_workflow_yaml

    if resp_type == "REPLACE_WORKFLOW":
        LOG.warning("Agent used inline REPLACE_WORKFLOW instead of update_workflow tool")
        workflow_yaml = action_data.get("workflow_yaml", "")
        if workflow_yaml:
            # REPLACE_WORKFLOW bypasses _update_workflow, so the post-emission
            # reject has to run here too. Skip processing on detection; leave
            # last_workflow / last_workflow_yaml at their pre-REPLACE values so
            # the rejected YAML does not latch onto ctx.
            from skyvern.forge.sdk.copilot.tools import (
                _banned_block_reject_message,
                _detect_new_banned_blocks,
                _record_banned_block_reject_span,
            )

            banned_items = _detect_new_banned_blocks(workflow_yaml, ctx.last_workflow_yaml)
            if banned_items:
                _record_banned_block_reject_span("replace_workflow_inline", banned_items)
                user_response = f"{user_response}\n\n(Note: {_banned_block_reject_message(banned_items)})"
                # Emptying the YAML skips both wrapping and processing below.
                workflow_yaml = ""
        if workflow_yaml:
            if ctx.user_message:
                workflow_yaml = wrap_block_goals(workflow_yaml, ctx.user_message)
            else:
                LOG.warning("REPLACE_WORKFLOW inline path missing ctx.user_message; skipping block-goal wrap")
            try:
                last_workflow = _process_workflow_yaml(
                    workflow_id=chat_request.workflow_id,
                    workflow_permanent_id=chat_request.workflow_permanent_id,
                    organization_id=organization_id,
                    workflow_yaml=workflow_yaml,
                )
                last_workflow_yaml = workflow_yaml
            except (yaml.YAMLError, ValidationError, BaseWorkflowHTTPException) as e:
                # Validation failure keeps the pre-REPLACE workflow and tells
                # the user the proposal was invalid.
                LOG.warning("Failed to process final workflow YAML", error=str(e))
                user_response = (
                    f"{user_response}\n\n"
                    f"(Note: The proposed workflow had a validation error: {str(e)[:200]}. "
                    f"Please ask me to fix it.)"
                )

    # Inline REPLACE_WORKFLOW bypasses _update_workflow, so ctx.last_workflow
    # is whatever the tool layer last saw. Write the REPLACE candidate onto
    # ctx and invalidate any prior passing test: the REPLACE yaml itself was
    # never run, so a leftover ``last_test_ok is True`` from an earlier tested
    # (but different) yaml must not promote this untested one.
    if resp_type == "REPLACE_WORKFLOW" and last_workflow is not ctx.last_workflow:
        ctx.last_workflow = last_workflow
        ctx.last_workflow_yaml = last_workflow_yaml
        ctx.last_test_ok = None

    # ASK_QUESTION replies carry a specific clarifying question — often the
    # "stop and ask" unblocker the system prompt now requires when the agent
    # cannot test. The generic rewrite would replace it with a vague
    # "Could you share more context", so skip it for ASK_QUESTION.
    if resp_type != "ASK_QUESTION":
        user_response = _rewrite_failed_test_response(str(user_response), ctx)
        last_workflow, last_workflow_yaml = _verified_workflow_or_none(ctx)

    # Prefer the agent-supplied context (dict or JSON string); fall back to
    # the prior turn's context. Invalid dicts are ignored rather than
    # failing the whole turn.
    llm_context_raw = action_data.get("global_llm_context")
    structured = StructuredContext.from_json_str(global_llm_context)
    if isinstance(llm_context_raw, dict):
        try:
            structured = StructuredContext.model_validate(llm_context_raw)
        except Exception:
            pass
    elif isinstance(llm_context_raw, str):
        structured = StructuredContext.from_json_str(llm_context_raw)
    structured.merge_turn_summary(ctx.tool_activity)
    enriched_context = structured.to_json_str()

    return AgentResult(
        user_response=str(user_response),
        updated_workflow=last_workflow,
        global_llm_context=enriched_context or None,
        response_type=resp_type,
        workflow_yaml=last_workflow_yaml,
        workflow_was_persisted=ctx.workflow_persisted,
        total_tokens=ctx.total_tokens_used,
    )
|
|
|
|
|
|
def _build_feasibility_clarification_result(
    question: str,
    rationale: str | None,
    user_message: str,
    prior_global_llm_context: str | None,
    prior_workflow_yaml: str | None,
) -> AgentResult:
    """Construct an AgentResult for the feasibility-gate fast-path.

    Preserves structured cross-turn context, sets user_goal from the
    classifier's rationale (or the raw user message as a fallback), and
    appends a decisions_made entry so a follow-up turn can see that a
    clarification was already asked and return ``proceed`` instead of
    re-asking.
    """
    context = StructuredContext.from_json_str(prior_global_llm_context)
    if not context.user_goal:
        # Fall back to the raw user message when the classifier gave no rationale.
        goal_source = rationale if rationale else user_message
        context.user_goal = goal_source[:300]
    context.decisions_made.append(f"feasibility-gate clarification asked: {question}")

    return AgentResult(
        user_response=question,
        updated_workflow=None,
        global_llm_context=context.to_json_str(),
        response_type="ASK_QUESTION",
        workflow_yaml=prior_workflow_yaml or None,
        workflow_was_persisted=False,
        clear_proposed_workflow=True,
    )
|
|
|
|
|
|
async def run_copilot_agent(
    stream: EventSourceStream,
    organization_id: str,
    chat_request: WorkflowCopilotChatRequest,
    chat_history: list[WorkflowCopilotChatHistoryMessage],
    global_llm_context: str | None,
    debug_run_info_text: str,
    llm_api_handler: LLMAPIHandler | None,
    api_key: str | None = None,
    security_rules: str = "",
) -> AgentResult:
    """Run one copilot chat turn and return its AgentResult.

    Flow: feasibility-gate preflight (may short-circuit with a clarifying
    question), lazy import of the OpenAI Agents SDK (friendly error result
    if it is missing), context/MCP-server/agent setup, then the enforced
    agent loop. Every exception exit path funnels through
    ``_build_exit_result`` (or builds an AgentResult directly for nav
    errors) so the tested-workflow-only invariant holds on all branches.
    """
    # Preflight feasibility classifier. Never raises (errors fall through to
    # proceed). Off by default; enable via settings.ENABLE_COPILOT_FEASIBILITY_GATE.
    from skyvern.forge.sdk.copilot.feasibility_gate import run_feasibility_gate

    feasibility_verdict = await run_feasibility_gate(
        user_message=chat_request.message,
        workflow_yaml=chat_request.workflow_yaml or "",
        chat_history=_format_chat_history(chat_history),
        global_llm_context=global_llm_context or "",
        distinct_id=chat_request.workflow_permanent_id,
        organization_id=organization_id,
    )
    if feasibility_verdict.verdict == "ask_clarification" and feasibility_verdict.question:
        # Fast path: ask the clarifying question without running the agent.
        return _build_feasibility_clarification_result(
            question=feasibility_verdict.question,
            rationale=feasibility_verdict.rationale,
            user_message=chat_request.message,
            prior_global_llm_context=global_llm_context,
            prior_workflow_yaml=chat_request.workflow_yaml,
        )

    # Lazy SDK import: a missing `agents` package yields a user-facing error
    # result instead of a 500; any other missing module re-raises.
    try:
        from agents import Agent, trace
        from agents.exceptions import MaxTurnsExceeded
        from agents.mcp import MCPServerManager
    except ModuleNotFoundError as e:
        if e.name == "agents":
            LOG.error(
                "OpenAI Agents SDK dependency missing",
                error=str(e),
                workflow_permanent_id=chat_request.workflow_permanent_id,
            )
            return AgentResult(
                user_response=(
                    "Copilot backend is missing the OpenAI Agents SDK dependency. "
                    "Rebuild or redeploy the backend image so `openai-agents` is installed."
                ),
                updated_workflow=None,
                global_llm_context=global_llm_context,
                workflow_yaml=chat_request.workflow_yaml or None,
            )
        raise

    # Deferred project imports (same circular-import constraint as in
    # _translate_to_agent_result).
    from skyvern.cli.mcp_tools import mcp as skyvern_mcp
    from skyvern.forge.sdk.copilot.enforcement import (
        CopilotNonRetriableNavError,
        CopilotTotalTimeoutError,
        run_with_enforcement,
    )
    from skyvern.forge.sdk.copilot.hooks import CopilotRunHooks
    from skyvern.forge.sdk.copilot.mcp_adapter import SkyvernOverlayMCPServer
    from skyvern.forge.sdk.copilot.model_resolver import resolve_model_config
    from skyvern.forge.sdk.copilot.session_factory import create_copilot_session
    from skyvern.forge.sdk.copilot.tools import (
        NATIVE_TOOLS,
        _build_skyvern_mcp_overlays,
        get_skyvern_mcp_alias_map,
    )

    # Shared per-turn state read/written by tools, hooks, and the result
    # translation step.
    ctx = CopilotContext(
        organization_id=organization_id,
        workflow_id=chat_request.workflow_id,
        workflow_permanent_id=chat_request.workflow_permanent_id,
        workflow_yaml=chat_request.workflow_yaml or "",
        browser_session_id=None,
        stream=stream,
        api_key=api_key,
        user_message=chat_request.message,
    )

    model_name, run_config, llm_key, supports_vision = resolve_model_config(llm_api_handler)
    ctx.supports_vision = supports_vision
    ensure_tracing_initialized()

    alias_map = get_skyvern_mcp_alias_map()
    overlays = _build_skyvern_mcp_overlays()

    # MCP server exposing only the allowlisted, overlay-wrapped Skyvern tools;
    # the lambda lets tools resolve the live ctx at call time.
    mcp_server = SkyvernOverlayMCPServer(
        transport=skyvern_mcp,
        overlays=overlays,
        alias_map=alias_map,
        allowlist=frozenset(alias_map.values()),
        context_provider=lambda: ctx,
    )

    # The tool usage guide covers both native tools and MCP overlay tools.
    tool_info: list[tuple[str, str]] = [(tool.name, tool.description or "") for tool in NATIVE_TOOLS]
    tool_info.extend((name, overlay.description or "") for name, overlay in overlays.items())

    chat_history_text = _format_chat_history(chat_history)
    tool_usage_guide = _build_tool_usage_guide(tool_info)
    system_prompt = _build_system_prompt(
        tool_usage_guide=tool_usage_guide,
        security_rules=security_rules,
    )

    agent = Agent(
        name="workflow-copilot",
        instructions=system_prompt,
        tools=list(NATIVE_TOOLS),
        mcp_servers=[mcp_server],
        model=model_name,
    )

    user_message = _build_user_context(
        workflow_yaml=chat_request.workflow_yaml or "",
        chat_history_text=chat_history_text,
        global_llm_context=global_llm_context or "",
        debug_run_info_text=debug_run_info_text,
        user_message=chat_request.message,
    )

    LOG.info(
        "Starting copilot agent loop",
        workflow_permanent_id=chat_request.workflow_permanent_id,
        user_message_len=len(user_message),
        llm_key=llm_key,
    )

    # Tracing is opt-in; nullcontext keeps the with-block shape when disabled.
    trace_context: Any = contextlib.nullcontext()
    if is_tracing_enabled():
        trace_context = trace(
            workflow_name="Copilot workflow",
            group_id=chat_request.workflow_copilot_chat_id,
            metadata={
                "workflow_permanent_id": chat_request.workflow_permanent_id,
                "organization_id": organization_id,
                "llm_key": llm_key,
                "user_message_len": str(len(user_message)),
            },
        )

    # Session key falls back to the workflow permanent id when no chat id
    # exists yet.
    chat_id = chat_request.workflow_copilot_chat_id or chat_request.workflow_permanent_id
    session = create_copilot_session(chat_id)
    model_token = _copilot_model_name.set(model_name)
    try:
        with trace_context:
            try:
                async with MCPServerManager([mcp_server]) as manager:
                    agent.mcp_servers = list(manager.active_servers)
                    result = await run_with_enforcement(
                        agent=agent,
                        initial_input=user_message,
                        ctx=ctx,
                        stream=stream,
                        max_turns=MAX_TURNS,
                        hooks=CopilotRunHooks(ctx),
                        run_config=run_config,
                        session=session,
                    )
                    return _translate_to_agent_result(
                        result,
                        ctx,
                        global_llm_context,
                        chat_request,
                        organization_id,
                    )
            except asyncio.CancelledError:
                LOG.info("Copilot run cancelled")
                return _build_exit_result(ctx, "Request cancelled.", global_llm_context)
            except MaxTurnsExceeded:
                return _build_exit_result(
                    ctx,
                    "I've reached the maximum number of steps. Here's what I have so far.",
                    global_llm_context,
                )
            except CopilotTotalTimeoutError:
                return _build_exit_result(
                    ctx,
                    "I ran out of time processing your request. Here's what I have so far.",
                    global_llm_context,
                )
            except CopilotNonRetriableNavError as exc:
                LOG.warning(
                    "Copilot run halted on non-retriable navigation error",
                    url=exc.url,
                    error_message=exc.error_message,
                    organization_id=organization_id,
                )
                # Non-retriable nav errors prove the current workflow doesn't
                # work; zero the proposal even if other tools succeeded.
                return AgentResult(
                    user_response=(
                        f"The target URL could not be reached. Error: {exc.error_message}. "
                        "Please verify the URL and try again."
                    ),
                    updated_workflow=None,
                    global_llm_context=global_llm_context,
                    workflow_yaml=None,
                    workflow_was_persisted=ctx.workflow_persisted,
                    total_tokens=ctx.total_tokens_used,
                )
            except Exception as e:
                # Top-level boundary: log with traceback and return a generic
                # error result rather than propagating.
                LOG.error("Copilot agent error", error=str(e), exc_info=True)
                return _build_exit_result(ctx, "An unexpected error occurred. Please try again.", global_llm_context)
    finally:
        # Always restore the context-var and close the session, even when a
        # return above already produced the result.
        _copilot_model_name.reset(model_token)
        # NOTE(review): session.close() is called synchronously inside an
        # async function — confirm create_copilot_session returns a
        # sync-closable session.
        session.close()
|