mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
fix(SKY-9189): restore raw-credential refusal in Workflow Copilot v2 (#5598)
This commit is contained in:
parent
de1959a6ac
commit
0dad5ea50b
6 changed files with 308 additions and 33 deletions
|
|
@ -314,3 +314,42 @@ class TestTranslateToAgentResultGating:
|
|||
assert "test failed" in agent_result.user_response.lower()
|
||||
assert "All done" not in agent_result.user_response
|
||||
assert agent_result.updated_workflow is None
|
||||
|
||||
|
||||
class TestCredentialRefusalReachesAgent:
|
||||
"""Prove the SKY-9189 refusal rule is actually delivered to the agent.
|
||||
|
||||
`run_copilot_agent` constructs the openai-agents SDK `Agent(...)` with
|
||||
`instructions=_build_system_prompt(...)` and `tools=list(NATIVE_TOOLS)`.
|
||||
A behavior test would require patching the agent loop and is fragile; a
|
||||
construction test (rule text flows through the exact helpers the route
|
||||
uses) is deterministic and catches both prompt and tool-surface drift.
|
||||
"""
|
||||
|
||||
def test_build_system_prompt_carries_refusal_clause(self) -> None:
|
||||
from skyvern.forge.sdk.copilot.agent import _build_system_prompt
|
||||
|
||||
prompt = _build_system_prompt(tool_usage_guide="", security_rules="")
|
||||
|
||||
assert "CREDENTIAL HANDLING - CRITICAL" in prompt
|
||||
assert "DO NOT PROVIDE RAW LOGIN/PASSWORD" in prompt
|
||||
assert "MUST NOT build, update, or run a workflow" in prompt
|
||||
assert "redacted from the outbound client stream" not in prompt
|
||||
|
||||
def test_native_tools_carry_refusal_reference(self) -> None:
|
||||
import re
|
||||
|
||||
from skyvern.forge.sdk.copilot.tools import NATIVE_TOOLS
|
||||
|
||||
targets = {"run_blocks_and_collect_debug", "update_and_run_blocks"}
|
||||
matched = {tool.name for tool in NATIVE_TOOLS if tool.name in targets}
|
||||
assert matched == targets, f"missing tools in NATIVE_TOOLS: {targets - matched}"
|
||||
|
||||
cross_ref = re.compile(r"CREDENTIAL\s+HANDLING refusal rule")
|
||||
for tool in NATIVE_TOOLS:
|
||||
if tool.name not in targets:
|
||||
continue
|
||||
desc = tool.description
|
||||
assert "redacted from" not in desc, f"{tool.name} still claims redaction"
|
||||
assert "you may pass it via" not in desc, f"{tool.name} still permits inline secrets"
|
||||
assert cross_ref.search(desc), f"{tool.name} missing refusal cross-reference"
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ from __future__ import annotations
|
|||
from unittest.mock import MagicMock
|
||||
|
||||
from skyvern.forge.sdk.copilot.output_utils import (
|
||||
parse_final_response,
|
||||
sanitize_tool_result_for_llm,
|
||||
summarize_tool_result,
|
||||
truncate_output,
|
||||
|
|
@ -323,3 +324,43 @@ class TestSummarizeToolResult:
|
|||
def test_unknown_tool_returns_ok(self) -> None:
|
||||
summary = self._summarize("unknown_tool", {"ok": True})
|
||||
assert summary == "OK"
|
||||
|
||||
|
||||
class TestParseFinalResponse:
|
||||
"""parse_final_response is the last mile between model output and the frontend.
|
||||
|
||||
A parse failure falls back to `{"type": "REPLY", "user_response": text}`,
|
||||
which means the raw JSON object is rendered in the chat bubble. Real model
|
||||
outputs sometimes embed literal newlines inside string values (strict
|
||||
`json.loads` rejects those) — seen in SKY-9189 test-2 where the full
|
||||
refusal envelope landed in the user bubble instead of just user_response.
|
||||
"""
|
||||
|
||||
def test_parses_clean_json_envelope(self) -> None:
|
||||
envelope = '{"type": "ASK_QUESTION", "user_response": "hi"}'
|
||||
parsed = parse_final_response(envelope)
|
||||
assert parsed == {"type": "ASK_QUESTION", "user_response": "hi"}
|
||||
|
||||
def test_strips_json_code_fence(self) -> None:
|
||||
envelope = '```json\n{"type": "REPLY", "user_response": "ok"}\n```'
|
||||
assert parse_final_response(envelope)["type"] == "REPLY"
|
||||
|
||||
def test_tolerates_literal_newline_inside_string_value(self) -> None:
|
||||
# Real model output shape: a multi-line user_response split across
|
||||
# actual newlines instead of \n escapes. strict=True rejects this,
|
||||
# strict=False accepts it. Without the fallback, the whole JSON blob
|
||||
# gets shown to the user.
|
||||
envelope = '{"type": "ASK_QUESTION", "user_response": "line one\nline two"}'
|
||||
parsed = parse_final_response(envelope)
|
||||
assert parsed["type"] == "ASK_QUESTION"
|
||||
assert parsed["user_response"] == "line one\nline two"
|
||||
|
||||
def test_unparseable_text_falls_back_to_reply(self) -> None:
|
||||
# Genuinely broken output still degrades gracefully.
|
||||
parsed = parse_final_response("not json at all")
|
||||
assert parsed == {"type": "REPLY", "user_response": "not json at all"}
|
||||
|
||||
def test_non_dict_json_falls_back_to_reply(self) -> None:
|
||||
# A JSON array at top level is valid JSON but not a valid envelope.
|
||||
parsed = parse_final_response("[1, 2, 3]")
|
||||
assert parsed == {"type": "REPLY", "user_response": "[1, 2, 3]"}
|
||||
|
|
|
|||
177
tests/unit/test_workflow_copilot_credential_refusal.py
Normal file
177
tests/unit/test_workflow_copilot_credential_refusal.py
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
"""Deterministic prompt + tool-docstring tests for the v2 raw-credential refusal.
|
||||
|
||||
Locks in the fix for SKY-9189. The rule lives in two places the agent reads
|
||||
as operating instructions: the system prompt template
|
||||
`workflow-copilot-agent.j2`, and the tool docstrings for
|
||||
`run_blocks_and_collect_debug` and `update_and_run_blocks` (exposed to the
|
||||
agents SDK via FunctionTool.description). Both must state the same policy,
|
||||
or the agent follows whichever it weights higher.
|
||||
"""
|
||||
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.forge.sdk.copilot.tools import (
|
||||
run_blocks_tool,
|
||||
update_and_run_blocks_tool,
|
||||
)
|
||||
|
||||
_AGENT_TEMPLATE_DEFAULTS = dict(
|
||||
workflow_knowledge_base="test kb",
|
||||
current_datetime="2026-01-01T00:00:00Z",
|
||||
tool_usage_guide="",
|
||||
security_rules="",
|
||||
)
|
||||
|
||||
|
||||
def _render_agent_prompt() -> str:
|
||||
return prompt_engine.load_prompt("workflow-copilot-agent", **_AGENT_TEMPLATE_DEFAULTS)
|
||||
|
||||
|
||||
class TestAgentPromptRefusalClause:
|
||||
"""The rewritten system prompt must carry the hard refusal rule."""
|
||||
|
||||
def test_uppercase_marker_present(self) -> None:
|
||||
"""The recognizability marker preserved from v1 must render verbatim."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "DO NOT PROVIDE RAW LOGIN/PASSWORD" in rendered
|
||||
|
||||
def test_critical_section_header_present(self) -> None:
|
||||
"""The section is labelled CRITICAL so prompt scanners can find it."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "CREDENTIAL HANDLING - CRITICAL" in rendered
|
||||
|
||||
def test_ask_question_response_required(self) -> None:
|
||||
"""On refusal, the agent must use ASK_QUESTION, not a workflow build."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "ASK_QUESTION" in rendered
|
||||
assert "MUST NOT build, update, or run a workflow" in rendered
|
||||
|
||||
def test_generalized_secret_surface_enumerated(self) -> None:
|
||||
"""Refusal covers more than username/password — all the common leakage shapes."""
|
||||
rendered = _render_agent_prompt()
|
||||
for term in (
|
||||
"API keys",
|
||||
"OAuth tokens",
|
||||
"Authorization: Bearer",
|
||||
"JWTs",
|
||||
"session cookies",
|
||||
"TOTP seeds",
|
||||
"OTP",
|
||||
"private keys",
|
||||
"credit card numbers",
|
||||
"CVVs",
|
||||
):
|
||||
assert term in rendered, f"missing secret-surface term: {term}"
|
||||
|
||||
def test_browser_tools_also_forbidden(self) -> None:
|
||||
"""The refusal must block not just workflow build/run but direct browser-tool paths too.
|
||||
|
||||
Without this, an agent can 'comply' by skipping workflow tools and still
|
||||
type_text the pasted password into a live login form. Explicit names of
|
||||
the browser tools keep the rule concrete for the LLM.
|
||||
"""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "type_text" in rendered
|
||||
assert "click" in rendered
|
||||
assert "press_key" in rendered
|
||||
assert "any other direct browser tool" in rendered
|
||||
|
||||
def test_list_credentials_pagination_instruction(self) -> None:
|
||||
"""Prompt must tell the agent to page list_credentials before concluding 'no match'.
|
||||
|
||||
list_credentials defaults to page_size=10 and caps at 50. Without this
|
||||
instruction the agent can falsely tell the user 'no stored credential
|
||||
exists' and send them to the UI to create one they already have.
|
||||
"""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "page_size=50" in rendered
|
||||
assert "has_more" in rendered
|
||||
|
||||
def test_negative_bare_identifier_is_not_a_credential(self) -> None:
|
||||
"""A bare username/email alone must not trigger the refusal rule.
|
||||
|
||||
The section explicitly calls out that bare non-secret identifiers don't
|
||||
count, so the prompt doesn't over-refuse ('my username is alice').
|
||||
"""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "bare username" in rendered
|
||||
assert "NOT a raw credential" in rendered
|
||||
assert "does not trigger this rule" in rendered
|
||||
|
||||
def test_list_credentials_called_first(self) -> None:
|
||||
"""The refusal flow routes through list_credentials for discoverable matches."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "FIRST call `list_credentials`" in rendered
|
||||
|
||||
def test_no_domain_matching(self) -> None:
|
||||
"""list_credentials has no domain field, so the rule must match on name/username only."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "no site/domain field" in rendered
|
||||
assert "on `name` / `username` only" in rendered
|
||||
|
||||
def test_no_raw_secret_echoed(self) -> None:
|
||||
"""The agent must not echo the raw secret into its reply or persistent context."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "Do NOT echo the raw secret back" in rendered
|
||||
|
||||
def test_old_permissive_clause_is_gone(self) -> None:
|
||||
"""The v2 template used to authorize inline secrets via `parameters` — must be removed."""
|
||||
rendered = _render_agent_prompt()
|
||||
assert "redacted from the outbound client stream" not in rendered
|
||||
assert "you may pass it via `parameters`" not in rendered
|
||||
|
||||
|
||||
class TestToolDocstringsRefusalClause:
|
||||
"""Tool docstrings reach the agent via FunctionTool.description — they must agree with the prompt."""
|
||||
|
||||
def _tools(self) -> list[object]:
|
||||
return [run_blocks_tool, update_and_run_blocks_tool]
|
||||
|
||||
def test_old_permissive_clause_gone_from_tools(self) -> None:
|
||||
"""The clause that told the agent inline secrets were fine via `parameters` is removed."""
|
||||
for tool in self._tools():
|
||||
desc = tool.description # type: ignore[attr-defined]
|
||||
assert "redacted from" not in desc, f"{tool.name} still claims redaction" # type: ignore[attr-defined]
|
||||
assert "you may pass it via" not in desc, f"{tool.name} still permits inline secrets" # type: ignore[attr-defined]
|
||||
|
||||
def test_new_refusal_reference_in_tools(self) -> None:
|
||||
"""Docstrings point back at the CREDENTIAL HANDLING refusal rule in the system prompt."""
|
||||
import re
|
||||
|
||||
cross_ref = re.compile(r"CREDENTIAL\s+HANDLING refusal rule")
|
||||
for tool in self._tools():
|
||||
desc = tool.description # type: ignore[attr-defined]
|
||||
assert "do NOT pass" in desc, f"{tool.name} does not forbid inline secret pass-through" # type: ignore[attr-defined]
|
||||
assert cross_ref.search(desc), f"{tool.name} does not cross-reference the refusal rule" # type: ignore[attr-defined]
|
||||
|
||||
def test_non_secret_parameters_guidance_preserved(self) -> None:
|
||||
"""The `parameters` dict is still the right channel for non-secret runtime values."""
|
||||
for tool in self._tools():
|
||||
desc = tool.description # type: ignore[attr-defined]
|
||||
assert "non-secret values" in desc, f"{tool.name} missing non-secret guidance" # type: ignore[attr-defined]
|
||||
|
||||
def test_list_credentials_tool_describes_pagination(self) -> None:
|
||||
"""list_credentials docstring must warn about paging before concluding no match."""
|
||||
from skyvern.forge.sdk.copilot.tools import list_credentials_tool
|
||||
|
||||
desc = list_credentials_tool.description # type: ignore[attr-defined]
|
||||
assert "has_more" in desc
|
||||
assert "already stored on a later page" in desc
|
||||
|
||||
|
||||
class TestBrowserToolOverlayRefusalCaveat:
|
||||
"""The MCP browser-tool overlays are also operating instructions for the agent.
|
||||
|
||||
type_text is the primary leakage vector (typing a pasted password into a
|
||||
live login form). Its overlay description must tell the agent to refuse
|
||||
there too — otherwise prompt-level refusal is bypassable by the agent
|
||||
following the overlay's own (previously silent) description.
|
||||
"""
|
||||
|
||||
def test_type_text_overlay_forbids_inline_secrets(self) -> None:
|
||||
from skyvern.forge.sdk.copilot.tools import _build_skyvern_mcp_overlays
|
||||
|
||||
overlays = _build_skyvern_mcp_overlays()
|
||||
assert "type_text" in overlays
|
||||
desc = overlays["type_text"].description or ""
|
||||
assert "NEVER type inline passwords" in desc
|
||||
assert "CREDENTIAL HANDLING refusal rule" in desc
|
||||
Loading…
Add table
Add a link
Reference in a new issue