fix(SKY-9189): restore raw-credential refusal in Workflow Copilot v2 (#5598)

This commit is contained in:
Andrew Neilson 2026-04-22 14:54:15 -07:00 committed by GitHub
parent de1959a6ac
commit 0dad5ea50b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 308 additions and 33 deletions

View file

@ -314,3 +314,42 @@ class TestTranslateToAgentResultGating:
assert "test failed" in agent_result.user_response.lower()
assert "All done" not in agent_result.user_response
assert agent_result.updated_workflow is None
class TestCredentialRefusalReachesAgent:
"""Prove the SKY-9189 refusal rule is actually delivered to the agent.
`run_copilot_agent` constructs the openai-agents SDK `Agent(...)` with
`instructions=_build_system_prompt(...)` and `tools=list(NATIVE_TOOLS)`.
A behavior test would require patching the agent loop and is fragile; a
construction test (rule text flows through the exact helpers the route
uses) is deterministic and catches both prompt and tool-surface drift.
"""
def test_build_system_prompt_carries_refusal_clause(self) -> None:
from skyvern.forge.sdk.copilot.agent import _build_system_prompt
prompt = _build_system_prompt(tool_usage_guide="", security_rules="")
assert "CREDENTIAL HANDLING - CRITICAL" in prompt
assert "DO NOT PROVIDE RAW LOGIN/PASSWORD" in prompt
assert "MUST NOT build, update, or run a workflow" in prompt
assert "redacted from the outbound client stream" not in prompt
def test_native_tools_carry_refusal_reference(self) -> None:
import re
from skyvern.forge.sdk.copilot.tools import NATIVE_TOOLS
targets = {"run_blocks_and_collect_debug", "update_and_run_blocks"}
matched = {tool.name for tool in NATIVE_TOOLS if tool.name in targets}
assert matched == targets, f"missing tools in NATIVE_TOOLS: {targets - matched}"
cross_ref = re.compile(r"CREDENTIAL\s+HANDLING refusal rule")
for tool in NATIVE_TOOLS:
if tool.name not in targets:
continue
desc = tool.description
assert "redacted from" not in desc, f"{tool.name} still claims redaction"
assert "you may pass it via" not in desc, f"{tool.name} still permits inline secrets"
assert cross_ref.search(desc), f"{tool.name} missing refusal cross-reference"

View file

@ -5,6 +5,7 @@ from __future__ import annotations
from unittest.mock import MagicMock
from skyvern.forge.sdk.copilot.output_utils import (
parse_final_response,
sanitize_tool_result_for_llm,
summarize_tool_result,
truncate_output,
@ -323,3 +324,43 @@ class TestSummarizeToolResult:
def test_unknown_tool_returns_ok(self) -> None:
summary = self._summarize("unknown_tool", {"ok": True})
assert summary == "OK"
class TestParseFinalResponse:
"""parse_final_response is the last mile between model output and the frontend.
A parse failure falls back to `{"type": "REPLY", "user_response": text}`,
which means the raw JSON object is rendered in the chat bubble. Real model
outputs sometimes embed literal newlines inside string values (strict
`json.loads` rejects those) seen in SKY-9189 test-2 where the full
refusal envelope landed in the user bubble instead of just user_response.
"""
def test_parses_clean_json_envelope(self) -> None:
envelope = '{"type": "ASK_QUESTION", "user_response": "hi"}'
parsed = parse_final_response(envelope)
assert parsed == {"type": "ASK_QUESTION", "user_response": "hi"}
def test_strips_json_code_fence(self) -> None:
envelope = '```json\n{"type": "REPLY", "user_response": "ok"}\n```'
assert parse_final_response(envelope)["type"] == "REPLY"
def test_tolerates_literal_newline_inside_string_value(self) -> None:
# Real model output shape: a multi-line user_response split across
# actual newlines instead of \n escapes. strict=True rejects this,
# strict=False accepts it. Without the fallback, the whole JSON blob
# gets shown to the user.
envelope = '{"type": "ASK_QUESTION", "user_response": "line one\nline two"}'
parsed = parse_final_response(envelope)
assert parsed["type"] == "ASK_QUESTION"
assert parsed["user_response"] == "line one\nline two"
def test_unparseable_text_falls_back_to_reply(self) -> None:
# Genuinely broken output still degrades gracefully.
parsed = parse_final_response("not json at all")
assert parsed == {"type": "REPLY", "user_response": "not json at all"}
def test_non_dict_json_falls_back_to_reply(self) -> None:
# A JSON array at top level is valid JSON but not a valid envelope.
parsed = parse_final_response("[1, 2, 3]")
assert parsed == {"type": "REPLY", "user_response": "[1, 2, 3]"}

View file

@ -0,0 +1,177 @@
"""Deterministic prompt + tool-docstring tests for the v2 raw-credential refusal.
Locks in the fix for SKY-9189. The rule lives in two places the agent reads
as operating instructions: the system prompt template
`workflow-copilot-agent.j2`, and the tool docstrings for
`run_blocks_and_collect_debug` and `update_and_run_blocks` (exposed to the
agents SDK via FunctionTool.description). Both must state the same policy,
or the agent follows whichever it weights higher.
"""
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.copilot.tools import (
run_blocks_tool,
update_and_run_blocks_tool,
)
_AGENT_TEMPLATE_DEFAULTS = dict(
workflow_knowledge_base="test kb",
current_datetime="2026-01-01T00:00:00Z",
tool_usage_guide="",
security_rules="",
)
def _render_agent_prompt() -> str:
return prompt_engine.load_prompt("workflow-copilot-agent", **_AGENT_TEMPLATE_DEFAULTS)
class TestAgentPromptRefusalClause:
"""The rewritten system prompt must carry the hard refusal rule."""
def test_uppercase_marker_present(self) -> None:
"""The recognizability marker preserved from v1 must render verbatim."""
rendered = _render_agent_prompt()
assert "DO NOT PROVIDE RAW LOGIN/PASSWORD" in rendered
def test_critical_section_header_present(self) -> None:
"""The section is labelled CRITICAL so prompt scanners can find it."""
rendered = _render_agent_prompt()
assert "CREDENTIAL HANDLING - CRITICAL" in rendered
def test_ask_question_response_required(self) -> None:
"""On refusal, the agent must use ASK_QUESTION, not a workflow build."""
rendered = _render_agent_prompt()
assert "ASK_QUESTION" in rendered
assert "MUST NOT build, update, or run a workflow" in rendered
def test_generalized_secret_surface_enumerated(self) -> None:
"""Refusal covers more than username/password — all the common leakage shapes."""
rendered = _render_agent_prompt()
for term in (
"API keys",
"OAuth tokens",
"Authorization: Bearer",
"JWTs",
"session cookies",
"TOTP seeds",
"OTP",
"private keys",
"credit card numbers",
"CVVs",
):
assert term in rendered, f"missing secret-surface term: {term}"
def test_browser_tools_also_forbidden(self) -> None:
"""The refusal must block not just workflow build/run but direct browser-tool paths too.
Without this, an agent can 'comply' by skipping workflow tools and still
type_text the pasted password into a live login form. Explicit names of
the browser tools keep the rule concrete for the LLM.
"""
rendered = _render_agent_prompt()
assert "type_text" in rendered
assert "click" in rendered
assert "press_key" in rendered
assert "any other direct browser tool" in rendered
def test_list_credentials_pagination_instruction(self) -> None:
"""Prompt must tell the agent to page list_credentials before concluding 'no match'.
list_credentials defaults to page_size=10 and caps at 50. Without this
instruction the agent can falsely tell the user 'no stored credential
exists' and send them to the UI to create one they already have.
"""
rendered = _render_agent_prompt()
assert "page_size=50" in rendered
assert "has_more" in rendered
def test_negative_bare_identifier_is_not_a_credential(self) -> None:
"""A bare username/email alone must not trigger the refusal rule.
The section explicitly calls out that bare non-secret identifiers don't
count, so the prompt doesn't over-refuse ('my username is alice').
"""
rendered = _render_agent_prompt()
assert "bare username" in rendered
assert "NOT a raw credential" in rendered
assert "does not trigger this rule" in rendered
def test_list_credentials_called_first(self) -> None:
"""The refusal flow routes through list_credentials for discoverable matches."""
rendered = _render_agent_prompt()
assert "FIRST call `list_credentials`" in rendered
def test_no_domain_matching(self) -> None:
"""list_credentials has no domain field, so the rule must match on name/username only."""
rendered = _render_agent_prompt()
assert "no site/domain field" in rendered
assert "on `name` / `username` only" in rendered
def test_no_raw_secret_echoed(self) -> None:
"""The agent must not echo the raw secret into its reply or persistent context."""
rendered = _render_agent_prompt()
assert "Do NOT echo the raw secret back" in rendered
def test_old_permissive_clause_is_gone(self) -> None:
"""The v2 template used to authorize inline secrets via `parameters` — must be removed."""
rendered = _render_agent_prompt()
assert "redacted from the outbound client stream" not in rendered
assert "you may pass it via `parameters`" not in rendered
class TestToolDocstringsRefusalClause:
"""Tool docstrings reach the agent via FunctionTool.description — they must agree with the prompt."""
def _tools(self) -> list[object]:
return [run_blocks_tool, update_and_run_blocks_tool]
def test_old_permissive_clause_gone_from_tools(self) -> None:
"""The clause that told the agent inline secrets were fine via `parameters` is removed."""
for tool in self._tools():
desc = tool.description # type: ignore[attr-defined]
assert "redacted from" not in desc, f"{tool.name} still claims redaction" # type: ignore[attr-defined]
assert "you may pass it via" not in desc, f"{tool.name} still permits inline secrets" # type: ignore[attr-defined]
def test_new_refusal_reference_in_tools(self) -> None:
"""Docstrings point back at the CREDENTIAL HANDLING refusal rule in the system prompt."""
import re
cross_ref = re.compile(r"CREDENTIAL\s+HANDLING refusal rule")
for tool in self._tools():
desc = tool.description # type: ignore[attr-defined]
assert "do NOT pass" in desc, f"{tool.name} does not forbid inline secret pass-through" # type: ignore[attr-defined]
assert cross_ref.search(desc), f"{tool.name} does not cross-reference the refusal rule" # type: ignore[attr-defined]
def test_non_secret_parameters_guidance_preserved(self) -> None:
"""The `parameters` dict is still the right channel for non-secret runtime values."""
for tool in self._tools():
desc = tool.description # type: ignore[attr-defined]
assert "non-secret values" in desc, f"{tool.name} missing non-secret guidance" # type: ignore[attr-defined]
def test_list_credentials_tool_describes_pagination(self) -> None:
"""list_credentials docstring must warn about paging before concluding no match."""
from skyvern.forge.sdk.copilot.tools import list_credentials_tool
desc = list_credentials_tool.description # type: ignore[attr-defined]
assert "has_more" in desc
assert "already stored on a later page" in desc
class TestBrowserToolOverlayRefusalCaveat:
"""The MCP browser-tool overlays are also operating instructions for the agent.
type_text is the primary leakage vector (typing a pasted password into a
live login form). Its overlay description must tell the agent to refuse
there too otherwise prompt-level refusal is bypassable by the agent
following the overlay's own (previously silent) description.
"""
def test_type_text_overlay_forbids_inline_secrets(self) -> None:
from skyvern.forge.sdk.copilot.tools import _build_skyvern_mcp_overlays
overlays = _build_skyvern_mcp_overlays()
assert "type_text" in overlays
desc = overlays["type_text"].description or ""
assert "NEVER type inline passwords" in desc
assert "CREDENTIAL HANDLING refusal rule" in desc