mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
196 lines
7.1 KiB
Python
196 lines
7.1 KiB
Python
"""Unit tests for MCP response size cap."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from typing import Any
|
|
|
|
import pytest
|
|
|
|
from skyvern.cli.mcp_tools.response import (
|
|
MCP_MAX_RESPONSE_CHARS,
|
|
size_capped,
|
|
truncate_response,
|
|
)
|
|
|
|
|
|
def test_truncate_response_passes_small_payload_unchanged() -> None:
    """A small payload comes back as the same object, not a copy or envelope."""
    payload = {"ok": True, "data": {"items": [*range(10)]}}

    returned = truncate_response(payload)

    # Identity (not just equality): the helper must not rebuild small payloads.
    assert returned is payload
|
|
|
|
|
|
def test_truncate_response_wraps_large_payload_with_envelope() -> None:
    """An over-cap dict is swapped for a truncation envelope that keeps ``ok``."""
    # The body string alone is already longer than the default cap.
    oversized_body = "x" * (MCP_MAX_RESPONSE_CHARS + 100)
    original = {"ok": True, "data": {"body": oversized_body}}

    envelope = truncate_response(original)

    assert envelope is not original

    # Envelope metadata describing what happened.
    assert envelope["_truncated"] is True
    assert envelope["_max_chars"] == MCP_MAX_RESPONSE_CHARS
    assert envelope["_original_chars"] > MCP_MAX_RESPONSE_CHARS
    assert "Narrow the query" in envelope["_hint"]

    # Callers that read `.ok` must keep working, so the flag is carried over.
    assert envelope["ok"] is True

    # The bulky payload itself must not be carried into the envelope.
    assert "data" not in envelope
|
|
|
|
|
|
def test_truncate_response_preserves_top_level_error_on_overflow() -> None:
    """A small top-level ``error`` survives truncation alongside ``ok``."""
    # `debug` is pure padding whose only job is to push the total over the cap.
    padding = "y" * (MCP_MAX_RESPONSE_CHARS + 50)
    payload = {
        "ok": False,
        "error": {"code": "TIMEOUT", "message": "page did not load"},
        "debug": padding,
    }

    envelope = truncate_response(payload)

    assert envelope["_truncated"] is True
    assert envelope["ok"] is False
    # Compared against a fresh literal so in-place mutation would be noticed.
    assert envelope["error"] == {"code": "TIMEOUT", "message": "page did not load"}
|
|
|
|
|
|
def test_truncate_response_preserves_identifier_fields_on_overflow() -> None:
    """Top-level ``*_id`` fields are kept in the envelope; other keys are not.

    Keeping the identifiers lets the caller re-query with a narrower request
    after receiving a truncated response.
    """
    identifiers = {
        "workflow_id": "wpid_abc123",
        "run_id": "wr_xyz789",
        "session_id": "pbs_qqq000",
    }
    payload = {
        "ok": True,
        **identifiers,
        "timestamp": "ignored",
        "count": 12345,
        "data": {"blob": "z" * (MCP_MAX_RESPONSE_CHARS + 500)},
    }

    envelope = truncate_response(payload)

    assert envelope["_truncated"] is True
    assert envelope["ok"] is True

    # Every identifier comes through with its original value.
    for key, expected in identifiers.items():
        assert envelope[key] == expected

    # Keys that do not end with `_id` are not preserved, and neither is the
    # oversized payload itself.
    for dropped_key in ("timestamp", "count", "data"):
        assert dropped_key not in envelope
|
|
|
|
|
|
def test_truncate_response_caps_oversize_error_field() -> None:
    """An ``error`` that is itself over the cap is bounded, not copied verbatim.

    Pathological case: something like a full HTML dump or stack trace ends up
    in ``error.message``. Copying it into the envelope unchanged would push
    the envelope past the cap and break the "under cap" contract.
    """
    huge_message = "x" * (MCP_MAX_RESPONSE_CHARS + 500)
    payload = {
        "ok": False,
        "error": {"code": "INTERNAL", "message": huge_message},
        "data": {"n": 1},
    }

    envelope = truncate_response(payload)

    assert envelope["_truncated"] is True
    assert envelope["ok"] is False

    # The oversized error is replaced by a structured placeholder.
    bounded_error = envelope["error"]
    assert bounded_error != payload["error"]
    assert isinstance(bounded_error, dict)
    assert "_original_error_chars" in bounded_error
    assert bounded_error["_error_preview"].endswith("... [truncated]")

    # Module contract: the serialized envelope itself fits under the cap.
    serialized = json.dumps(envelope, ensure_ascii=False)
    assert len(serialized) <= MCP_MAX_RESPONSE_CHARS
|
|
|
|
|
|
def test_truncate_response_drops_oversize_identifier_values() -> None:
    """An ``*_id`` value that is itself huge is dropped from the envelope.

    Otherwise a single oversized identifier could re-inflate the envelope
    past the overall limit.
    """
    bloated_identifier = "x" * 10_000
    filler = "y" * (MCP_MAX_RESPONSE_CHARS + 100)
    payload = {
        "ok": True,
        "short_id": "abc",
        "huge_id": bloated_identifier,
        "data": filler,
    }

    envelope = truncate_response(payload)

    assert envelope["_truncated"] is True
    # The reasonably sized identifier is kept...
    assert envelope["short_id"] == "abc"
    # ...while the oversized one is left out.
    assert "huge_id" not in envelope
|
|
|
|
|
|
def test_truncate_response_accepts_custom_max() -> None:
    """A caller-supplied ``max_chars`` is honoured and echoed in the envelope."""
    custom_cap = 100
    # The payload serializes to a little over 200 chars, so a cap of 100
    # forces truncation.
    payload = {"data": "z" * 200}

    envelope = truncate_response(payload, max_chars=custom_cap)

    assert envelope["_truncated"] is True
    assert envelope["_max_chars"] == custom_cap
|
|
|
|
|
|
def test_truncate_response_non_dict_overflow_wraps_into_envelope() -> None:
    """An over-cap non-dict payload (a raw list) is still wrapped in an envelope.

    Returning a bare list from a tool is unusual but legal, so the guard must
    cover it. There is no top-level ``ok`` to carry over, so the envelope must
    not invent one.
    """
    # Size the list from the cap instead of hard-coding a count: each element
    # serializes to at least 102 chars (100 + two quotes), so this many
    # elements always exceeds MCP_MAX_RESPONSE_CHARS, whatever its value.
    element = "x" * 100
    element_count = MCP_MAX_RESPONSE_CHARS // len(element) + 1
    big_list = [element] * element_count

    result = truncate_response(big_list)

    assert isinstance(result, dict)
    assert result["_truncated"] is True
    assert "ok" not in result
|
|
|
|
|
|
def test_truncate_response_unserializable_input_returned_as_is() -> None:
    """A small payload holding a non-JSON value is passed through untouched.

    ``object()`` is not JSON-serializable, but ``json.dumps(..., default=str)``
    stringifies it, so the payload is still measurable and, being small, is
    returned unchanged.
    """
    payload: dict[str, Any] = {"x": object()}

    returned = truncate_response(payload)

    assert returned is payload
|
|
|
|
|
|
def test_truncate_response_serialization_failure_is_fail_closed() -> None:
    """A payload that cannot be measured is wrapped rather than passed through.

    A circular reference makes ``json.dumps`` raise ``ValueError``. A size cap
    that cannot measure its input must fail CLOSED (emit the truncation
    envelope) instead of letting the unmeasurable payload through.
    """
    import sys

    looped: dict[str, Any] = {"ok": True, "error": None}
    looped["self"] = looped  # self-reference -> not serializable

    envelope = truncate_response(looped)

    assert envelope is not looped
    assert envelope["_truncated"] is True

    # Sentinel: unmeasurable payloads report `sys.maxsize` as their original
    # size. This pins the fail-closed contract, so an accidental change (for
    # example reporting 0 or None on a serialization error) trips here.
    assert envelope["_original_chars"] == sys.maxsize

    # Top-level `ok` / `error` are still copied from the original dict so
    # callers that read those fields keep working.
    assert envelope["ok"] is True
    assert envelope["error"] is None
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_size_capped_decorator_no_op_for_small_result() -> None:
    """A decorated tool returning a small dict yields that dict unchanged."""

    @size_capped
    async def small_tool() -> dict[str, Any]:
        return {"ok": True, "data": {"n": 1}}

    outcome = await small_tool()

    # Compared against a fresh literal: no envelope keys may have been added.
    assert outcome == {"ok": True, "data": {"n": 1}}
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_size_capped_decorator_wraps_oversize_result() -> None:
    """A decorated tool returning an over-cap dict yields a bounded envelope."""
    oversized_blob = "q" * (MCP_MAX_RESPONSE_CHARS + 500)

    @size_capped
    async def big_tool() -> dict[str, Any]:
        return {"ok": True, "data": {"blob": oversized_blob}}

    envelope = await big_tool()

    assert envelope["_truncated"] is True
    assert envelope["ok"] is True

    # Re-serializing the wrapped envelope must land under the cap.
    serialized = json.dumps(envelope, ensure_ascii=False)
    assert len(serialized) <= MCP_MAX_RESPONSE_CHARS
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_size_capped_decorator_preserves_signature() -> None:
    """The decorator forwards positional/keyword args and keeps ``__name__``."""

    @size_capped
    async def typed_tool(x: int, y: str = "default") -> dict[str, Any]:
        return {"x": x, "y": y}

    # One positional and one keyword argument must both reach the wrapped tool.
    outcome = await typed_tool(1, y="override")
    assert outcome == {"x": 1, "y": "override"}

    # The wrapper must still present itself under the wrapped function's name.
    assert typed_tool.__name__ == "typed_tool"
|