Skyvern/tests/unit/test_mcp_response_size_cap.py

196 lines
7.1 KiB
Python

"""Unit tests for MCP response size cap."""
from __future__ import annotations
import json
from typing import Any
import pytest
from skyvern.cli.mcp_tools.response import (
MCP_MAX_RESPONSE_CHARS,
size_capped,
truncate_response,
)
def test_truncate_response_passes_small_payload_unchanged() -> None:
    """A small payload comes back as the very same object, not a copy."""
    payload = {"ok": True, "data": {"items": list(range(10))}}

    returned = truncate_response(payload)

    # Identity (not just equality): the helper must not rebuild the dict.
    assert returned is payload
def test_truncate_response_wraps_large_payload_with_envelope() -> None:
    """An over-cap dict is replaced by a truncation envelope."""
    # The body string alone is 100 characters longer than the default cap.
    oversized = {
        "ok": True,
        "data": {"body": "x" * (MCP_MAX_RESPONSE_CHARS + 100)},
    }

    envelope = truncate_response(oversized)

    assert envelope is not oversized
    assert envelope["_truncated"] is True
    assert envelope["_max_chars"] == MCP_MAX_RESPONSE_CHARS
    assert envelope["_original_chars"] > MCP_MAX_RESPONSE_CHARS
    assert "Narrow the query" in envelope["_hint"]
    # Top-level `ok` survives so callers reading .ok keep working.
    assert envelope["ok"] is True
    # The bulky payload itself is not carried into the envelope.
    assert "data" not in envelope
def test_truncate_response_preserves_top_level_error_on_overflow() -> None:
    """A small top-level `error` is kept intact when the payload overflows."""
    expected_error = {"code": "TIMEOUT", "message": "page did not load"}
    oversized = {
        "ok": False,
        # Pass a copy so the expectation stays independent of the input.
        "error": dict(expected_error),
        # Padding whose only job is to push the total past the cap.
        "debug": "y" * (MCP_MAX_RESPONSE_CHARS + 50),
    }

    envelope = truncate_response(oversized)

    assert envelope["_truncated"] is True
    assert envelope["ok"] is False
    assert envelope["error"] == expected_error
def test_truncate_response_preserves_identifier_fields_on_overflow() -> None:
    """`*_id` keys ride along in the envelope; other keys are dropped."""
    # Identifiers returned next to a bulky payload must stay in the envelope
    # so the caller can issue a narrower follow-up query.
    identifiers = {
        "workflow_id": "wpid_abc123",
        "run_id": "wr_xyz789",
        "session_id": "pbs_qqq000",
    }
    oversized = {
        "ok": True,
        **identifiers,
        "timestamp": "ignored",
        "count": 12345,
        "data": {"blob": "z" * (MCP_MAX_RESPONSE_CHARS + 500)},
    }

    envelope = truncate_response(oversized)

    assert envelope["_truncated"] is True
    assert envelope["ok"] is True
    for key, value in identifiers.items():
        assert envelope[key] == value
    # Keys not ending in `_id` are discarded, as is the oversized payload.
    for dropped_key in ("timestamp", "count", "data"):
        assert dropped_key not in envelope
def test_truncate_response_caps_oversize_error_field() -> None:
    """An `error` bigger than the cap is summarized, not copied verbatim."""
    # Pathological case: `error.message` alone exceeds the cap (think a full
    # HTML dump or stack trace). Copying it as-is would push the envelope
    # past max_chars and break the "under cap" contract.
    bloated_message = "x" * (MCP_MAX_RESPONSE_CHARS + 500)
    oversized = {
        "ok": False,
        "error": {"code": "INTERNAL", "message": bloated_message},
        "data": {"n": 1},
    }

    envelope = truncate_response(oversized)
    summarized_error = envelope["error"]

    assert envelope["_truncated"] is True
    assert envelope["ok"] is False
    # A structured placeholder stands in for the oversized error payload.
    assert summarized_error != oversized["error"]
    assert isinstance(summarized_error, dict)
    assert "_original_error_chars" in summarized_error
    assert summarized_error["_error_preview"].endswith("... [truncated]")
    # The envelope as a whole still honors the cap (module contract).
    serialized = json.dumps(envelope, ensure_ascii=False)
    assert len(serialized) <= MCP_MAX_RESPONSE_CHARS
def test_truncate_response_drops_oversize_identifier_values() -> None:
    """A `*_id` value that is itself huge is left out of the envelope."""
    # Keeping an identifier larger than the per-value cap would re-inflate
    # the envelope past the overall limit, so it must be dropped.
    oversized = {
        "ok": True,
        "short_id": "abc",
        "huge_id": "x" * 10_000,
        "data": "y" * (MCP_MAX_RESPONSE_CHARS + 100),
    }

    envelope = truncate_response(oversized)

    assert envelope["_truncated"] is True
    assert envelope["short_id"] == "abc"
    assert "huge_id" not in envelope
def test_truncate_response_accepts_custom_max() -> None:
    """An explicit `max_chars` overrides the default cap."""
    custom_cap = 100
    # Serialized, this payload is a little over 200 chars, so a cap of 100
    # forces truncation.
    payload = {"data": "z" * 200}

    envelope = truncate_response(payload, max_chars=custom_cap)

    assert envelope["_truncated"] is True
    assert envelope["_max_chars"] == 100
def test_truncate_response_non_dict_overflow_wraps_into_envelope() -> None:
    """A non-dict payload over the cap is still wrapped in a dict envelope."""
    # A tool that returns a raw list (unusual but legal) should still be guarded.
    # Derive the list length from the cap instead of relying on a fixed count:
    # every element contributes at least 100 characters, so
    # (cap // 100 + 1) elements always exceed MCP_MAX_RESPONSE_CHARS even if
    # the cap is raised later. Never go below the historical 2000 elements.
    element_count = max(2000, MCP_MAX_RESPONSE_CHARS // 100 + 1)
    big_list = ["x" * 100] * element_count

    result = truncate_response(big_list)

    assert isinstance(result, dict)
    assert result["_truncated"] is True
    # A list has no top-level `ok` to preserve, so none may be invented.
    assert "ok" not in result
def test_truncate_response_unserializable_input_returned_as_is() -> None:
    """A small payload holding a non-JSON value is passed through untouched."""
    # object() is not JSON-serializable; json.dumps(..., default=str)
    # stringifies it, so the measured size is small and the helper hands the
    # payload back unchanged.
    payload: dict[str, Any] = {"x": object()}

    returned = truncate_response(payload)

    assert returned is payload
def test_truncate_response_serialization_failure_is_fail_closed() -> None:
    """A payload that cannot be measured is wrapped, never passed through."""
    import sys

    # A self-referencing dict makes json.dumps raise ValueError. A size cap
    # that cannot measure its input must fail CLOSED (emit the truncation
    # envelope) instead of letting the unmeasurable payload through.
    looped: dict[str, Any] = {"ok": True, "error": None}
    looped["self"] = looped

    envelope = truncate_response(looped)

    assert envelope is not looped
    assert envelope["_truncated"] is True
    # Sentinel: unmeasurable payloads report `sys.maxsize` as
    # `_original_chars`. Pinning it here means an accidental change (say,
    # returning 0 or None on serialization error) fails this test.
    assert envelope["_original_chars"] == sys.maxsize
    # Top-level `ok` / `error` are still copied from the original dict so
    # callers reading those fields keep working.
    assert envelope["ok"] is True
    assert envelope["error"] is None
@pytest.mark.asyncio
async def test_size_capped_decorator_no_op_for_small_result() -> None:
    """A decorated tool with a small result returns that result unchanged."""
    expected = {"ok": True, "data": {"n": 1}}

    @size_capped
    async def small_tool() -> dict[str, Any]:
        return {"ok": True, "data": {"n": 1}}

    outcome = await small_tool()

    assert outcome == expected
@pytest.mark.asyncio
async def test_size_capped_decorator_wraps_oversize_result() -> None:
    """A decorated tool with an over-cap result yields a bounded envelope."""

    @size_capped
    async def big_tool() -> dict[str, Any]:
        blob = "q" * (MCP_MAX_RESPONSE_CHARS + 500)
        return {"ok": True, "data": {"blob": blob}}

    envelope = await big_tool()

    assert envelope["_truncated"] is True
    assert envelope["ok"] is True
    # Serializing the envelope again must land at or under the cap.
    serialized = json.dumps(envelope, ensure_ascii=False)
    assert len(serialized) <= MCP_MAX_RESPONSE_CHARS
@pytest.mark.asyncio
async def test_size_capped_decorator_preserves_signature() -> None:
    """Arguments pass through the decorator and `__name__` is retained."""

    @size_capped
    async def typed_tool(x: int, y: str = "default") -> dict[str, Any]:
        return {"x": x, "y": y}

    # Check the metadata first; it is independent of the call below.
    assert typed_tool.__name__ == "typed_tool"

    outcome = await typed_tool(1, y="override")
    assert outcome == {"x": 1, "y": "override"}