mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-04-28 03:30:10 +00:00
136 lines
5.2 KiB
Python
136 lines
5.2 KiB
Python
"""Tests for prompt truncation helpers (SKY-8920 Phase B + D)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
def test_truncate_none_returns_none() -> None:
    """Passing ``None`` with a 1000-token budget must yield ``None``."""
    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information

    outcome = truncate_previous_extracted_information(None, max_tokens=1000)
    assert outcome is None
|
|
|
|
|
|
def test_truncate_short_string_returns_unchanged() -> None:
    """A short string given a 1000-token budget comes back equal to the input."""
    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information

    original = "small tail"
    assert truncate_previous_extracted_information(original, max_tokens=1000) == original
|
|
|
|
|
|
def test_truncate_long_string_keeps_tail() -> None:
    """Truncating an oversized string keeps its end and drops its beginning.

    The input is ``HEAD`` + 500,000 filler characters + ``TAIL``; with a
    100-token budget the result must still be a string that ends with
    ``TAIL`` and no longer contains ``HEAD``.
    """
    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information

    oversized = "".join(("HEAD", "x" * 500_000, "TAIL"))
    shortened = truncate_previous_extracted_information(oversized, max_tokens=100)

    assert isinstance(shortened, str)
    assert shortened.endswith("TAIL")
    assert "HEAD" not in shortened
|
|
|
|
|
|
def test_truncate_long_string_respects_exact_token_cap() -> None:
    """The truncated string never exceeds the requested token budget.

    Checked at three budgets (50, 500 and 5,000 tokens) against the same
    long input, measuring the result with ``count_tokens``.
    """
    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information
    from skyvern.utils.token_counter import count_tokens

    filler = "lorem ipsum dolor sit amet " * 20_000
    long_text = filler + "UNIQUE_TAIL_MARKER"
    budgets = (50, 500, 5_000)

    for cap in budgets:
        result = truncate_previous_extracted_information(long_text, max_tokens=cap)
        assert isinstance(result, str)
        # The message re-counts only on failure, to report the actual overshoot.
        assert count_tokens(result) <= cap, f"cap={cap} overshot: {count_tokens(result)}"
|
|
|
|
|
|
def test_truncate_long_list_keeps_recent_entries() -> None:
    """Truncating a long list shortens it while keeping the final entry.

    The input holds 500 dicts, each padded with 1,000 characters; with a
    500-token budget the result must remain a list, end with the entry whose
    ``"i"`` is 499, and contain fewer items than the input.
    """
    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information

    padding = "x" * 1000
    entries = [{"i": index, "pad": padding} for index in range(500)]
    kept = truncate_previous_extracted_information(entries, max_tokens=500)

    assert isinstance(kept, list)
    assert kept[-1]["i"] == 499
    assert len(kept) < len(entries)
|
|
|
|
|
|
def test_truncate_dict_preserves_top_level_keys_and_caps_values() -> None:
    """Truncating a dict keeps both top-level keys and bounds the total size.

    Two 50,000-character values are truncated with a 200-token budget; the
    JSON-serialized result is allowed up to 400 tokens.
    """
    import json

    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information
    from skyvern.utils.token_counter import count_tokens

    payload = {"a": "x" * 50_000, "b": "y" * 50_000}
    expected_keys = {"a", "b"}

    trimmed = truncate_previous_extracted_information(payload, max_tokens=200)

    assert isinstance(trimmed, dict)
    assert set(trimmed.keys()) == expected_keys
    serialized = json.dumps(trimmed)
    assert count_tokens(serialized) <= 400  # small slack for JSON wrapping
|
|
|
|
|
|
def test_truncate_dict_preserves_value_types_when_under_per_key_budget() -> None:
    """Small dict values pass through with their original Python types.

    With a 10,000-token budget the result must equal the input, and the
    nested dict, list and string values must keep their types.
    """
    from skyvern.utils.prompt_truncation import truncate_previous_extracted_information

    payload = {
        "small_dict": {"nested": "data", "count": 42},
        "small_list": [1, 2, 3],
        "small_str": "hello",
    }
    outcome = truncate_previous_extracted_information(payload, max_tokens=10_000)

    # Each item is well under the per_key budget; original types should survive,
    # not be coerced to JSON-serialized strings.
    assert outcome == payload
    expected_types = (("small_dict", dict), ("small_list", list), ("small_str", str))
    for key, expected_type in expected_types:
        assert isinstance(outcome[key], expected_type)
|
|
|
|
|
|
def test_truncate_respects_default_budget() -> None:
    """Pin ``PREVIOUS_EXTRACTED_INFO_MAX_TOKENS`` to 20,000."""
    from skyvern.utils.prompt_truncation import (
        PREVIOUS_EXTRACTED_INFO_MAX_TOKENS as default_budget,
    )

    assert default_budget == 20_000
|
|
|
|
|
|
def test_truncate_extraction_schema_none_returns_none() -> None:
    """A ``None`` schema with a 1000-token budget must yield ``None``."""
    from skyvern.utils.prompt_truncation import truncate_extraction_schema

    outcome = truncate_extraction_schema(None, max_tokens=1000)
    assert outcome is None
|
|
|
|
|
|
def test_truncate_extraction_schema_short_passes_through() -> None:
    """A small schema given a 1000-token budget comes back equal to the input."""
    from skyvern.utils.prompt_truncation import truncate_extraction_schema

    small_schema = {
        "type": "object",
        "properties": {"name": {"type": "string"}},
    }
    assert truncate_extraction_schema(small_schema, max_tokens=1000) == small_schema
|
|
|
|
|
|
def test_truncate_extraction_schema_large_returns_summary_placeholder() -> None:
    """An oversized object schema is reduced and flagged as truncated.

    A 500-property schema (over 10,000 tokens when serialized) is truncated
    with a 2,000-token budget. The serialized result may use up to 2,200
    tokens, must keep ``"type": "object"``, and must carry the
    ``_skyvern_schema_truncated`` flag set to ``True``.
    """
    import json

    from skyvern.utils.prompt_truncation import truncate_extraction_schema
    from skyvern.utils.token_counter import count_tokens

    properties = {}
    for i in range(500):
        properties[f"field_{i}"] = {"type": "string", "description": "x" * 200}
    oversized_schema = {"type": "object", "properties": properties}

    # Guard: the fixture must actually exceed the budget for the test to mean anything.
    baseline_tokens = count_tokens(json.dumps(oversized_schema))
    assert baseline_tokens > 10_000

    reduced = truncate_extraction_schema(oversized_schema, max_tokens=2_000)
    reduced_tokens = count_tokens(json.dumps(reduced))

    assert reduced_tokens <= 2_200
    assert reduced["type"] == "object"
    assert reduced.get("_skyvern_schema_truncated") is True
|
|
|
|
|
|
def test_truncate_extraction_schema_default_budget() -> None:
    """Pin ``EXTRACTION_SCHEMA_MAX_TOKENS`` to 10,000."""
    from skyvern.utils.prompt_truncation import (
        EXTRACTION_SCHEMA_MAX_TOKENS as default_budget,
    )

    assert default_budget == 10_000
|
|
|
|
|
|
def test_truncate_extraction_schema_preserves_array_top_level() -> None:
    """An oversized array schema keeps ``"type": "array"`` after truncation.

    The schema carries a 1,000-entry ``_items`` payload; with a 2,000-token
    budget the serialized result may use up to 2,200 tokens.
    """
    import json

    from skyvern.utils.prompt_truncation import truncate_extraction_schema
    from skyvern.utils.token_counter import count_tokens

    bulky_items = [{"f": f"val_{i}_" + ("lorem ipsum " * 40)} for i in range(1000)]
    item_schema = {"type": "object", "properties": {"f": {"type": "string"}}}
    array_schema = {"type": "array", "items": item_schema, "_items": bulky_items}

    reduced = truncate_extraction_schema(array_schema, max_tokens=2_000)

    serialized = json.dumps(reduced)
    assert count_tokens(serialized) <= 2_200
    assert reduced["type"] == "array"
|