mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2026-05-05 23:41:16 +00:00
Stop MCP model-key hallucinations in workflow creation (#SKY-8302) (#5061)
This commit is contained in:
parent
af91183d75
commit
87f18f4ee0
11 changed files with 810 additions and 26 deletions
|
|
@ -17,6 +17,7 @@ from pydantic import Field
|
|||
from skyvern.client.errors import NotFoundError
|
||||
from skyvern.client.types import WorkflowCreateYamlRequest
|
||||
from skyvern.schemas.runs import ProxyLocation
|
||||
from skyvern.schemas.workflows import WorkflowCreateYAMLRequest as WorkflowCreateYAMLRequestSchema
|
||||
|
||||
from ._common import ErrorCode, Timer, make_error, make_result
|
||||
from ._session import get_skyvern
|
||||
|
|
@ -206,6 +207,63 @@ _CODE_V2_DEFAULTS: dict[str, Any] = {
|
|||
}
|
||||
|
||||
|
||||
def _deep_merge(base: Any, override: Any) -> Any:
|
||||
"""Recursively merge normalized JSON-like data over the raw payload.
|
||||
|
||||
Unknown fields should survive normalization. Lists are merged by index so
|
||||
overlapping items keep raw unknown keys even if normalization changes the
|
||||
list length.
|
||||
"""
|
||||
|
||||
if isinstance(base, dict) and isinstance(override, dict):
|
||||
result = dict(base)
|
||||
for key, value in override.items():
|
||||
if key in result:
|
||||
result[key] = _deep_merge(result[key], value)
|
||||
else:
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
if isinstance(base, list) and isinstance(override, list):
|
||||
merged: list[Any] = []
|
||||
for idx in range(max(len(base), len(override))):
|
||||
if idx < len(base) and idx < len(override):
|
||||
merged.append(_deep_merge(base[idx], override[idx]))
|
||||
elif idx < len(override):
|
||||
merged.append(override[idx])
|
||||
else:
|
||||
merged.append(base[idx])
|
||||
return merged
|
||||
|
||||
return override
|
||||
|
||||
|
||||
def _normalize_json_definition(raw: Any) -> WorkflowCreateYamlRequest:
|
||||
"""Normalize JSON workflow definitions through the shared backend schema."""
|
||||
|
||||
if not isinstance(raw, dict):
|
||||
raise TypeError("Workflow definition JSON must be an object")
|
||||
|
||||
try:
|
||||
normalized = WorkflowCreateYAMLRequestSchema.model_validate(raw)
|
||||
except Exception as exc:
|
||||
# Internal schema is stricter than the Fern SDK — skip normalization so
|
||||
# unknown/future fields are not rejected.
|
||||
LOG.warning("Skipping text-prompt normalization; internal schema rejected payload", error=str(exc))
|
||||
return WorkflowCreateYamlRequest(**raw)
|
||||
|
||||
merged = _deep_merge(raw, normalized.model_dump(mode="json"))
|
||||
return WorkflowCreateYamlRequest(**merged)
|
||||
|
||||
|
||||
def _make_invalid_json_definition_error(exc: Exception) -> dict[str, Any]:
|
||||
return make_error(
|
||||
ErrorCode.INVALID_INPUT,
|
||||
f"Invalid JSON definition: {exc}",
|
||||
"Provide a valid JSON object for the workflow definition",
|
||||
)
|
||||
|
||||
|
||||
def _inject_code_v2_defaults(definition: str, fmt: str) -> str:
|
||||
"""Inject Code 2.0 defaults into a JSON definition string when not explicitly set.
|
||||
|
||||
|
|
@ -237,42 +295,28 @@ def _parse_definition(
|
|||
Exactly one of the first two will be set on success, or error on failure.
|
||||
JSON input is parsed into a WorkflowCreateYamlRequest (the type the SDK expects).
|
||||
"""
|
||||
|
||||
if fmt == "json":
|
||||
try:
|
||||
raw = json.loads(definition)
|
||||
return WorkflowCreateYamlRequest(**raw), None, None
|
||||
except (json.JSONDecodeError, TypeError) as e:
|
||||
return (
|
||||
None,
|
||||
None,
|
||||
make_error(
|
||||
ErrorCode.INVALID_INPUT,
|
||||
f"Invalid JSON definition: {e}",
|
||||
"Provide a valid JSON object for the workflow definition",
|
||||
),
|
||||
)
|
||||
return None, None, _make_invalid_json_definition_error(e)
|
||||
try:
|
||||
return _normalize_json_definition(raw), None, None
|
||||
except Exception as e:
|
||||
return (
|
||||
None,
|
||||
None,
|
||||
make_error(
|
||||
ErrorCode.INVALID_INPUT,
|
||||
f"Invalid workflow definition: {e}",
|
||||
"Check the workflow definition fields (title, workflow_definition with blocks)",
|
||||
),
|
||||
)
|
||||
return None, None, _make_invalid_json_definition_error(e)
|
||||
elif fmt == "yaml":
|
||||
return None, definition, None
|
||||
else:
|
||||
# auto: try JSON first, fall back to YAML
|
||||
try:
|
||||
raw = json.loads(definition)
|
||||
return WorkflowCreateYamlRequest(**raw), None, None
|
||||
except (json.JSONDecodeError, TypeError):
|
||||
return None, definition, None
|
||||
except Exception:
|
||||
# JSON parsed but failed model validation — treat as YAML
|
||||
return None, definition, None
|
||||
try:
|
||||
return _normalize_json_definition(raw), None, None
|
||||
except Exception as e:
|
||||
return None, None, _make_invalid_json_definition_error(e)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue