fix(caching-v2): resolve parameter-key URL references in cache-key domain enrichment (#5655)

This commit is contained in:
pedrohsdb 2026-04-24 16:09:19 -07:00 committed by GitHub
parent 254ba7d851
commit 6c2b4728ee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 92 additions and 6 deletions

View file

@ -23,6 +23,7 @@ from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, is
from skyvern.schemas.scripts import FileEncoding, Script, ScriptFileCreate, ScriptStatus
from skyvern.schemas.workflows import BlockType
from skyvern.services import script_service
from skyvern.utils.url_validators import prepend_scheme_and_validate_url
LOG = structlog.get_logger()
jinja_sandbox_env = SandboxedEnvironment()
@ -114,12 +115,28 @@ def _jinja_domain_filter(url: str) -> str:
jinja_sandbox_env.filters["domain"] = _jinja_domain_filter
def _extract_first_block_domain(workflow: Workflow, parameters: dict[str, Any]) -> str:
"""Extract the domain from the first block that has a URL field.
def _resolve_block_url_for_cache_key(url_template: str, parameters: dict[str, Any]) -> str:
"""Resolve a block ``url`` the same way runtime does: param-key swap,
Jinja render, prepend-scheme validate. Must stay in sync with
``BaseTaskBlock.execute`` / ``format_potential_template_parameters``.
"""
candidate = url_template
if url_template in parameters:
value = parameters[url_template]
if value:
candidate = str(value)
rendered = jinja_sandbox_env.from_string(candidate).render(parameters)
if not rendered:
return ""
return prepend_scheme_and_validate_url(rendered)
Used to automatically enrich the cache key with the target domain so that
the same workflow running against different sites gets separate cached scripts.
Returns empty string if no block URL is found.
def _extract_first_block_domain(workflow: Workflow, parameters: dict[str, Any]) -> str:
"""Extract the domain from the first block's URL for cache-key enrichment.
Calls ``_resolve_block_url_for_cache_key`` on each block's ``url`` and
returns the first non-empty domain. The helper mirrors runtime's URL
resolution see its docstring for the pipeline.
"""
try:
blocks = get_all_blocks(workflow.workflow_definition.blocks)
@ -127,7 +144,7 @@ def _extract_first_block_domain(workflow: Workflow, parameters: dict[str, Any])
url_template = getattr(block, "url", None)
if not url_template:
continue
rendered_url = jinja_sandbox_env.from_string(str(url_template)).render(parameters)
rendered_url = _resolve_block_url_for_cache_key(str(url_template), parameters)
if rendered_url:
domain = _jinja_domain_filter(rendered_url)
if domain:

View file

@ -94,3 +94,72 @@ class TestExtractFirstBlockDomain:
wf = _workflow(blocks)
# Jinja renders undefined variables as empty string in SandboxedEnvironment
assert _extract_first_block_domain(wf, {}) == ""
def test_resolves_parameter_key_reference(self) -> None:
"""url field holding a bare parameter key resolves via the parameters dict.
Mirrors BaseTaskBlock.execute() which replaces self.url with the parameter
value when self.url matches a parameter key. Without this resolution the
cache key at run start renders to the literal parameter name (e.g.
``default:website_url:v2``) and never matches any persisted script, while
mid-run gen after block execution mutates self.url renders the real
domain. Scripts get persisted under the correct key but are never loaded
at run start.
"""
blocks = [_task_block("step1", url="website_url")]
wf = _workflow(blocks)
params = {"website_url": "https://secure.example.com/login"}
assert _extract_first_block_domain(wf, params) == "secure.example.com"
def test_parameter_key_reference_takes_precedence_over_jinja(self) -> None:
"""When url is a bare key that matches a parameter, use the parameter value,
not Jinja rendering of the literal string."""
blocks = [_task_block("step1", url="target_site")]
wf = _workflow(blocks)
params = {"target_site": "https://portal.example.org/dashboard"}
# Without the fix this would return "target_site" (literal jinja render).
assert _extract_first_block_domain(wf, params) == "portal.example.org"
def test_bare_key_without_matching_parameter_falls_back_to_jinja(self) -> None:
"""If url looks like a bare key but no parameter matches, Jinja renders
the literal (``unknown_key``), prepend_scheme_and_validate_url yields
``https://unknown_key``, and the domain filter returns ``unknown_key``.
The broken workflow produces a meaningless cache segment, but that's
consistent with what runtime would produce via
format_block_parameter_template_from_workflow_run_context."""
blocks = [_task_block("step1", url="unknown_key")]
wf = _workflow(blocks)
assert _extract_first_block_domain(wf, {}) == "unknown_key"
def test_parameter_key_with_falsy_value_falls_back_to_jinja(self) -> None:
"""Runtime's truthy-value guard (block.py:931 ``if task_url_parameter_value:``)
leaves self.url unchanged when the parameter value is None/empty. Mirror
that here the falsy value causes a fall-through to the literal key
(``website_url``). Without the guard, ``str(None)`` would yield the
string ``None`` and cache-key segment ``default:None:v2``, which
diverges from runtime's behavior of keeping the literal."""
blocks = [_task_block("step1", url="website_url")]
wf = _workflow(blocks)
assert _extract_first_block_domain(wf, {"website_url": None}) == "website_url"
assert _extract_first_block_domain(wf, {"website_url": ""}) == "website_url"
def test_parameter_value_containing_jinja_is_rendered_too(self) -> None:
"""Runtime renders Jinja on the substituted value (format_potential_template_parameters
at block.py:753). The fix mirrors this with a post-substitution Jinja render so
parameter values that themselves contain template expressions are fully expanded."""
blocks = [_task_block("step1", url="website_url")]
wf = _workflow(blocks)
params = {
"website_url": "{{ base_url }}/login",
"base_url": "https://nested.example.com",
}
assert _extract_first_block_domain(wf, params) == "nested.example.com"
def test_scheme_less_parameter_value_is_normalized(self) -> None:
"""Runtime applies prepend_scheme_and_validate_url at block.py:754 so
scheme-less URLs get an ``https://`` prepended before navigation. Lock
the normalization branch against regression."""
blocks = [_task_block("step1", url="website_url")]
wf = _workflow(blocks)
params = {"website_url": "example.com/login"}
assert _extract_first_block_domain(wf, params) == "example.com"