fix: CJS default export interop for fetch-to-curl under Vite 8 (#5594)

LawyZheng 2026-04-23 00:36:56 +08:00 committed by GitHub
parent 8efd3308ba
commit b4cceab6c1
54 changed files with 3146 additions and 560 deletions

View file

@ -13,6 +13,7 @@ keywords:
<div className="cl-page">
<div className="cl-sidebar">
<p className="cl-sidebar-title">TIMELINE</p>
<a href="#week-of-april-21%2C-2026" className="cl-sidebar-link">Week of Apr 21, 2026</a>
<a href="#week-of-april-14%2C-2026" className="cl-sidebar-link">Week of Apr 14, 2026</a>
<a href="#v1-0-22-—-february-26%2C-2026" className="cl-sidebar-link">v1.0.22 — Feb 26, 2026</a>
<a href="#v1-0-21-—-february-24%2C-2026" className="cl-sidebar-link">v1.0.21 — Feb 24, 2026</a>
@ -26,7 +27,24 @@ keywords:
<h1 className="cl-title">Changelog</h1>
<p className="cl-subtitle">New features, improvements, and fixes in Skyvern</p>
<Update label="Week of April 14, 2026" description="Saudi Arabia proxy, workflow error mapping, smarter caching, and reliability fixes">
<Update label="Week of April 21, 2026" description="Copilot reliability, run recordings, 2FA improvements, and more">
## Improvements
- **Copilot Live Status Narration** — The workflow editor copilot now displays a short live status line while it works, so you can see what the agent is doing instead of waiting in silence. ([#5567](https://github.com/Skyvern-AI/skyvern/pull/5567))
- **Renamed Browser Profile Reset Action** — The API endpoint for clearing a persisted browser session has been renamed from `/browser_session/refresh` to `/browser_session/reset_profile` for clarity. The old path remains available as a deprecated alias; a call sketch follows this list. ([#5572](https://github.com/Skyvern-AI/skyvern/pull/5572))
- **Fewer Screenshots for API Runs** — Screenshot capture frequency has been reduced for API-triggered runs, lowering storage overhead on long-running automations without affecting interactive sessions. ([#5565](https://github.com/Skyvern-AI/skyvern/pull/5565))
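For API consumers of the renamed action, a minimal call might look like the sketch below. This is an illustration only: the host, API-key header, and workflow ID are assumptions, not values from this release.
```python
import requests  # any HTTP client works; requests is used for illustration

# Hypothetical values; substitute your own host, API key, and workflow ID.
BASE_URL = "https://api.skyvern.com"
WORKFLOW_PERMANENT_ID = "wpid_123"

resp = requests.post(
    f"{BASE_URL}/workflows/{WORKFLOW_PERMANENT_ID}/browser_session/reset_profile",
    headers={"x-api-key": "YOUR_API_KEY"},
    timeout=30,
)
resp.raise_for_status()
# The old /browser_session/refresh path still answers as a deprecated alias.
```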
## Bug fixes
- Fixed the workflow copilot incorrectly proposing a workflow it had just failed to build. The copilot now only surfaces an Accept/Reject panel when the last test actually passed. ([#5574](https://github.com/Skyvern-AI/skyvern/pull/5574))
- Fixed 2FA entry failing on pages with custom overlay input components where the real input field is hidden. The agent now types digit keys directly when no interactable input exists. ([#5568](https://github.com/Skyvern-AI/skyvern/pull/5568))
</Update>
<Update label="Week of April 14, 2026" description="Saudi Arabia proxy, workflow error mapping, smarter caching, MCP OAuth, AI output summaries, and reliability fixes">
## New features
@ -34,12 +52,26 @@ keywords:
- **Workflow-Level Error Code Mapping** — Workflows now support an `error_code_mapping` field that all blocks inherit automatically. Block-level mappings still take precedence, giving you a single place to define default error handling without repeating it on every block; a hedged sketch follows this list. ([#5506](https://github.com/Skyvern-AI/skyvern/pull/5506))
- **AI Summarize for Workflow Run Outputs** — A magic-wand "Summarize with AI" button now appears next to block and workflow run output sections, generating a plain-language summary of raw JSON output on demand. ([#5556](https://github.com/Skyvern-AI/skyvern/pull/5556))
- **Reset Browser Profile API** — A new `POST /workflows/{workflow_permanent_id}/browser_session/reset_profile` endpoint clears a corrupted persisted browser session so the next run starts from a fresh browser state. ([#5544](https://github.com/Skyvern-AI/skyvern/pull/5544))
- **MCP OAuth for Remote Clients** — MCP clients (e.g. Claude Code, Codex) can now complete browser-based OAuth to authenticate with Skyvern without getting stuck in redirect chains. ([#5558](https://github.com/Skyvern-AI/skyvern/pull/5558))
- **OpenClaw MCP Setup** — `skyvern setup openclaw` and `skyvern mcp switch` now support OpenClaw as an MCP client target for both remote and local stdio configurations. ([#5536](https://github.com/Skyvern-AI/skyvern/pull/5536))
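The workflow-level `error_code_mapping` above might be declared like the sketch below. The field name comes from this release, but the error codes, messages, and surrounding structure are hypothetical simplifications.
```python
# Hypothetical workflow definition fragment; codes and messages are made up.
workflow_definition = {
    # Workflow-level default, inherited by every block automatically.
    "error_code_mapping": {"TERMINATED": "Run was blocked; retry later."},
    "blocks": [
        {
            "block_type": "task",
            "label": "login",
            # A block-level mapping still takes precedence over the inherited one.
            "error_code_mapping": {"TERMINATED": "Login failed; check credentials."},
        },
        {
            "block_type": "task",
            "label": "download_report",
            # No mapping here: this block falls back to the workflow-level default.
        },
    ],
}
```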
## Improvements
- **Smarter Extraction Cache Keys** — The extraction cache now normalizes session-volatile tokens (random UUIDs, CSRF values, nonce query params, timestamps) before hashing cache keys. Semantically identical page visits now share cached extraction results, improving cache hit rates across workflow re-runs; an illustrative sketch follows this list. ([#5504](https://github.com/Skyvern-AI/skyvern/pull/5504))
- **Reduced Rate-Limit Errors on Extraction Prompts** — Extraction prompts are now capped in size before being sent to the LLM. Oversized `previous_extracted_information`, `extracted_information_schema`, and element trees are truncated rather than sent whole, significantly reducing `429 RESOURCE_EXHAUSTED` errors on high-volume workflows. ([#5502](https://github.com/Skyvern-AI/skyvern/pull/5502))
- **Cross-Run Extraction Cache** — Extraction results are now cached in Redis across workflow runs, scoped to the workflow. Recurring scheduled workflows skip redundant LLM calls when the page content hasn't changed. ([#5562](https://github.com/Skyvern-AI/skyvern/pull/5562))
- **Copilot Survives Tab Switches and Reconnects** — The workflow copilot agent now runs to completion even if the browser tab is closed or the connection drops mid-request, and the AI reply is persisted so it appears when you reconnect. ([#5560](https://github.com/Skyvern-AI/skyvern/pull/5560))
- **Copilot Fails Fast on Unresolvable URLs** — The workflow copilot now immediately surfaces the real error (e.g. DNS resolution failure) and stops retrying when a URL cannot be reached, rather than reporting success. ([#5563](https://github.com/Skyvern-AI/skyvern/pull/5563))
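A rough sketch of the normalization idea behind the smarter cache keys; the patterns and hash choice here are illustrative, not the shipped rules.
```python
import hashlib
import re

# Illustrative volatile-token patterns; the real normalizer's rules are not shown here.
_VOLATILE_PATTERNS = [
    # UUIDs anywhere in the content.
    (re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I), "<uuid>"),
    # csrf/nonce/token query parameters.
    (re.compile(r"\b(csrf|nonce|token)=[^&\s]+", re.I), r"\1=<volatile>"),
    # Crude epoch seconds/millis heuristic.
    (re.compile(r"\b1\d{9,12}\b"), "<timestamp>"),
]

def extraction_cache_key(page_content: str) -> str:
    """Replace session-volatile tokens, then hash, so identical pages share a key."""
    for pattern, replacement in _VOLATILE_PATTERNS:
        page_content = pattern.sub(replacement, page_content)
    return hashlib.sha256(page_content.encode("utf-8")).hexdigest()
```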
## Bug fixes
- Fixed CSV parsing failing on files whose first header row exceeded ~1 MB, causing valid wide-format CSVs to be rejected with "Could not determine delimiter". ([#5510](https://github.com/Skyvern-AI/skyvern/pull/5510))
@ -48,7 +80,11 @@ keywords:
- Fixed workflow file-download blocks losing their configured `download_suffix` at task finalization, causing downloaded files to be persisted without the expected filename suffix. ([#5499](https://github.com/Skyvern-AI/skyvern/pull/5499))
- Fixed cached scripts for non-ATS workflows incorrectly generating `fill_form()` calls, which defeated the performance benefits of caching by delegating back to AI at runtime. ([#5497](https://github.com/Skyvern-AI/skyvern/pull/5497))
- Fixed the pagination navigator on the Workflow Past Runs and Run History pages being right-aligned instead of centered.
- Fixed run recordings missing an end timestamp and broken scrubbing. Recordings are now finalized with a stream-copy remux so players correctly show duration and support seeking; see the remux sketch after this list. ([#5551](https://github.com/Skyvern-AI/skyvern/pull/5551))
- Fixed tag input fields (e.g. skill set fields on job application forms) not selecting values from the dropdown. These fields are now correctly treated as auto-completion inputs. ([#5557](https://github.com/Skyvern-AI/skyvern/pull/5557))
- Fixed per-step video sync spawning a redundant ffmpeg process on every step, causing CPU spikes and log noise. ([#5559](https://github.com/Skyvern-AI/skyvern/pull/5559))
- Fixed workflow parameter edit and delete buttons being hidden when a parameter name was too long.
- Fixed webhook delivery failing silently when a stored webhook URL had a leading or trailing space. ([#5550](https://github.com/Skyvern-AI/skyvern/pull/5550))
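A "stream-copy remux" rewrites the container without re-encoding the streams, which restores the duration and index metadata players need for seeking. In ffmpeg terms it is roughly the following; this is an assumed invocation, not Skyvern's exact command.
```python
import subprocess

def finalize_recording(src: str, dst: str) -> None:
    # -c copy copies the encoded streams as-is into a fresh container, so the
    # output gains proper duration/index metadata without an expensive re-encode.
    subprocess.run(["ffmpeg", "-y", "-i", src, "-c", "copy", dst], check=True)
```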
</Update>
<Update label="v1.0.22 — February 26, 2026" description="Adaptive caching, Workflow Trigger block, CLI signup, and more">

View file

@ -7,6 +7,7 @@ mode: custom
<div className="cl-page">
<div className="cl-sidebar">
<p className="cl-sidebar-title">TIMELINE</p>
<a href="#week-of-april-21%2C-2026" className="cl-sidebar-link">Week of Apr 21, 2026</a>
<a href="#week-of-april-14%2C-2026" className="cl-sidebar-link">Week of Apr 14, 2026</a>
<a href="#week-of-march-30%2C-2026" className="cl-sidebar-link">Week of Mar 30, 2026</a>
<a href="#v1-0-22-—-february-26%2C-2026" className="cl-sidebar-link">v1.0.22 — Feb 26, 2026</a>
@ -21,7 +22,24 @@ mode: custom
<h1 className="cl-title">Changelog</h1>
<p className="cl-subtitle">New features, improvements, and fixes in Skyvern</p>
<Update label="Week of April 14, 2026" description="Saudi Arabia proxy, workflow error mapping, smarter caching, and reliability fixes">
<Update label="Week of April 21, 2026" description="Copilot reliability, run recordings, 2FA improvements, and more">
## Improvements
- **Copilot Live Status Narration** — The workflow editor copilot now displays a short live status line while it works, so you can see what the agent is doing instead of waiting in silence. ([#5567](https://github.com/Skyvern-AI/skyvern/pull/5567))
- **Renamed Browser Profile Reset Action** — The API endpoint for clearing a persisted browser session has been renamed from `/browser_session/refresh` to `/browser_session/reset_profile` for clarity. The old path remains available as a deprecated alias. ([#5572](https://github.com/Skyvern-AI/skyvern/pull/5572))
- **Fewer Screenshots for API Runs** — Screenshot capture frequency has been reduced for API-triggered runs, lowering storage overhead on long-running automations without affecting interactive sessions. ([#5565](https://github.com/Skyvern-AI/skyvern/pull/5565))
## Bug fixes
- Fixed the workflow copilot incorrectly proposing a workflow it had just failed to build. The copilot now only surfaces an Accept/Reject panel when the last test actually passed. ([#5574](https://github.com/Skyvern-AI/skyvern/pull/5574))
- Fixed 2FA entry failing on pages with custom overlay input components where the real input field is hidden. The agent now types digit keys directly when no interactable input exists. ([#5568](https://github.com/Skyvern-AI/skyvern/pull/5568))
</Update>
<Update label="Week of April 14, 2026" description="Saudi Arabia proxy, workflow error mapping, smarter caching, MCP OAuth, AI output summaries, and reliability fixes">
## New features
@ -29,12 +47,26 @@ mode: custom
- **Workflow-Level Error Code Mapping** — Workflows now support an `error_code_mapping` field that all blocks inherit automatically. Block-level mappings still take precedence, giving you a single place to define default error handling without repeating it on every block. ([#5506](https://github.com/Skyvern-AI/skyvern/pull/5506))
- **AI Summarize for Workflow Run Outputs** — A magic-wand "Summarize with AI" button now appears next to block and workflow run output sections, generating a plain-language summary of raw JSON output on demand. ([#5556](https://github.com/Skyvern-AI/skyvern/pull/5556))
- **Reset Browser Profile API** — A new `POST /workflows/{workflow_permanent_id}/browser_session/reset_profile` endpoint clears a corrupted persisted browser session so the next run starts from a fresh browser state. ([#5544](https://github.com/Skyvern-AI/skyvern/pull/5544))
- **MCP OAuth for Remote Clients** — MCP clients (e.g. Claude Code, Codex) can now complete browser-based OAuth to authenticate with Skyvern without getting stuck in redirect chains. ([#5558](https://github.com/Skyvern-AI/skyvern/pull/5558))
- **OpenClaw MCP Setup** — `skyvern setup openclaw` and `skyvern mcp switch` now support OpenClaw as an MCP client target for both remote and local stdio configurations. ([#5536](https://github.com/Skyvern-AI/skyvern/pull/5536))
## Improvements
- **Smarter Extraction Cache Keys** — The extraction cache now normalizes session-volatile tokens (random UUIDs, CSRF values, nonce query params, timestamps) before hashing cache keys. Semantically identical page visits now share cached extraction results, improving cache hit rates across workflow re-runs. ([#5504](https://github.com/Skyvern-AI/skyvern/pull/5504))
- **Reduced Rate-Limit Errors on Extraction Prompts** — Extraction prompts are now capped in size before being sent to the LLM. Oversized `previous_extracted_information`, `extracted_information_schema`, and element trees are truncated rather than sent whole, significantly reducing `429 RESOURCE_EXHAUSTED` errors on high-volume workflows. ([#5502](https://github.com/Skyvern-AI/skyvern/pull/5502))
- **Cross-Run Extraction Cache** — Extraction results are now cached in Redis across workflow runs, scoped to the workflow. Recurring scheduled workflows skip redundant LLM calls when the page content hasn't changed. ([#5562](https://github.com/Skyvern-AI/skyvern/pull/5562))
- **Copilot Survives Tab Switches and Reconnects** — The workflow copilot agent now runs to completion even if the browser tab is closed or the connection drops mid-request, and the AI reply is persisted so it appears when you reconnect. ([#5560](https://github.com/Skyvern-AI/skyvern/pull/5560))
- **Copilot Fails Fast on Unresolvable URLs** — The workflow copilot now immediately surfaces the real error (e.g. DNS resolution failure) and stops retrying when a URL cannot be reached, rather than reporting success. ([#5563](https://github.com/Skyvern-AI/skyvern/pull/5563))
## Bug fixes
- Fixed CSV parsing failing on files whose first header row exceeded ~1 MB, causing valid wide-format CSVs to be rejected with "Could not determine delimiter". ([#5510](https://github.com/Skyvern-AI/skyvern/pull/5510))
@ -43,6 +75,11 @@ mode: custom
- Fixed workflow file-download blocks losing their configured `download_suffix` at task finalization, causing downloaded files to be persisted without the expected filename suffix. ([#5499](https://github.com/Skyvern-AI/skyvern/pull/5499))
- Fixed cached scripts for non-ATS workflows incorrectly generating `fill_form()` calls, which defeated the performance benefits of caching by delegating back to AI at runtime. ([#5497](https://github.com/Skyvern-AI/skyvern/pull/5497))
- Fixed the pagination navigator on the Workflow Past Runs and Run History pages being right-aligned instead of centered.
- Fixed run recordings missing an end timestamp and broken scrubbing. Recordings are now finalized with a stream-copy remux so players correctly show duration and support seeking. ([#5551](https://github.com/Skyvern-AI/skyvern/pull/5551))
- Fixed tag input fields (e.g. skill set fields on job application forms) not selecting values from the dropdown. These fields are now correctly treated as auto-completion inputs. ([#5557](https://github.com/Skyvern-AI/skyvern/pull/5557))
- Fixed per-step video sync spawning a redundant ffmpeg process on every step, causing CPU spikes and log noise. ([#5559](https://github.com/Skyvern-AI/skyvern/pull/5559))
- Fixed workflow parameter edit and delete buttons being hidden when a parameter name was too long.
- Fixed webhook delivery failing silently when a stored webhook URL had a leading or trailing space. ([#5550](https://github.com/Skyvern-AI/skyvern/pull/5550))
</Update>

View file

@ -59,6 +59,8 @@ dependencies = [
"tiktoken>=0.9.0",
"anthropic>=0.50.0,<0.89",
"google-cloud-aiplatform>=1.90.0,<2",
"google-auth>=2.49.2,<3",
"google-auth-oauthlib>=1.3.1,<2",
"onepassword-sdk==0.4.0",
"types-boto3[full]>=1.38.31,<2",
"lark>=1.2.2,<2",

View file

@ -147,6 +147,12 @@ export type Task = {
application: string | null;
};
export type FailureCategory = {
category: string;
confidence_float: number;
reasoning: string;
};
export type TaskApiResponse = {
request: CreateTaskRequest;
task_id: string;
@ -157,6 +163,7 @@ export type TaskApiResponse = {
screenshot_url: string | null;
recording_url: string | null;
failure_reason: string | null;
failure_category: Array<FailureCategory> | null;
webhook_failure_reason: string | null;
errors: Array<Record<string, unknown>>;
max_steps_per_run: number | null;
@ -483,6 +490,7 @@ export type WorkflowRunStatusApiResponse = {
recording_url: string | null;
outputs: Record<string, unknown> | null;
failure_reason: string | null;
failure_category: Array<FailureCategory> | null;
webhook_failure_reason: string | null;
downloaded_file_urls: Array<string> | null;
total_steps: number | null;
@ -513,6 +521,7 @@ export type WorkflowRunStatusApiResponseWithWorkflow = {
recording_url: string | null;
outputs: Record<string, unknown> | null;
failure_reason: string | null;
failure_category: Array<FailureCategory> | null;
webhook_failure_reason: string | null;
downloaded_file_urls: Array<string> | null;
total_steps: number | null;

View file

@ -0,0 +1,27 @@
import type { FailureCategory } from "@/api/types";
import { Badge } from "@/components/ui/badge";
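// Converts a snake_case category identifier to Title Case for display
// (e.g. "some_category" -> "Some Category").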
function formatCategoryLabel(category: string): string {
return category
.split("_")
.map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
.join(" ");
}
type Props = {
failureCategory: Array<FailureCategory> | null;
};
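// Renders a destructive badge for the first failure category in the list,
// or nothing when no categories are present.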
function FailureCategoryBadge({ failureCategory }: Props) {
const primary = failureCategory?.[0];
if (!primary) {
return null;
}
return (
<Badge variant="destructive" className="w-fit">
{formatCategoryLabel(primary.category)}
</Badge>
);
}
export { FailureCategoryBadge };

View file

@ -7,6 +7,7 @@ import {
TaskApiResponse,
WorkflowRunStatusApiResponse,
} from "@/api/types";
import { FailureCategoryBadge } from "@/components/FailureCategoryBadge";
import { Status404 } from "@/components/Status404";
import { StatusBadge } from "@/components/StatusBadge";
import { SwitchBarNavigation } from "@/components/SwitchBarNavigation";
@ -177,7 +178,10 @@ function TaskDetails() {
task?.status === Status.TimedOut;
const failureReason = showFailureReason ? (
<div className="space-y-1">
<Label className="text-lg">Failure Reason</Label>
<div className="flex items-center gap-2">
<Label className="text-lg">Failure Reason</Label>
<FailureCategoryBadge failureCategory={task.failure_category ?? null} />
</div>
<CodeEditor
language="json"
value={JSON.stringify(task.failure_reason, null, 2)}

View file

@ -2,6 +2,7 @@ import { AxiosError } from "axios";
import { useEffect, useState } from "react";
import { getClient } from "@/api/AxiosClient";
import { ProxyLocation, Status } from "@/api/types";
import { FailureCategoryBadge } from "@/components/FailureCategoryBadge";
import { StatusBadge } from "@/components/StatusBadge";
import {
SwitchBarNavigation,
@ -229,7 +230,10 @@ function WorkflowRun() {
const workflowFailureReason = workflowRun?.failure_reason ? (
<div className="space-y-2 rounded-md border border-red-600 bg-error-light p-4">
<div className="font-bold">{failureReasonTitle}</div>
<div className="flex items-center gap-2">
<div className="font-bold">{failureReasonTitle}</div>
<FailureCategoryBadge failureCategory={workflowRun.failure_category} />
</div>
<div className="text-sm">{workflowRun.failure_reason}</div>
{matchedTips}
{shouldShowFinallyNote && (

View file

@ -24,6 +24,7 @@ import { ModelSelector } from "@/components/ModelSelector";
import { useRecordingStore } from "@/store/useRecordingStore";
const FILE_TYPE_OPTIONS: Array<{ value: FileParserFileType; label: string }> = [
{ value: "auto_detect", label: "Auto detect" },
{ value: "csv", label: "CSV" },
{ value: "excel", label: "Excel" },
{ value: "pdf", label: "PDF" },
@ -78,7 +79,9 @@ function FileParserNode({ id, data }: NodeProps<FileParserNode>) {
function handleFileUrlChange(value: string) {
const detected = detectFileTypeFromUrl(value);
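// Only adopt the detected type when it doesn't conflict with the currently selected one.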
if (detected) {
const currentType = data.fileType;
const canInfer = detected && (!currentType || currentType === detected);
if (canInfer) {
update({ fileUrl: value, fileType: detected });
} else {
update({ fileUrl: value });

View file

@ -6,7 +6,13 @@ import {
WorkflowModel,
} from "@/routes/workflows/types/workflowTypes";
export type FileParserFileType = "csv" | "excel" | "pdf" | "image" | "docx";
export type FileParserFileType =
| "auto_detect"
| "csv"
| "excel"
| "pdf"
| "image"
| "docx";
export type FileParserNodeData = NodeBaseData & {
fileUrl: string;
@ -22,7 +28,7 @@ export const fileParserNodeDefaultData: FileParserNodeData = {
editable: true,
label: "",
fileUrl: "",
fileType: "csv",
fileType: "auto_detect",
continueOnFailure: false,
jsonSchema: "null",
model: null,

View file

@ -864,7 +864,7 @@ function convertToNode(
data: {
...commonData,
fileUrl: block.file_url,
fileType: block.file_type ?? "csv",
fileType: block.file_type ?? "auto_detect",
jsonSchema: JSON.stringify(block.json_schema, null, 2),
model: block.model,
},

View file

@ -410,7 +410,7 @@ export type SendEmailBlock = WorkflowBlockBase & {
export type FileURLParserBlock = WorkflowBlockBase & {
block_type: "file_url_parser";
file_url: string;
file_type: "csv" | "excel" | "pdf" | "image" | "docx";
file_type: "auto_detect" | "csv" | "excel" | "pdf" | "image" | "docx";
json_schema: Record<string, unknown> | null;
};

View file

@ -352,7 +352,7 @@ export type SendEmailBlockYAML = BlockYAMLBase & {
export type FileUrlParserBlockYAML = BlockYAMLBase & {
block_type: "file_url_parser";
file_url: string;
file_type: "csv" | "excel" | "pdf" | "image" | "docx";
file_type: "auto_detect" | "csv" | "excel" | "pdf" | "image" | "docx";
json_schema?: Record<string, unknown> | null;
};

View file

@ -0,0 +1,6 @@
declare module "fetch-to-curl" {
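// fetch-to-curl ships as CommonJS; under Vite 8's stricter CJS/ESM interop the
// default import can resolve to the module namespace object, so this shim declares
// both shapes and callers import the named `fetchToCurl` binding.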
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function fetchToCurl(requestInfo: any, requestInit?: any): string;
export default fetchToCurl;
export { fetchToCurl };
}

View file

@ -1,4 +1,4 @@
import fetchToCurl from "fetch-to-curl";
import { fetchToCurl } from "fetch-to-curl";
export interface ApiCommandOptions {
method: string;

View file

@ -77,15 +77,24 @@ class _OAuthResolution:
def _canonical_mcp_url(base_url: str | None = None) -> str:
"""Return the canonical MCP resource URI with no trailing slash.
MCP clients canonicalize the resource URI without a trailing slash for
RFC 8707 audience / RFC 9728 protected-resource comparison. Advertising
``.../mcp/`` caused Claude's MCP SDK to reject discovery with
``Protected resource .../mcp/ does not match expected .../mcp``. The
audience / resource-claim validators below normalize both sides before
comparing so tokens issued against either form still validate.
"""
candidate = (base_url or settings.SKYVERN_BASE_URL or _DEFAULT_REMOTE_BASE_URL).strip()
if not candidate:
candidate = _DEFAULT_REMOTE_BASE_URL
parts = urlsplit(candidate)
path = (parts.path or "").rstrip("/")
if path.endswith("/mcp"):
canonical_path = f"{path}/"
canonical_path = path
else:
canonical_path = f"{path}/mcp/" if path else "/mcp/"
canonical_path = f"{path}/mcp" if path else "/mcp"
return urlunsplit((parts.scheme, parts.netloc, canonical_path, "", ""))
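# Illustrative expectations (added for clarity; both forms canonicalize the same way):
#   _canonical_mcp_url("https://api.skyvern.com/mcp/") -> "https://api.skyvern.com/mcp"
#   _canonical_mcp_url("https://api.skyvern.com")      -> "https://api.skyvern.com/mcp"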
@ -151,25 +160,49 @@ def _validate_token_issuer(payload: dict[str, object], expected_issuer: str) ->
raise HTTPException(status_code=401, detail="Token issuer is not valid for this MCP resource")
def _normalize_resource(resource: str) -> str:
"""Strip a trailing slash so audience / resource comparisons are slash-agnostic.
Centralized here so ``_validate_token_audience``,
``_validate_token_resource_claims``, and any future validator apply the
same normalization rule to both sides of the comparison.
"""
return resource.rstrip("/")
def _validate_token_audience(payload: dict[str, object], expected_resource: str) -> None:
audience = payload.get("aud")
if isinstance(audience, str):
audiences = [audience]
elif isinstance(audience, list):
# RFC 7519 permits `aud` as string-or-array-of-strings; silently drop
# any non-string items in the array rather than rejecting. This is
# intentionally more permissive than `_validate_token_resource_claims`
# (where the single `resource` claim must be a string outright):
# a malformed array element here shouldn't poison an otherwise valid
# audience list.
audiences = [item for item in audience if isinstance(item, str)]
else:
audiences = []
if expected_resource not in audiences:
# Slash-agnostic compare: tokens whose `aud` was minted against either
# `.../mcp` or `.../mcp/` validate against either expected form.
expected_norm = _normalize_resource(expected_resource)
if not any(_normalize_resource(a) == expected_norm for a in audiences):
raise HTTPException(status_code=401, detail="Token audience is not valid for this MCP resource")
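# Illustration of the slash-agnostic compare: a payload of
# {"aud": ["https://host/mcp/"]} now passes when expected_resource is
# "https://host/mcp" (and vice versa); a payload with no matching
# audience still raises the 401 above.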
def _validate_token_resource_claims(payload: dict[str, object], expected_resource: str) -> None:
expected_norm = _normalize_resource(expected_resource)
for key in _RESOURCE_CLAIM_KEYS:
claim_value = payload.get(key)
if claim_value is None:
continue
if claim_value != expected_resource:
# A non-string `resource` claim is a malformed token, not a value
# mismatch — use a distinct error so the cause is obvious in logs.
if not isinstance(claim_value, str):
raise HTTPException(status_code=401, detail="Token resource claim must be a string")
if _normalize_resource(claim_value) != expected_norm:
raise HTTPException(status_code=401, detail="Token resource is not valid for this MCP resource")

View file

@ -28,6 +28,8 @@ from skyvern.schemas.workflows import (
FileParserBlockYAML,
FileUploadBlockYAML,
ForLoopBlockYAML,
GoogleSheetsReadBlockYAML,
GoogleSheetsWriteBlockYAML,
HttpRequestBlockYAML,
HumanInteractionBlockYAML,
LoginBlockYAML,
@ -78,6 +80,8 @@ BLOCK_TYPE_MAP: dict[str, type[BlockYAML]] = {
BlockType.HUMAN_INTERACTION.value: HumanInteractionBlockYAML,
BlockType.PRINT_PAGE.value: PrintPageBlockYAML,
BlockType.WORKFLOW_TRIGGER.value: WorkflowTriggerBlockYAML,
BlockType.GOOGLE_SHEETS_READ.value: GoogleSheetsReadBlockYAML,
BlockType.GOOGLE_SHEETS_WRITE.value: GoogleSheetsWriteBlockYAML,
}
# ---------------------------------------------------------------------------
@ -107,6 +111,8 @@ BLOCK_SUMMARIES: dict[str, str] = {
"human_interaction": "Pause workflow for human approval via email",
"print_page": "Print the current page to PDF",
"workflow_trigger": "Trigger another workflow by permanent ID, with optional payload and wait-for-completion",
"google_sheets_read": "Read rows from a Google Sheet as structured data (list of dicts)",
"google_sheets_write": "Write rows to a Google Sheet (append new rows or update existing cells)",
}
# ---------------------------------------------------------------------------
@ -211,6 +217,25 @@ BLOCK_EXAMPLES: dict[str, dict[str, Any]] = {
"payload": {"url": "{{ some_parameter }}"},
"wait_for_completion": True,
},
"google_sheets_read": {
"block_type": "google_sheets_read",
"label": "read_sheet_data",
"spreadsheet_url": "https://docs.google.com/spreadsheets/d/SPREADSHEET_ID/edit",
"sheet_name": "Sheet1",
"range": "A1:D100",
"credential_id": "{{ google_credential_id }}",
"has_header_row": True,
},
"google_sheets_write": {
"block_type": "google_sheets_write",
"label": "write_sheet_data",
"spreadsheet_url": "https://docs.google.com/spreadsheets/d/SPREADSHEET_ID/edit",
"sheet_name": "Sheet1",
"range": "A1",
"credential_id": "{{ google_credential_id }}",
"write_mode": "append",
"values": "{{ output_data | tojson }}",
},
}
# ---------------------------------------------------------------------------

View file

@ -433,6 +433,16 @@ class ScrapingFailedBlankPage(ScrapingFailed):
super().__init__(reason="It's a blank page. Please ensure there is a non-blank page for Skyvern to work with.")
class MissingStarterUrl(SkyvernException):
def __init__(self, block_label: str | None = None) -> None:
self.block_label = block_label
location = f"block '{block_label}'" if block_label else "the first browser block"
super().__init__(
f"{location} has no starting URL set. The first browser block must have a URL to navigate to. "
"Set a URL on the block, or reference a workflow parameter (e.g. '{{ starting_url }}')."
)
class WorkflowRunContextNotInitialized(SkyvernException):
def __init__(self, workflow_run_id: str) -> None:
super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")

View file

@ -94,7 +94,7 @@ from skyvern.forge.sdk.schemas.files import FileInfo
from skyvern.forge.sdk.schemas.organizations import Organization
from skyvern.forge.sdk.schemas.tasks import Task, TaskRequest, TaskResponse, TaskStatus
from skyvern.forge.sdk.schemas.totp_codes import OTPType
from skyvern.forge.sdk.trace import traced
from skyvern.forge.sdk.trace import apply_context_attrs, traced
from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext
from skyvern.forge.sdk.workflow.models.block import (
ActionBlock,
@ -423,7 +423,12 @@ class ForgeAgent:
operations = await app.AGENT_FUNCTION.generate_async_operations(organization, task, page)
self.async_operation_pool.add_operations(task.task_id, operations)
@traced(name="skyvern.agent.execute_step")
# Span name intentionally differs from the method: this is the outer wrapper
# that handles cancellation checks, status updates, retries, and cleanup around
# the inner step body. "step_orchestration" reads better on the trace dashboard
# than "execute_step", which was easy to confuse with the inner span below.
# code.function still carries the qualname for grep.
@traced(name="skyvern.agent.step_orchestration", role="wrapper")
async def execute_step(
self,
organization: Organization,
@ -1084,7 +1089,12 @@ class ForgeAgent:
)
return True
@traced(name="skyvern.agent.step")
# Span name intentionally differs from the method: this is the inner body
# that actually runs a step (scrape, LLM call, action execution). It sits
# under the "step_orchestration" span above. "step_body" vs the old
# "skyvern.agent.step" makes the parent/child relationship obvious at a glance
# on the dashboard. code.function still carries the qualname for grep.
@traced(name="skyvern.agent.step_body", role="wrapper")
async def agent_step(
self,
task: Task,
@ -2546,26 +2556,34 @@ class ForgeAgent:
await skyvern_frame.safe_scroll_to_x_y(x, y)
LOG.debug("Scrolled back to the original x, y position of the page after taking screenshot", x=x, y=y)
_ctx = skyvern_context.current()
if step and not step.is_speculative and _ctx and _ctx.use_artifact_bundling:
ids = app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=[screenshot],
artifact_type=ArtifactType.SCREENSHOT_ACTION,
)
if ids:
screenshot_artifact_id = ids[0]
else:
screenshot_request = await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=screenshot,
artifact_type=ArtifactType.SCREENSHOT_ACTION,
step=step,
)
if screenshot_request:
artifacts.append(screenshot_request)
for artifact_data in screenshot_request.artifacts:
if artifact_data.artifact_model.artifact_type == ArtifactType.SCREENSHOT_ACTION:
screenshot_artifact_id = artifact_data.artifact_model.artifact_id
break
# Derive bundled from the actual branch condition, not just the context flag —
# step=None and speculative steps force the non-bundled path.
_bundled = bool(step and not step.is_speculative and _ctx and _ctx.use_artifact_bundling)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.artifact.screenshot_action") as _ss_art_span:
apply_context_attrs(_ss_art_span)
_ss_art_span.set_attribute("screenshot_bytes", len(screenshot))
_ss_art_span.set_attribute("bundled", _bundled)
if _bundled:
ids = app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=[screenshot],
artifact_type=ArtifactType.SCREENSHOT_ACTION,
)
if ids:
screenshot_artifact_id = ids[0]
else:
screenshot_request = await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=screenshot,
artifact_type=ArtifactType.SCREENSHOT_ACTION,
step=step,
)
if screenshot_request:
artifacts.append(screenshot_request)
for artifact_data in screenshot_request.artifacts:
if artifact_data.artifact_model.artifact_type == ArtifactType.SCREENSHOT_ACTION:
screenshot_artifact_id = artifact_data.artifact_model.artifact_id
break
except Exception:
LOG.error(
"Failed to record screenshot after action",
@ -2576,24 +2594,38 @@ class ForgeAgent:
skyvern_frame = await SkyvernFrame.create_instance(frame=working_page)
html = await skyvern_frame.get_content()
_ctx = skyvern_context.current()
if step and not step.is_speculative and _ctx and _ctx.use_artifact_bundling:
app.ARTIFACT_MANAGER.accumulate_action_html_to_archive(step=step, html_action=html.encode("utf-8"))
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=html.encode(),
artifact_type=ArtifactType.HTML_ACTION,
step=step,
# Encode once to fix the html_bytes char-vs-byte mismatch and avoid a
# second pass through html.encode() in the chosen branch.
html_bytes = html.encode("utf-8")
_bundled = bool(step and not step.is_speculative and _ctx and _ctx.use_artifact_bundling)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.artifact.html_action") as _html_art_span:
apply_context_attrs(_html_art_span)
_html_art_span.set_attribute("html_bytes", len(html_bytes))
_html_art_span.set_attribute("bundled", _bundled)
if _bundled:
app.ARTIFACT_MANAGER.accumulate_action_html_to_archive(step=step, html_action=html_bytes)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=html_bytes,
artifact_type=ArtifactType.HTML_ACTION,
step=step,
)
)
)
except Exception:
LOG.exception("Failed to record html after action")
if artifacts:
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.warning("Failed to bulk create artifacts after action", exc_info=True)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.artifact.bulk_create") as _bulk_span:
apply_context_attrs(_bulk_span)
# Count underlying artifacts (main + screenshots per request), not request wrappers.
_bulk_span.set_attribute("artifact_count", sum(len(a.artifacts) for a in artifacts if a is not None))
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.warning("Failed to bulk create artifacts after action", exc_info=True)
if screenshot_artifact_id and action.action_id and action.organization_id:
action.screenshot_artifact_id = screenshot_artifact_id
@ -2608,19 +2640,22 @@ class ForgeAgent:
artifact_id=screenshot_artifact_id,
)
else:
try:
await app.DATABASE.artifacts.update_action_screenshot_artifact_id(
organization_id=action.organization_id,
action_id=action.action_id,
screenshot_artifact_id=screenshot_artifact_id,
)
except Exception:
LOG.warning(
"Failed to update action with screenshot artifact id",
action_id=action.action_id,
screenshot_artifact_id=screenshot_artifact_id,
exc_info=True,
)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.artifact.update_action_screenshot_fk") as _fk_span:
apply_context_attrs(_fk_span)
try:
await app.DATABASE.artifacts.update_action_screenshot_artifact_id(
organization_id=action.organization_id,
action_id=action.action_id,
screenshot_artifact_id=screenshot_artifact_id,
)
except Exception:
LOG.warning(
"Failed to update action with screenshot artifact id",
action_id=action.action_id,
screenshot_artifact_id=screenshot_artifact_id,
exc_info=True,
)
async def initialize_execution_state(
self,
@ -2705,7 +2740,7 @@ class ForgeAgent:
scroll=scroll,
)
@traced(name="skyvern.agent.scrape_and_prompt")
@traced(name="skyvern.agent.scrape_and_prompt", role="wrapper")
async def build_and_record_step_prompt(
self,
task: Task,
@ -3339,6 +3374,14 @@ class ForgeAgent:
# Map template to prompt_name for logging/caching guards
prompt_name = EXTRACT_ACTION_PROMPT_NAME if template == EXTRACT_ACTION_TEMPLATE else template
# Tag the span so we can explain the 26x p95/p50 variance — prompt size
# is almost always the reason prompt_build gets slow.
_prompt_build_span = otel_trace.get_current_span()
_prompt_build_span.set_attribute("prompt_name", prompt_name)
_prompt_build_span.set_attribute("prompt_tokens", count_tokens(full_prompt))
_prompt_build_span.set_attribute("use_caching", bool(use_caching))
return full_prompt, use_caching, prompt_name
async def _get_prompt_caching_settings(self, context: SkyvernContext) -> dict[str, bool]:
@ -3700,37 +3743,42 @@ class ForgeAgent:
LOG.exception("Failed to take screenshot before sending task response")
if task.organization_id:
try:
# Keep both finalize and save inside a single timeout budget so a hung
# finalize call cannot block persistence forever; accept the trade-off
# that a very slow finalize on many files could crowd out save.
async with asyncio.timeout(SAVE_DOWNLOADED_FILES_TIMEOUT):
if download_suffix and list_files_before is not None:
await self._finalize_downloaded_files_for_task(
task,
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.cleanup.save_downloaded_files") as _cl_save_span:
apply_context_attrs(_cl_save_span)
try:
# Keep both finalize and save inside a single timeout budget so a hung
# finalize call cannot block persistence forever; accept the trade-off
# that a very slow finalize on many files could crowd out save.
async with asyncio.timeout(SAVE_DOWNLOADED_FILES_TIMEOUT):
if download_suffix and list_files_before is not None:
await self._finalize_downloaded_files_for_task(
task,
organization_id=task.organization_id,
download_suffix=download_suffix,
list_files_before=list_files_before,
randomize_if_missing=False,
)
context = skyvern_context.current()
await app.STORAGE.save_downloaded_files(
organization_id=task.organization_id,
download_suffix=download_suffix,
list_files_before=list_files_before,
randomize_if_missing=False,
run_id=context.run_id
if context and context.run_id
else task.workflow_run_id or task.task_id,
)
context = skyvern_context.current()
await app.STORAGE.save_downloaded_files(
organization_id=task.organization_id,
run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id,
except asyncio.TimeoutError:
LOG.warning(
"Timeout to save downloaded files",
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
)
except Exception:
LOG.warning(
"Failed to save downloaded files",
exc_info=True,
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
)
except asyncio.TimeoutError:
LOG.warning(
"Timeout to save downloaded files",
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
)
except Exception:
LOG.warning(
"Failed to save downloaded files",
exc_info=True,
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
)
# if it's a task block from workflow run,
# we don't need to close the browser, save browser artifacts, or call webhook
@ -3744,15 +3792,22 @@ class ForgeAgent:
await self.async_operation_pool.remove_task(task.task_id)
await self.cleanup_browser_and_create_artifacts(
close_browser_on_completion, last_step, task, browser_session_id=browser_session_id
)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.cleanup.browser_and_artifacts") as _cl_br_span:
apply_context_attrs(_cl_br_span)
await self.cleanup_browser_and_create_artifacts(
close_browser_on_completion, last_step, task, browser_session_id=browser_session_id
)
# Wait for all tasks to complete before generating the links for the artifacts
await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([task.task_id])
with _tracer.start_as_current_span("skyvern.agent.cleanup.wait_for_upload") as _cl_wait_span:
apply_context_attrs(_cl_wait_span)
await app.ARTIFACT_MANAGER.wait_for_upload_aiotasks([task.task_id])
if need_call_webhook:
await self.execute_task_webhook(task=task, api_key=api_key)
with _tracer.start_as_current_span("skyvern.agent.cleanup.webhook") as _cl_wh_span:
apply_context_attrs(_cl_wh_span)
await self.execute_task_webhook(task=task, api_key=api_key)
async def execute_task_webhook(
self,

View file

@ -606,6 +606,96 @@ class AgentFunction:
Cloud override provides actual solving; OSS base is a no-op."""
return False
async def get_google_sheets_credentials(
self,
organization_id: str,
credential_id: str,
) -> str | None:
"""Get a Google Sheets access token for the given credential.
Returns None in OSS. Cloud override uses the OAuth service to
decrypt the stored refresh token and exchange it for an access token.
"""
return None
async def get_google_workspace_credentials(
self,
organization_id: str,
credential_id: str,
required_scopes: list[str] | None = None,
) -> object | None:
"""OSS no-op; cloud override returns a refreshed google.oauth2.credentials.Credentials or None."""
return None
async def ensure_sheet_tab(
self,
*,
access_token: str,
spreadsheet_id: str,
title: str,
) -> int | None:
"""Ensure a sheet tab with the given title exists in the spreadsheet.
Returns the sheet_id of the newly created tab, or None if the caller
should fall back to its own lookup (e.g. a concurrent creator won the
race). OSS base is a no-op that returns None; cloud override calls the
Sheets v4 batchUpdate addSheet endpoint.
"""
return None
async def google_sheets_values_get(
self,
*,
access_token: str,
spreadsheet_id: str,
ranges: str,
fields: str | None = None,
) -> dict[str, Any] | None:
"""Read ranges from a spreadsheet via spreadsheets.get. OSS no-op."""
return None
async def google_sheets_values_append(
self,
*,
access_token: str,
spreadsheet_id: str,
range_: str,
values: list[list[Any]],
) -> dict[str, Any] | None:
"""Append rows via spreadsheets.values.append. OSS no-op."""
return None
async def google_sheets_values_update(
self,
*,
access_token: str,
spreadsheet_id: str,
range_: str,
values: list[list[Any]],
) -> dict[str, Any] | None:
"""Update rows via spreadsheets.values.update. OSS no-op."""
return None
async def google_sheets_batch_update(
self,
*,
access_token: str,
spreadsheet_id: str,
requests: list[dict[str, Any]],
) -> dict[str, Any] | None:
"""Apply a batchUpdate to a spreadsheet. OSS no-op."""
return None
async def google_sheets_get_sheet_id(
self,
*,
access_token: str,
spreadsheet_id: str,
sheet_title: str,
) -> int | None:
"""Resolve a tab title to its numeric sheetId. OSS no-op."""
return None
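# A cloud deployment supplies these hooks by subclassing AgentFunction.
# Hypothetical sketch (oauth_service and its method are illustrative, not real APIs):
#
#     class CloudAgentFunction(AgentFunction):
#         async def get_google_sheets_credentials(
#             self, organization_id: str, credential_id: str
#         ) -> str | None:
#             # Decrypt the stored refresh token and exchange it for an access token.
#             return await oauth_service.mint_access_token(organization_id, credential_id)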
async def generate_async_operations(
self,
organization: Organization,
@ -621,7 +711,7 @@ class AgentFunction:
) -> CleanupElementTreeFunc:
MAX_ELEMENT_CNT = settings.SVG_MAX_PARSING_ELEMENT_CNT
@traced()
@traced(name="skyvern.agent.cleanup_element_tree")
async def cleanup_element_tree_func(frame: Page | Frame, url: str, element_tree: list[dict]) -> list[dict]:
"""
Remove rect and attribute.unique_id from the elements.

View file

@ -49,7 +49,7 @@ from skyvern.forge.sdk.core.skyvern_context import SkyvernContext
from skyvern.forge.sdk.models import SpeculativeLLMMetadata, Step
from skyvern.forge.sdk.schemas.ai_suggestions import AISuggestion
from skyvern.forge.sdk.schemas.task_v2 import TaskV2, Thought
from skyvern.forge.sdk.trace import traced
from skyvern.forge.sdk.trace import apply_context_attrs, traced
from skyvern.utils.image_resizer import Resolution, get_resize_target_dimension, resize_screenshots
LOG = structlog.get_logger()
@ -590,7 +590,9 @@ class LLMAPIHandlerFactory:
_llm_span.set_attribute("llm_key", llm_key)
_llm_span.set_attribute("llm_model", main_model_group)
_llm_span.set_attribute("prompt_name", prompt_name)
_llm_span.set_attribute("handler_type", "router")
# handler_type distinguishes the three LLM entry points that share
# the skyvern.llm.request span name. Dashboards filter on this attr.
_llm_span.set_attribute("handler_type", "router_with_fallback")
if parameters is None:
parameters = LLMAPIHandlerFactory.get_api_parameters(llm_config)
@ -647,24 +649,32 @@ class LLMAPIHandlerFactory:
)
llm_prompt_value = prompt
if should_persist_llm_artifacts:
if _should_bundle:
_bundle_prompt = llm_prompt_value.encode("utf-8")
if screenshots and step:
app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=screenshots,
artifact_type=ArtifactType.SCREENSHOT_LLM,
# Pre-request artifact persistence cluster. Covers prompt + screenshot
# staging before the LLM call. The hot cost is inside the non-bundled
# branch (prepare_llm_artifact → S3 upload).
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.artifact.pre_request") as _pre_span:
apply_context_attrs(_pre_span)
_pre_span.set_attribute("bundled", bool(_should_bundle))
_pre_span.set_attribute("persist", bool(should_persist_llm_artifacts))
if should_persist_llm_artifacts:
if _should_bundle:
_bundle_prompt = llm_prompt_value.encode("utf-8")
if screenshots and step:
app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=screenshots,
artifact_type=ArtifactType.SCREENSHOT_LLM,
)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=llm_prompt_value.encode("utf-8"),
artifact_type=ArtifactType.LLM_PROMPT,
screenshots=screenshots,
**artifact_targets,
)
)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=llm_prompt_value.encode("utf-8"),
artifact_type=ArtifactType.LLM_PROMPT,
screenshots=screenshots,
**artifact_targets,
)
)
# Build messages and apply caching in one step
messages = await llm_messages_builder(prompt, screenshots, llm_config.add_assistant_prefix)
@ -845,7 +855,7 @@ class LLMAPIHandlerFactory:
raise LLMProviderErrorRetryableTask(llm_key, cause=e) from e
except litellm.exceptions.ContextWindowExceededError as e:
duration_seconds = time.perf_counter() - start_time
_llm_span.set_attribute("status", "error")
_llm_span.set_attribute("status", "context_exceeded")
LOG.exception(
"Context window exceeded",
llm_key=llm_key,
@ -887,6 +897,17 @@ class LLMAPIHandlerFactory:
duration_seconds=_duration,
)
raise LLMProviderError(llm_key) from None
except litellm.exceptions.RateLimitError as e:
duration_seconds = time.perf_counter() - start_time
_llm_span.set_attribute("status", "rate_limited")
LOG.warning(
"LLM request rate limited",
llm_key=llm_key,
model=main_model_group,
prompt_name=prompt_name,
duration_seconds=duration_seconds,
)
raise LLMProviderError(llm_key, cause=e) from e
except Exception as e:
duration_seconds = time.perf_counter() - start_time
_llm_span.set_attribute("status", "error")
@ -1066,24 +1087,35 @@ class LLMAPIHandlerFactory:
return parsed_response
finally:
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.error("Failed to persist artifacts", exc_info=True)
if _should_bundle and should_persist_llm_artifacts and step:
_ctx = skyvern_context.current()
app.ARTIFACT_MANAGER.accumulate_llm_call_to_archive(
step=step,
workflow_run_id=_ctx.workflow_run_id if _ctx else None,
workflow_run_block_id=_ctx.parent_workflow_run_block_id if _ctx else None,
run_id=_ctx.run_id if _ctx else None,
hashed_href_map=_bundle_hashed_href_map,
prompt=_bundle_prompt,
request=_bundle_request,
response=_bundle_response,
parsed_response=_bundle_parsed,
rendered_response=_bundle_rendered,
# Post-response artifact persistence cluster. bulk_create_artifacts
# does the S3 uploads + DB rows for every LLM artifact queued during
# this request; the bundled-path archive accumulate is fast in-memory.
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.artifact.post_response") as _post_span:
apply_context_attrs(_post_span)
_post_span.set_attribute("bundled", bool(_should_bundle))
# Count underlying artifacts (main + screenshots per request), not request wrappers.
_post_span.set_attribute(
"artifact_count", sum(len(a.artifacts) for a in artifacts if a is not None)
)
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.error("Failed to persist artifacts", exc_info=True)
if _should_bundle and should_persist_llm_artifacts and step:
_ctx = skyvern_context.current()
app.ARTIFACT_MANAGER.accumulate_llm_call_to_archive(
step=step,
workflow_run_id=_ctx.workflow_run_id if _ctx else None,
workflow_run_block_id=_ctx.parent_workflow_run_block_id if _ctx else None,
run_id=_ctx.run_id if _ctx else None,
hashed_href_map=_bundle_hashed_href_map,
prompt=_bundle_prompt,
request=_bundle_request,
response=_bundle_response,
parsed_response=_bundle_parsed,
rendered_response=_bundle_rendered,
)
llm_api_handler_with_router_and_fallback.llm_key = llm_key # type: ignore[attr-defined]
LLMAPIHandlerFactory._router_handler_cache[llm_key] = llm_api_handler_with_router_and_fallback
@ -1135,7 +1167,9 @@ class LLMAPIHandlerFactory:
_assert_step_thought_exclusive(step, thought)
start_time = time.perf_counter()
_llm_span = otel_trace.get_current_span()
_llm_span.set_attribute("handler_type", "default")
# handler_type distinguishes the three LLM entry points that share
# the skyvern.llm.request span name. Dashboards filter on this attr.
_llm_span.set_attribute("handler_type", "single_handler")
_llm_span.set_attribute("llm_key", llm_key)
_llm_span.set_attribute("llm_model", llm_config.model_name)
_llm_span.set_attribute("prompt_name", prompt_name)
@ -1202,24 +1236,32 @@ class LLMAPIHandlerFactory:
)
llm_prompt_value = prompt
if should_persist_llm_artifacts:
if _should_bundle:
_bundle_prompt = llm_prompt_value.encode("utf-8")
if screenshots and step:
app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=screenshots,
artifact_type=ArtifactType.SCREENSHOT_LLM,
# Pre-request artifact persistence cluster. Covers prompt + screenshot
# staging before the LLM call. The hot cost is inside the non-bundled
# branch (prepare_llm_artifact → S3 upload).
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.artifact.pre_request") as _pre_span:
apply_context_attrs(_pre_span)
_pre_span.set_attribute("bundled", bool(_should_bundle))
_pre_span.set_attribute("persist", bool(should_persist_llm_artifacts))
if should_persist_llm_artifacts:
if _should_bundle:
_bundle_prompt = llm_prompt_value.encode("utf-8")
if screenshots and step:
app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=screenshots,
artifact_type=ArtifactType.SCREENSHOT_LLM,
)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=llm_prompt_value.encode("utf-8"),
artifact_type=ArtifactType.LLM_PROMPT,
screenshots=screenshots,
**artifact_targets,
)
)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=llm_prompt_value.encode("utf-8"),
artifact_type=ArtifactType.LLM_PROMPT,
screenshots=screenshots,
**artifact_targets,
)
)
if not llm_config.supports_vision:
screenshots = None
@ -1355,7 +1397,7 @@ class LLMAPIHandlerFactory:
raise LLMProviderErrorRetryableTask(llm_key, cause=e) from e
except litellm.exceptions.ContextWindowExceededError as e:
duration_seconds = time.perf_counter() - start_time
_llm_span.set_attribute("status", "error")
_llm_span.set_attribute("status", "context_exceeded")
LOG.exception(
"Context window exceeded",
llm_key=llm_key,
@ -1388,6 +1430,17 @@ class LLMAPIHandlerFactory:
duration=t_llm_cancelled - t_llm_request,
)
raise LLMProviderError(llm_key) from None
except litellm.exceptions.RateLimitError as e:
duration_seconds = time.perf_counter() - start_time
_llm_span.set_attribute("status", "rate_limited")
LOG.warning(
"LLM request rate limited",
llm_key=llm_key,
model=model_name,
prompt_name=prompt_name,
duration_seconds=duration_seconds,
)
raise LLMProviderError(llm_key, cause=e) from e
except Exception as e:
duration_seconds = time.perf_counter() - start_time
_llm_span.set_attribute("status", "error")
@ -1565,24 +1618,35 @@ class LLMAPIHandlerFactory:
return parsed_response
finally:
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.error("Failed to persist artifacts", exc_info=True)
if _should_bundle and should_persist_llm_artifacts and step:
_ctx = skyvern_context.current()
app.ARTIFACT_MANAGER.accumulate_llm_call_to_archive(
step=step,
workflow_run_id=_ctx.workflow_run_id if _ctx else None,
workflow_run_block_id=_ctx.parent_workflow_run_block_id if _ctx else None,
run_id=_ctx.run_id if _ctx else None,
hashed_href_map=_bundle_hashed_href_map,
prompt=_bundle_prompt,
request=_bundle_request,
response=_bundle_response,
parsed_response=_bundle_parsed,
rendered_response=_bundle_rendered,
# Post-response artifact persistence cluster. bulk_create_artifacts
# does the S3 uploads + DB rows for every LLM artifact queued during
# this request; the bundled-path archive accumulate is fast in-memory.
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.artifact.post_response") as _post_span:
apply_context_attrs(_post_span)
_post_span.set_attribute("bundled", bool(_should_bundle))
# Count underlying artifacts (main + screenshots per request), not request wrappers.
_post_span.set_attribute(
"artifact_count", sum(len(a.artifacts) for a in artifacts if a is not None)
)
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.error("Failed to persist artifacts", exc_info=True)
if _should_bundle and should_persist_llm_artifacts and step:
_ctx = skyvern_context.current()
app.ARTIFACT_MANAGER.accumulate_llm_call_to_archive(
step=step,
workflow_run_id=_ctx.workflow_run_id if _ctx else None,
workflow_run_block_id=_ctx.parent_workflow_run_block_id if _ctx else None,
run_id=_ctx.run_id if _ctx else None,
hashed_href_map=_bundle_hashed_href_map,
prompt=_bundle_prompt,
request=_bundle_request,
response=_bundle_response,
parsed_response=_bundle_parsed,
rendered_response=_bundle_rendered,
)
llm_api_handler.llm_key = llm_key # type: ignore[attr-defined]
return llm_api_handler
@ -1702,7 +1766,9 @@ class LLMCaller:
_llm_span.set_attribute("llm_key", self.llm_key)
_llm_span.set_attribute("llm_model", self.llm_config.model_name)
_llm_span.set_attribute("prompt_name", prompt_name or "<unknown>")
_llm_span.set_attribute("handler_type", "llm_caller")
# handler_type distinguishes the three LLM entry points that share
# the skyvern.llm.request span name. Dashboards filter on this attr.
_llm_span.set_attribute("handler_type", "direct_call")
active_parameters = self.base_parameters or {}
if parameters is None:
parameters = LLMAPIHandlerFactory.get_api_parameters(self.llm_config)
@ -1759,27 +1825,43 @@ class LLMCaller:
tool["display_height_px"] = target_dimension["height"]
if "display_width_px" in tool:
tool["display_width_px"] = target_dimension["width"]
screenshots = resize_screenshots(screenshots, target_dimension)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.screenshot_resize") as _resize_span:
apply_context_attrs(_resize_span)
_resize_span.set_attribute("input_count", len(screenshots))
_resize_span.set_attribute("input_bytes", sum(len(s) for s in screenshots))
_resize_span.set_attribute("target_width", target_dimension["width"])
_resize_span.set_attribute("target_height", target_dimension["height"])
screenshots = resize_screenshots(screenshots, target_dimension)
_resize_span.set_attribute("output_bytes", sum(len(s) for s in screenshots))
llm_prompt_value = prompt or ""
if prompt and should_persist_llm_artifacts:
if _should_bundle:
_bundle_prompt = prompt.encode("utf-8")
if screenshots and step:
app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=screenshots,
artifact_type=ArtifactType.SCREENSHOT_LLM,
# Pre-request artifact persistence cluster. Covers prompt + screenshot
# staging before the LLM call. The hot cost is inside the non-bundled
# branch (prepare_llm_artifact → S3 upload).
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.artifact.pre_request") as _pre_span:
apply_context_attrs(_pre_span)
_pre_span.set_attribute("bundled", bool(_should_bundle))
_pre_span.set_attribute("persist", bool(prompt and should_persist_llm_artifacts))
if prompt and should_persist_llm_artifacts:
if _should_bundle:
_bundle_prompt = prompt.encode("utf-8")
if screenshots and step:
app.ARTIFACT_MANAGER.accumulate_screenshot_to_step_archive(
step=step,
screenshots=screenshots,
artifact_type=ArtifactType.SCREENSHOT_LLM,
)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=prompt.encode("utf-8"),
artifact_type=ArtifactType.LLM_PROMPT,
screenshots=screenshots,
**artifact_targets,
)
)
else:
artifacts.append(
await app.ARTIFACT_MANAGER.prepare_llm_artifact(
data=prompt.encode("utf-8"),
artifact_type=ArtifactType.LLM_PROMPT,
screenshots=screenshots,
**artifact_targets,
)
)
if not self.llm_config.supports_vision:
screenshots = None
@ -1838,7 +1920,7 @@ class LLMCaller:
_llm_span.set_attribute("status", "error")
raise LLMProviderErrorRetryableTask(self.llm_key, cause=e) from e
except litellm.exceptions.ContextWindowExceededError as e:
_llm_span.set_attribute("status", "error")
_llm_span.set_attribute("status", "context_exceeded")
LOG.exception(
"Context window exceeded",
llm_key=self.llm_key,
@ -1867,6 +1949,10 @@ class LLMCaller:
duration=t_llm_cancelled - t_llm_request,
)
raise LLMProviderError(self.llm_key) from None
except litellm.exceptions.RateLimitError as e:
_llm_span.set_attribute("status", "rate_limited")
LOG.warning("LLM request rate limited", llm_key=self.llm_key)
raise LLMProviderError(self.llm_key, cause=e) from e
except Exception as e:
_llm_span.set_attribute("status", "error")
LOG.exception("LLM request failed unexpectedly", llm_key=self.llm_key)
@ -2005,31 +2091,40 @@ class LLMCaller:
return parsed_response
finally:
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.error("Failed to persist artifacts", exc_info=True)
if _should_bundle and should_persist_llm_artifacts and step:
_ctx = skyvern_context.current()
app.ARTIFACT_MANAGER.accumulate_llm_call_to_archive(
step=step,
workflow_run_id=_ctx.workflow_run_id if _ctx else None,
workflow_run_block_id=_ctx.parent_workflow_run_block_id if _ctx else None,
run_id=_ctx.run_id if _ctx else None,
hashed_href_map=_bundle_hashed_href_map,
prompt=_bundle_prompt,
request=_bundle_request,
response=_bundle_response,
parsed_response=_bundle_parsed,
rendered_response=_bundle_rendered,
)
# Post-response artifact persistence cluster. bulk_create_artifacts performs
# the S3 uploads and DB inserts for every LLM artifact queued during this
# request; the bundled-path archive accumulation is a fast in-memory step.
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.llm.artifact.post_response") as _post_span:
apply_context_attrs(_post_span)
_post_span.set_attribute("bundled", bool(_should_bundle))
# Count underlying artifacts (main + screenshots per request), not request wrappers.
_post_span.set_attribute("artifact_count", sum(len(a.artifacts) for a in artifacts if a is not None))
try:
await app.ARTIFACT_MANAGER.bulk_create_artifacts(artifacts)
except Exception:
LOG.error("Failed to persist artifacts", exc_info=True)
if _should_bundle and should_persist_llm_artifacts and step:
_ctx = skyvern_context.current()
app.ARTIFACT_MANAGER.accumulate_llm_call_to_archive(
step=step,
workflow_run_id=_ctx.workflow_run_id if _ctx else None,
workflow_run_block_id=_ctx.parent_workflow_run_block_id if _ctx else None,
run_id=_ctx.run_id if _ctx else None,
hashed_href_map=_bundle_hashed_href_map,
prompt=_bundle_prompt,
request=_bundle_request,
response=_bundle_response,
parsed_response=_bundle_parsed,
rendered_response=_bundle_rendered,
)
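Taken together with the ``skyvern.llm.screenshot_resize`` span above, these clusters give one persisted LLM call a small fixed trace shape. A minimal sketch of the pattern, using only the OpenTelemetry API plus this repo's ``apply_context_attrs`` helper (attribute values below are illustrative):

from opentelemetry import trace as otel_trace

_tracer = otel_trace.get_tracer("skyvern")

# Illustrative only: the same start-span / set-attribute shape used by the
# pre_request and post_response clusters, with toy attribute values.
with _tracer.start_as_current_span("skyvern.llm.artifact.post_response") as _span:
    apply_context_attrs(_span)  # attaches the shared context IDs to the span
    _span.set_attribute("bundled", False)
    _span.set_attribute("artifact_count", 3)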
def get_screenshot_resize_target_dimension(self, window_dimension: Resolution | None) -> Resolution:
if window_dimension and window_dimension != self.browser_window_dimension:
return get_resize_target_dimension(window_dimension)
return self.screenshot_resize_target_dimension
@traced()
@traced(name="skyvern.llm.dispatch")
async def _dispatch_llm_call(
self,
messages: list[dict[str, Any]],

View file

@ -187,10 +187,14 @@ def _translate_to_agent_result(
action_data = parse_final_response(text)
user_response = action_data.get("user_response") or "Done."
resp_type = action_data.get("type", "REPLY")
if resp_type not in ("REPLY", "ASK_QUESTION", "REPLACE_WORKFLOW"):
resp_type = "REPLY"
last_workflow = ctx.last_workflow
last_workflow_yaml = ctx.last_workflow_yaml
if action_data.get("type") == "REPLACE_WORKFLOW":
if resp_type == "REPLACE_WORKFLOW":
LOG.warning("Agent used inline REPLACE_WORKFLOW instead of update_workflow tool")
workflow_yaml = action_data.get("workflow_yaml", "")
if workflow_yaml:
@ -201,6 +205,7 @@ def _translate_to_agent_result(
organization_id=organization_id,
workflow_yaml=workflow_yaml,
)
last_workflow_yaml = workflow_yaml
except (yaml.YAMLError, ValidationError, BaseWorkflowHTTPException) as e:
LOG.warning("Failed to process final workflow YAML", error=str(e))
user_response = (
@ -209,22 +214,24 @@ def _translate_to_agent_result(
f"Please ask me to fix it.)"
)
# Inline REPLACE_WORKFLOW bypasses _update_workflow so ctx.last_workflow
# is still whatever the tool layer last saw. Writing the REPLACE_WORKFLOW
# result onto ctx ensures _verified_workflow_or_none sees the right
# candidate — though it still gates on last_test_ok, so an untested
# REPLACE is rejected too.
if action_data.get("type") == "REPLACE_WORKFLOW" and last_workflow is not ctx.last_workflow:
# Inline REPLACE_WORKFLOW bypasses _update_workflow, so ctx.last_workflow
# is whatever the tool layer last saw. Write the REPLACE candidate onto
# ctx and invalidate any prior passing test: the REPLACE yaml itself was
# never run, so a leftover ``last_test_ok is True`` from an earlier tested
# (but different) yaml must not promote this untested one.
if resp_type == "REPLACE_WORKFLOW" and last_workflow is not ctx.last_workflow:
ctx.last_workflow = last_workflow
ctx.last_workflow_yaml = last_workflow_yaml
ctx.last_test_ok = None
user_response = _rewrite_failed_test_response(str(user_response), ctx)
# ASK_QUESTION replies carry a specific clarifying question — often the
# "stop and ask" unblocker the system prompt now requires when the agent
# cannot test. The generic rewrite would replace it with a vague
# "Could you share more context", so skip it for ASK_QUESTION.
if resp_type != "ASK_QUESTION":
user_response = _rewrite_failed_test_response(str(user_response), ctx)
last_workflow, last_workflow_yaml = _verified_workflow_or_none(ctx)
resp_type = action_data.get("type", "REPLY")
if resp_type not in ("REPLY", "ASK_QUESTION", "REPLACE_WORKFLOW"):
resp_type = "REPLY"
llm_context_raw = action_data.get("global_llm_context")
structured = StructuredContext.from_json_str(global_llm_context)
if isinstance(llm_context_raw, dict):

View file

@ -160,6 +160,15 @@ class CopilotContext(AgentContext):
failed_test_nudge_count: int = 0
explore_without_workflow_nudge_count: int = 0
last_failed_workflow_yaml: str | None = None
# Set when a block-running tool timed out and the run's true outcome
# could not be reconciled (post-drain row was ``canceled``, non-final, or
# unreadable). Blocks further block-running tool calls until the LLM
# calls ``get_run_results(workflow_run_id=<same>)`` AND that read returns
# a status in ``_TRUSTED_POST_DRAIN_STATUSES``. Turn-scoped by
# construction — ``CopilotContext`` is re-created per agent turn — so
# this guards auto-retry WITHIN a turn but not cross-turn "user says
# retry" requests.
pending_reconciliation_run_id: str | None = None
# Consecutive test runs whose data-producing blocks completed with no
# meaningful output (missing, empty, or all-null fields). Resets when a
# run produces real data. Used to escalate when the agent is stuck

View file

@ -41,7 +41,7 @@ from skyvern.forge.sdk.workflow.models.parameter import (
WorkflowParameter,
WorkflowParameterType,
)
from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRunStatus
from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun, WorkflowRunStatus
from skyvern.webeye.navigation import is_skip_inner_retry_error
LOG = structlog.get_logger()
@ -55,7 +55,7 @@ _FAILED_BLOCK_STATUSES: frozenset[str] = frozenset(
}
)
_DATA_PRODUCING_BLOCK_TYPES = frozenset({"EXTRACTION", "TEXT_PROMPT"})
RUN_BLOCKS_DEBUG_TIMEOUT_SECONDS = 180
RUN_BLOCKS_DEBUG_TIMEOUT_SECONDS = 300
# Detached cleanup tasks held here so the garbage collector does not drop them
# while they still have work to do, and so the "task exception was never
@ -114,6 +114,81 @@ def _log_detached_cleanup_failure(task: asyncio.Task) -> None:
LOG.warning("Detached cancel fallback failed", exc_info=exc)
async def _safe_read_workflow_run(
workflow_run_id: str,
organization_id: str,
*,
context: str,
) -> WorkflowRun | None:
"""Read a workflow_runs row, logging-and-returning-None on failure.
The ``context`` string distinguishes call sites in logs (e.g.
``"pre-cancel"`` vs ``"post-drain"``) so a failure is attributable to
the specific phase of the timeout branch it fired from.
"""
try:
return await app.DATABASE.workflow_runs.get_workflow_run(
workflow_run_id=workflow_run_id,
organization_id=organization_id,
)
except Exception:
LOG.warning(
"Workflow run re-read failed",
workflow_run_id=workflow_run_id,
context=context,
exc_info=True,
)
return None
def _trusted_post_drain_status(run: WorkflowRun | None) -> str | None:
"""Return the run's status if it is one we can trust after the cancel
helper has run; otherwise ``None``.
``canceled`` is deliberately rejected because at post-drain read time we
can't tell a legitimate ``canceled`` (written by
``_finalize_workflow_run_status`` when a block/user canceled the run)
apart from a synthetic ``canceled`` (written by the cancel helper's
fallback). Callers that need to distinguish those cases must read the row
BEFORE the cancel helper runs.
"""
if run is None:
return None
if WorkflowRunStatus(run.status).is_final_excluding_canceled():
return run.status
return None
def _maybe_clear_reconciliation_flag(copilot_ctx: Any, result: Any) -> None:
"""Clear ``pending_reconciliation_run_id`` iff ``result`` proves the
pending run has reached a trustworthy-final status.
Called from ``get_run_results_tool`` after ``_get_run_results`` returns.
Requires (a) a pending run_id on the ctx, (b) a matching resolved run_id
in ``result.data`` (so a ``workflow_run_id=None`` call that resolves to
a different run does NOT clear), and (c) the resolved ``overall_status``
passes ``is_final_excluding_canceled`` (so an ambiguous ``canceled``
does NOT clear).
"""
pending_run_id = getattr(copilot_ctx, "pending_reconciliation_run_id", None)
if not isinstance(pending_run_id, str) or not pending_run_id:
return
if not isinstance(result, dict):
return
data = result.get("data")
if not isinstance(data, dict):
return
resolved_run_id = data.get("workflow_run_id")
resolved_status = data.get("overall_status")
if (
isinstance(resolved_run_id, str)
and resolved_run_id == pending_run_id
and isinstance(resolved_status, str)
and WorkflowRunStatus(resolved_status).is_final_excluding_canceled()
):
copilot_ctx.pending_reconciliation_run_id = None
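A quick sketch of the clearing contract, using a ``SimpleNamespace`` stand-in for ``CopilotContext`` (only the field under test matters here):

from types import SimpleNamespace

ctx = SimpleNamespace(pending_reconciliation_run_id="wr_123")

# Mismatched run id: flag survives.
_maybe_clear_reconciliation_flag(ctx, {"data": {"workflow_run_id": "wr_999", "overall_status": "completed"}})
assert ctx.pending_reconciliation_run_id == "wr_123"

# Ambiguous canceled status: flag survives.
_maybe_clear_reconciliation_flag(ctx, {"data": {"workflow_run_id": "wr_123", "overall_status": "canceled"}})
assert ctx.pending_reconciliation_run_id == "wr_123"

# Matching run id with a trustworthy terminal status: flag clears.
_maybe_clear_reconciliation_flag(ctx, {"data": {"workflow_run_id": "wr_123", "overall_status": "completed"}})
assert ctx.pending_reconciliation_run_id is None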
# Streak threshold at which the copilot hard-aborts a tool call because the
# same action sequence has repeated run-over-run with no intervening success.
# The streak counter is incremented in ``update_repeated_failure_state`` AFTER
@ -240,6 +315,23 @@ def _tool_loop_error(ctx: AgentContext, tool_name: str) -> str | None:
# to block-running tools so planning/metadata tools (update_workflow,
# list_credentials, get_run_results) stay unaffected.
if tool_name in _BLOCK_RUNNING_TOOLS:
# Reconciliation guard: a previous block-running tool call timed out
# without reconciling the run's outcome (post-drain status was
# ``canceled``, non-final, or unreadable). Block further block-running
# calls until ``get_run_results`` clears the flag. Prevents the LLM
# from auto-retrying a mutation block whose side effects may have
# landed.
pending_run_id = getattr(ctx, "pending_reconciliation_run_id", None)
if isinstance(pending_run_id, str) and pending_run_id:
return (
f"The previous block-running tool call timed out and run "
f"{pending_run_id} was not reconciled to a trustworthy terminal "
f'status. Call `get_run_results(workflow_run_id="{pending_run_id}")` '
f"first, report the result to the user, then await user input "
f"before running more blocks. This guard prevents duplicate "
f"side effects on live sites."
)
streak_raw = getattr(ctx, "repeated_action_fingerprint_streak_count", 0)
streak = streak_raw if isinstance(streak_raw, int) else 0
if streak >= REPEATED_ACTION_STREAK_ABORT_AT:
@ -861,26 +953,78 @@ async def _run_blocks_and_collect_debug(
break
if final_status is None:
await _cancel_run_task_if_not_final(run_task, workflow_run.workflow_run_id)
timeout_msg = (
f"Block execution timed out after {max_poll}s. "
f"Run ID: {workflow_run.workflow_run_id}. "
f"The task was likely stuck repeating failing actions. "
f"Consider: simplifying the navigation_goal, using a more specific URL, "
f"adding a dismiss-popup step, or concluding the site is not automatable."
# Read the row BEFORE the cancel helper runs. The poll interval is
# 2 s, so a legitimate self-finalize (``canceled`` from a user/block
# cancel, or any other terminal) can land in the narrow gap between
# the last poll and entering this branch. Trusting the pre-cancel
# status preserves that signal without ambiguity — nothing we do
# could have written it. Any post-cancel re-read has to exclude
# ``canceled`` because the helper itself writes synthetic
# ``canceled`` (see ``WorkflowRunStatus.is_final_excluding_canceled``).
pre_cancel_run = await _safe_read_workflow_run(
workflow_run.workflow_run_id, ctx.organization_id, context="pre-cancel"
)
if ctx.browser_session_id:
try:
browser_state = await app.PERSISTENT_SESSIONS_MANAGER.get_browser_state(
session_id=ctx.browser_session_id,
organization_id=ctx.organization_id,
if pre_cancel_run is not None and WorkflowRunStatus(pre_cancel_run.status).is_final():
final_status = pre_cancel_run.status
run = pre_cancel_run
else:
await _cancel_run_task_if_not_final(run_task, workflow_run.workflow_run_id)
# The cancel helper waits for ``execute_workflow``'s shielded
# ``_finalize_workflow_run_status`` to run, which writes the real
# terminal status when the run completed during the drain window.
# Re-read once before declaring timeout so a run that finished
# in those last few seconds is not mis-reported as a failure the
# LLM will retry against a live site.
run = await _safe_read_workflow_run(
workflow_run.workflow_run_id, ctx.organization_id, context="post-drain"
)
trusted = _trusted_post_drain_status(run)
if trusted is not None:
final_status = trusted
else:
# Three distinct cases produce a ``None`` from
# ``_trusted_post_drain_status`` — DB read failed, row
# non-final, or ``canceled`` (legitimate vs synthetic
# indistinguishable at this read). Body differs so logs
# are attributable; the LLM-facing instruction is
# identical: outcome is uncertain, reconcile via
# ``get_run_results`` before re-invoking block-running
# tools. The reconciliation guard below enforces that.
if run is None:
timeout_body = (
f"Block execution exceeded the {max_poll}s tool budget and the workflow "
f"run row could not be re-read after cancellation (see 'Workflow run "
f"re-read failed' log)."
)
elif run.status == WorkflowRunStatus.canceled.value:
timeout_body = (
f"Block execution was cancelled after exceeding the {max_poll}s tool "
f"budget. Side effects of any actions already taken may have landed."
)
else:
timeout_body = (
f"Block execution exceeded the {max_poll}s tool budget and the workflow "
f"run did not reach a terminal status within the cancellation grace "
f"window (last observed status: {run.status})."
)
timeout_msg = (
f"{timeout_body} Run ID: {workflow_run.workflow_run_id}. "
f"Outcome is uncertain. Do NOT re-invoke block-running tools in this "
f"session without first calling `get_run_results` with this "
f"workflow_run_id and reporting the result to the user."
)
if browser_state:
page = await browser_state.get_or_create_page()
timeout_msg += f" Browser was on: {page.url}"
except Exception:
pass
return {"ok": False, "error": timeout_msg}
current_url, _ = await _fallback_page_info(ctx)
if current_url:
timeout_msg += f" Browser was on: {current_url}"
# Turn-scoped reconciliation guard. Clearing requires an
# explicit ``get_run_results`` read that resolves to this
# run_id with a status that passes
# ``WorkflowRunStatus.is_final_excluding_canceled`` (see
# clearing logic in ``get_run_results_tool``).
ctx.pending_reconciliation_run_id = workflow_run.workflow_run_id
return {"ok": False, "error": timeout_msg}
except asyncio.CancelledError:
# The SDK's @function_tool(timeout=...) cancelled us mid-poll. Shield
# the cleanup so the parent cancellation can't interrupt it mid-await.
@ -1691,6 +1835,8 @@ async def get_run_results_tool(
if workflow_run_id:
params["workflow_run_id"] = workflow_run_id
result = await _get_run_results(params, copilot_ctx)
_maybe_clear_reconciliation_flag(copilot_ctx, result)
sanitized = sanitize_tool_result_for_llm("get_run_results", result)
return json.dumps(sanitized)

View file

@ -37,6 +37,7 @@ class SkyvernContext:
max_screenshot_scrolls: int | None = None
browser_container_ip: str | None = None
browser_container_task_arn: str | None = None
is_remote_browser: bool = False
feature_flag_entries: dict[str, bool | str | None] = field(default_factory=dict)
# feature flags

View file

@ -21,7 +21,7 @@ class CursorEventStrategy(ABC):
pass
@abstractmethod
async def click(self, page: Page, locator: Locator) -> None:
async def click(self, page: Page, locator: Locator, timeout: float | None = None) -> None:
pass

View file

@ -34,8 +34,12 @@ class DefaultCursorStrategy(CursorEventStrategy):
LOG.debug("move_to_element failed", exc_info=True)
return 0.0, 0.0
async def click(self, page: Page, locator: Locator) -> None:
await locator.click()
async def click(self, page: Page, locator: Locator, timeout: float | None = None) -> None:
LOG.debug("DefaultCursorStrategy.click", timeout=timeout)
kwargs: dict = {}
if timeout is not None:
kwargs["timeout"] = timeout
await locator.click(**kwargs)
class DefaultInputStrategy(InputEventStrategy):

View file

@ -123,6 +123,15 @@ class EventStrategyFactory:
"""Update cursor position without generating movement."""
EventStrategyFactory.get_cursor_strategy().sync_position(page, x, y)
@staticmethod
async def click(page: Page, locator: Locator, timeout: float | None = None) -> None:
"""Click an element using the active cursor strategy."""
start = time.perf_counter()
try:
await EventStrategyFactory.get_cursor_strategy().click(page, locator, timeout)
finally:
EventStrategyFactory.__metrics.record("click", time.perf_counter() - start)
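Hypothetical call site (``page`` and ``locator`` are assumed to be live Playwright objects); because ``record`` sits in a ``finally``, the click metric is captured even when the strategy raises:

# Timeout is forwarded to the active cursor strategy; a raised TimeoutError
# still records a "click" timing sample.
await EventStrategyFactory.click(page, locator, timeout=5_000)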
# -- input convenience methods ----------------------------------------------
@staticmethod

View file

@ -858,7 +858,8 @@ async def _new_copilot_chat_post(
# panel state below stays gated on auto_accept — the frontend
# applies proposals via applyWorkflowUpdate when auto-accept is
# on.
if _should_restore_persisted_workflow(chat.auto_accept, agent_result):
restored = _should_restore_persisted_workflow(chat.auto_accept, agent_result)
if restored:
await _restore_workflow_definition(original_workflow, organization.organization_id)
if chat.auto_accept is not True:
@ -871,11 +872,16 @@ async def _new_copilot_chat_post(
workflow_copilot_chat_id=chat.workflow_copilot_chat_id,
proposed_workflow=proposed_data,
)
elif getattr(agent_result, "clear_proposed_workflow", False):
# Feasibility-gate fast-path returned ASK_QUESTION. Null
# any previously-persisted proposed_workflow so a page
# reload does not resurrect a stale draft alongside the
# new clarification question.
elif (
restored or getattr(agent_result, "clear_proposed_workflow", False)
) and chat.proposed_workflow is not None:
# Null any previously-persisted proposed_workflow so a
# page reload does not resurrect a stale Accept/Reject
# card next to an assistant message that just explained
# why no verified proposal is available. Covers:
# * feasibility-gate fast-path clarifications, and
# * SKY-9143 strict-gate turns where a mid-turn draft was
# rolled back (``restored=True``).
await app.DATABASE.workflow_params.update_workflow_copilot_chat(
organization_id=chat.organization_id,
workflow_copilot_chat_id=chat.workflow_copilot_chat_id,

View file

@ -357,6 +357,7 @@ class Task(TaskBase):
finished_at=self.finished_at,
extracted_information=self.extracted_information,
failure_reason=failure_reason or self.failure_reason,
failure_category=self.failure_category,
webhook_failure_reason=self.webhook_failure_reason,
action_screenshot_urls=action_screenshot_urls,
screenshot_url=screenshot_url,
@ -387,6 +388,7 @@ class TaskResponse(BaseModel):
downloaded_files: list[FileInfo] | None = None
downloaded_file_urls: list[str] | None = None
failure_reason: str | None = None
failure_category: list[dict[str, Any]] | None = None
webhook_failure_reason: str | None = None
errors: list[dict[str, Any]] = []
max_steps_per_run: int | None = None

View file

@ -1,9 +1,11 @@
import asyncio
from functools import wraps
from typing import Any, Callable
from typing import Any, Callable, Literal
from opentelemetry import trace
SpanRole = Literal["wrapper"]
# Context fields to auto-attach to every span. Deliberately minimal — each
# attribute is paid for in storage and index cardinality, so only IDs we
# actively query on during profiling / Milestone 2 aggregations belong here.
@ -56,7 +58,11 @@ def apply_context_attrs(span: Any) -> None:
span.set_attribute(attr, str(value))
def traced(name: str | None = None, tags: list[str] | None = None) -> Callable:
def traced(
name: str | None = None,
tags: list[str] | None = None,
role: SpanRole | None = None,
) -> Callable:
"""Decorator that creates an OTEL span. No-op without SDK installed.
Every span is tagged with:
@ -73,6 +79,11 @@ def traced(name: str | None = None, tags: list[str] | None = None) -> Callable:
Args:
name: Span name. If not provided, uses func.__qualname__.
tags: Tags to add as a span attribute.
role: Optional span role. Set to "wrapper" on spans whose duration is
dominated by the work of their children (e.g. `agent.step`,
`workflow.execute`). Dashboards filter these out with
`skyvern.span.role != 'wrapper'` so leaf-time composition (pie,
stacked bar) isn't double-counted via nesting.
"""
def decorator(func: Callable) -> Callable:
@ -87,6 +98,8 @@ def traced(name: str | None = None, tags: list[str] | None = None) -> Callable:
with trace.get_tracer("skyvern").start_as_current_span(span_name) as span:
span.set_attribute("code.function", code_function)
span.set_attribute("code.namespace", code_namespace)
if role is not None:
span.set_attribute("skyvern.span.role", role)
apply_context_attrs(span)
if tags:
span.set_attribute("tags", tags)
@ -105,6 +118,8 @@ def traced(name: str | None = None, tags: list[str] | None = None) -> Callable:
with trace.get_tracer("skyvern").start_as_current_span(span_name) as span:
span.set_attribute("code.function", code_function)
span.set_attribute("code.namespace", code_namespace)
if role is not None:
span.set_attribute("skyvern.span.role", role)
apply_context_attrs(span)
if tags:
span.set_attribute("tags", tags)

View file

@ -0,0 +1,37 @@
"""Standalone Jinja environment shared by block modules to avoid import cycles."""
from __future__ import annotations
import json
from typing import Any
from jinja2.sandbox import SandboxedEnvironment
# Sentinel marker for native JSON type injection via | json filter.
_JSON_TYPE_MARKER = "__SKYVERN_RAW_JSON__"
def _json_type_filter(value: Any) -> str:
"""Jinja filter that marks a value for native JSON type injection.
Usage in templates: {{ some_bool | json }}
The filter serializes the value to JSON and wraps it with sentinel markers.
When _render_templates_in_json() detects these markers, it unwraps and
parses the JSON to get the native typed value (bool, int, list, etc.).
Uses default=str to handle non-JSON-serializable types (datetime, Enum, etc.)
"""
return f"{_JSON_TYPE_MARKER}{json.dumps(value, default=str)}{_JSON_TYPE_MARKER}"
def _json_finalize(value: Any) -> Any:
"""Jinja finalize hook: JSON-serialize dict/list values so `{{ var }}` yields valid JSON
instead of Python repr. Strings (including `| tojson` output) pass through unchanged."""
if isinstance(value, (dict, list)):
return json.dumps(value, default=str)
return value
jinja_json_finalize_env = SandboxedEnvironment(finalize=_json_finalize)
jinja_json_finalize_env.filters["json"] = _json_type_filter
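Round-trip sketch of the two hooks; the unwrap step stands in for ``_render_templates_in_json``, which lives in block.py:

template = jinja_json_finalize_env.from_string("{{ flag | json }} / {{ payload }}")
rendered = template.render(flag=True, payload={"a": 1})
# finalize serialized the dict; the | json filter wrapped the bool in markers:
# '__SKYVERN_RAW_JSON__true__SKYVERN_RAW_JSON__ / {"a": 1}'
raw = rendered.split(_JSON_TYPE_MARKER)[1]
assert json.loads(raw) is True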

View file

@ -31,6 +31,7 @@ from charset_normalizer import from_bytes
from email_validator import EmailNotValidError, validate_email
from jinja2 import StrictUndefined
from jinja2.sandbox import SandboxedEnvironment
from opentelemetry import trace as otel_trace
from playwright.async_api import Page
from pydantic import BaseModel, Field, model_validator
@ -47,6 +48,7 @@ from skyvern.exceptions import (
DownloadFileMaxSizeExceeded,
MissingBrowserState,
MissingBrowserStatePage,
MissingStarterUrl,
PDFParsingError,
TaskNotFound,
UnexpectedTaskStatus,
@ -101,6 +103,10 @@ from skyvern.forge.sdk.workflow.loop_download_filter import (
filter_downloaded_files_for_current_iteration,
to_downloaded_file_signature,
)
from skyvern.forge.sdk.workflow.models._jinja import (
_JSON_TYPE_MARKER,
_json_type_filter,
)
from skyvern.forge.sdk.workflow.models.parameter import (
PARAMETER_TYPE,
AWSSecretParameter,
@ -219,24 +225,6 @@ else:
# Date format used for the built-in {{current_date}} reserved parameter.
CURRENT_DATE_FORMAT = "%Y-%m-%d"
# Sentinel marker for native JSON type injection via | json filter.
_JSON_TYPE_MARKER = "__SKYVERN_RAW_JSON__"
def _json_type_filter(value: Any) -> str:
"""Jinja filter that marks a value for native JSON type injection.
Usage in templates: {{ some_bool | json }}
The filter serializes the value to JSON and wraps it with sentinel markers.
When _render_templates_in_json() detects these markers, it unwraps and
parses the JSON to get the native typed value (bool, int, list, etc.).
Uses default=str to handle non-JSON-serializable types (datetime, Enum, etc.)
"""
return f"{_JSON_TYPE_MARKER}{json.dumps(value, default=str)}{_JSON_TYPE_MARKER}"
jinja_sandbox_env.filters["json"] = _json_type_filter
@ -414,6 +402,7 @@ class Block(BaseModel, abc.ABC):
workflow_run_context: WorkflowRunContext,
*,
force_include_secrets: bool = False,
env: SandboxedEnvironment | None = None,
) -> str:
"""
Format a template string using the workflow run context.
@ -431,7 +420,7 @@ class Block(BaseModel, abc.ABC):
BlockType.HTTP_REQUEST,
]
template = jinja_sandbox_env.from_string(potential_template)
template = (env or jinja_sandbox_env).from_string(potential_template)
block_reference_data: dict[str, Any] = workflow_run_context.get_block_metadata(self.label)
template_data = workflow_run_context.values.copy()
@ -601,7 +590,7 @@ class Block(BaseModel, abc.ABC):
"""Return block-level error codes for unexpected failures. Override in subclasses."""
return []
@traced(name="skyvern.block.execute")
@traced(name="skyvern.block.execute", role="wrapper")
async def execute_safe(
self,
workflow_run_id: str,
@ -612,6 +601,10 @@ class Block(BaseModel, abc.ABC):
current_index: int | None = None,
**kwargs: dict,
) -> BlockResult:
# block_type slices the 303s p95 by block kind — task/for_loop/code/extraction
# have wildly different latency profiles. Set early so it's present even if
# execute_safe raises before any child work.
otel_trace.get_current_span().set_attribute("block_type", self.block_type.value)
workflow_run_block_id = None
engine: RunEngine | None = None
try:
@ -1077,6 +1070,24 @@ class BaseTaskBlock(Block):
)
raise e
# Validate starter URL before downstream scraping on a blank page
if not (self.url and self.url.strip()) and working_page.url in ("about:blank", "", ":"):
missing_url_exc = MissingStarterUrl(block_label=self.label)
LOG.warning(
"First browser block has no starter URL",
task_id=task.task_id,
workflow_run_id=workflow_run_id,
block_label=self.label,
)
await self._handle_task_failure_with_error_detection(
task=task,
step=step,
browser_state=browser_state,
failure_reason=str(missing_url_exc),
organization_id=workflow_run.organization_id,
)
raise missing_url_exc
try:
# add screenshot artifact for the first task
screenshot = await browser_state.take_fullpage_screenshot()
@ -7174,6 +7185,12 @@ def get_all_blocks(blocks: list[BlockTypeVar]) -> list[BlockTypeVar]:
return all_blocks
# Late import: google_sheets_blocks imports Block from this module, so top-level import would cycle.
from skyvern.forge.sdk.workflow.models.google_sheets_blocks import ( # noqa: E402
GoogleSheetsReadBlock,
GoogleSheetsWriteBlock,
)
BlockSubclasses = Union[
ConditionalBlock,
ForLoopBlock,
@ -7199,6 +7216,8 @@ BlockSubclasses = Union[
HttpRequestBlock,
PrintPageBlock,
WorkflowTriggerBlock,
GoogleSheetsReadBlock,
GoogleSheetsWriteBlock,
]
BlockTypeVar = Annotated[BlockSubclasses, Field(discriminator="block_type")]

View file

@ -0,0 +1,893 @@
import json
import re
from dataclasses import dataclass
from typing import Any, Literal, cast
import structlog
from skyvern.forge import app
from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext
from skyvern.forge.sdk.workflow.models._jinja import jinja_json_finalize_env
from skyvern.forge.sdk.workflow.models.block import Block
from skyvern.forge.sdk.workflow.models.parameter import PARAMETER_TYPE
from skyvern.schemas.google_sheets import (
MAX_COLUMN_INDEX,
GoogleSheetsAPIError,
a1_to_grid_range,
build_a1,
column_letters_to_index,
extract_a1_sheet_prefix,
extract_spreadsheet_id,
leading_column_offset,
)
from skyvern.schemas.workflows import BlockResult, BlockStatus, BlockType
LOG = structlog.get_logger()
def _disambiguate_header(header: list[str]) -> list[str]:
"""Rename empty/duplicate header cells so dict(zip(header, row)) does not drop columns."""
counts: dict[str, int] = {}
reserved = {h.strip() for h in header if h and h.strip()}
disambiguated: list[str] = []
for idx, raw in enumerate(header):
name = raw.strip() if raw else ""
if not name:
candidate = f"col_{idx + 1}"
suffix = 2
while candidate in reserved or candidate in disambiguated:
candidate = f"col_{idx + 1}_{suffix}"
suffix += 1
disambiguated.append(candidate)
reserved.add(candidate)
continue
if counts.get(name, 0) == 0:
counts[name] = 1
disambiguated.append(name)
continue
counts[name] += 1
candidate = f"{name}_{counts[name]}"
while candidate in reserved or candidate in disambiguated:
counts[name] += 1
candidate = f"{name}_{counts[name]}"
disambiguated.append(candidate)
reserved.add(candidate)
return disambiguated
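Worked example: empty cells get positional names, duplicates get numeric suffixes, and both skip names the header already reserves:

assert _disambiguate_header(["id", "", "name", "name", "col_2"]) == [
    "id", "col_2_2", "name", "name_2", "col_2"
]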
class GoogleSheetsReadBlock(Block):
block_type: Literal[BlockType.GOOGLE_SHEETS_READ] = BlockType.GOOGLE_SHEETS_READ # type: ignore
spreadsheet_url: str
sheet_name: str | None = None
range: str | None = None
credential_id: str | None = None
has_header_row: bool = True
parameters: list[PARAMETER_TYPE] = []
def get_all_parameters(self, workflow_run_id: str) -> list[PARAMETER_TYPE]:
return self.parameters
def _render_templates(self, workflow_run_context: WorkflowRunContext) -> None:
if self.spreadsheet_url:
self.spreadsheet_url = self.format_block_parameter_template_from_workflow_run_context(
self.spreadsheet_url, workflow_run_context
)
if self.sheet_name:
self.sheet_name = self.format_block_parameter_template_from_workflow_run_context(
self.sheet_name, workflow_run_context
)
if self.range:
self.range = self.format_block_parameter_template_from_workflow_run_context(
self.range, workflow_run_context
)
if self.credential_id:
self.credential_id = self.format_block_parameter_template_from_workflow_run_context(
self.credential_id, workflow_run_context
)
async def execute(
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: Any,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
try:
self._render_templates(workflow_run_context)
except Exception as e:
return await self.build_block_result(
success=False,
failure_reason=f"Failed to format jinja template: {str(e)}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if not self.credential_id:
return await self.build_block_result(
success=False,
failure_reason="Google credential_id is required",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
try:
spreadsheet_id = extract_spreadsheet_id(self.spreadsheet_url)
except ValueError:
return await self.build_block_result(
success=False,
failure_reason=f"Could not resolve spreadsheet id from: {self.spreadsheet_url}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
a1 = build_a1(self.sheet_name, self.range)
if not a1:
return await self.build_block_result(
success=False,
failure_reason="Either sheet_name or range must be provided",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
effective_org_id = organization_id or workflow_run_context.organization_id
if not effective_org_id:
return await self.build_block_result(
success=False,
failure_reason="organization_id is required to load Google Sheets credentials",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
access_token = await app.AGENT_FUNCTION.get_google_sheets_credentials(
organization_id=effective_org_id,
credential_id=self.credential_id,
)
if not access_token:
return await self.build_block_result(
success=False,
failure_reason="Reconnect the Google account: no valid access token",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
fields = (
"spreadsheetId,sheets("
"properties(sheetId,title,index),"
"merges,"
"data(startRow,startColumn,rowData(values("
"userEnteredValue,userEnteredFormat,formattedValue,note,hyperlink"
")))"
")"
)
try:
payload = await app.AGENT_FUNCTION.google_sheets_values_get(
access_token=access_token,
spreadsheet_id=spreadsheet_id,
ranges=a1,
fields=fields,
)
except GoogleSheetsAPIError as e:
failure_reason = _failure_reason_from_sheets_error("read", e)
error_data = {"status_code": e.status, "code": e.code, "error": e.message}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=failure_reason,
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
except Exception as e:
error_data = {"error": str(e), "error_type": "unknown"}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=f"Google Sheets read failed: {str(e)}",
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if payload is None:
error_data = {"error": "Google Sheets read returned no payload"}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason="Google Sheets runtime is not available in this build",
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
sheets = payload.get("sheets") or []
# spreadsheets.get returns every sheet object even when ranges= is set, so
# selecting [0] would silently grab the first tab instead of the requested one.
target_sheet_title = self.sheet_name or extract_a1_sheet_prefix(a1)
sheet_block: dict[str, Any] = {}
if target_sheet_title:
for candidate in sheets:
candidate_title = (candidate.get("properties") or {}).get("title")
if candidate_title == target_sheet_title:
sheet_block = candidate
break
if not sheet_block:
sheet_block = sheets[0] if sheets else {}
properties = sheet_block.get("properties") or {}
data_blocks = sheet_block.get("data") or []
first_data = data_blocks[0] if data_blocks else {}
row_data = first_data.get("rowData") or []
cells: list[list[dict[str, Any]]] = []
values: list[list[Any]] = []
for row in row_data:
row_values = row.get("values") or []
cells.append([dict(cell) for cell in row_values])
values.append([cell.get("formattedValue", "") for cell in row_values])
start_row = int(first_data.get("startRow", 0))
start_column = int(first_data.get("startColumn", 0))
row_count = len(cells)
col_count = max((len(r) for r in cells), default=0)
end_row_exclusive = start_row + row_count
end_col_exclusive = start_column + col_count
merges: list[dict[str, int]] = []
for merge in sheet_block.get("merges") or []:
m_start_row = int(merge.get("startRowIndex", 0))
m_end_row = int(merge.get("endRowIndex", 0))
m_start_col = int(merge.get("startColumnIndex", 0))
m_end_col = int(merge.get("endColumnIndex", 0))
# Clip to the intersection so a merge that begins outside the read window
# does not produce negative offsets when a downstream rich write replays it.
clipped_start_row = max(m_start_row, start_row)
clipped_end_row = min(m_end_row, end_row_exclusive)
clipped_start_col = max(m_start_col, start_column)
clipped_end_col = min(m_end_col, end_col_exclusive)
if clipped_start_row >= clipped_end_row or clipped_start_col >= clipped_end_col:
continue
merges.append(
{
"start_row_index": clipped_start_row - start_row,
"end_row_index": clipped_end_row - start_row,
"start_column_index": clipped_start_col - start_column,
"end_column_index": clipped_end_col - start_column,
}
)
rows: list[dict[str, Any]] | None = None
if self.has_header_row and values:
header = _disambiguate_header([str(h) for h in values[0]])
rows = [dict(zip(header, row)) for row in values[1:]]
output_data: dict[str, Any] = {
"spreadsheet_id": spreadsheet_id,
"range": a1,
"sheet_id": properties.get("sheetId"),
"sheet_title": properties.get("title"),
"start_row": start_row,
"start_column": start_column,
"values": values,
"rows": rows,
"cells": cells,
"merges": merges,
}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_data)
return await self.build_block_result(
success=True,
failure_reason=None,
output_parameter_value=output_data,
status=BlockStatus.completed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
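For orientation, a 2x2 read of ``Sheet1!A1:B2`` with a header row would produce output roughly like this (hedged illustration, not captured from a real run; cell payloads trimmed):

# values/rows carry formattedValue strings, so numbers arrive as text.
{
    "spreadsheet_id": "<spreadsheet id>",
    "range": "Sheet1!A1:B2",
    "sheet_id": 0,
    "sheet_title": "Sheet1",
    "start_row": 0,
    "start_column": 0,
    "values": [["name", "age"], ["Ada", "36"]],
    "rows": [{"name": "Ada", "age": "36"}],
    "cells": [[{"formattedValue": "name"}, {"formattedValue": "age"}],
              [{"formattedValue": "Ada"}, {"formattedValue": "36"}]],
    "merges": [],
}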
@dataclass(frozen=True)
class RichSheetsInput:
cells: list[list[dict[str, Any]]]
merges: list[dict[str, int]]
sheet_id: int | None
sheet_title: str | None
def _failure_reason_from_sheets_error(action: str, exc: GoogleSheetsAPIError) -> str:
if exc.status == 403 and exc.code == "reconnect_required":
return f"Reconnect the Google account: {exc.message}"
if exc.status == 429:
return f"Google Sheets rate limit on {action}: {exc.message}"
return f"Google Sheets {action} failed (HTTP {exc.status}): {exc.message}"
def _try_rich_sheets_input(parsed: Any) -> RichSheetsInput | None:
if not isinstance(parsed, dict):
return None
cells = parsed.get("cells")
if not isinstance(cells, list) or not cells:
return None
if not all(isinstance(row, list) for row in cells):
return None
merges_in = parsed.get("merges") or []
merges: list[dict[str, int]] = []
for m in merges_in:
if not isinstance(m, dict):
continue
try:
merges.append(
{
"start_row_index": int(m.get("start_row_index", 0)),
"end_row_index": int(m.get("end_row_index", 0)),
"start_column_index": int(m.get("start_column_index", 0)),
"end_column_index": int(m.get("end_column_index", 0)),
}
)
except (TypeError, ValueError):
continue
sheet_id_raw = parsed.get("sheet_id")
sheet_title_raw = parsed.get("sheet_title")
return RichSheetsInput(
cells=cells,
merges=merges,
sheet_id=int(sheet_id_raw) if isinstance(sheet_id_raw, int) else None,
sheet_title=str(sheet_title_raw) if isinstance(sheet_title_raw, str) else None,
)
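Example of the rich shape this detector accepts (the same shape ``GoogleSheetsReadBlock`` emits), so a read output can be piped straight into a write block:

payload = {
    "cells": [
        [{"userEnteredValue": {"stringValue": "Q1"}}, {"userEnteredValue": {"numberValue": 42}}],
    ],
    "merges": [
        {"start_row_index": 0, "end_row_index": 1, "start_column_index": 0, "end_column_index": 2},
    ],
    "sheet_id": 123,  # local to the source spreadsheet; title wins on write
    "sheet_title": "Summary",
}
rich = _try_rich_sheets_input(payload)
assert rich is not None and rich.sheet_title == "Summary"

# A plain list of lists is NOT rich and falls through to the flat write path:
assert _try_rich_sheets_input([["a", "b"]]) is None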
class GoogleSheetsWriteBlock(Block):
block_type: Literal[BlockType.GOOGLE_SHEETS_WRITE] = BlockType.GOOGLE_SHEETS_WRITE # type: ignore
spreadsheet_url: str
sheet_name: str | None = None
range: str | None = None
credential_id: str | None = None
write_mode: Literal["append", "update"] = "append"
values: str = ""
column_mapping: dict[str, str] | None = None
create_sheet_if_missing: bool = False
parameters: list[PARAMETER_TYPE] = []
def get_all_parameters(self, workflow_run_id: str) -> list[PARAMETER_TYPE]:
return self.parameters
def _render_templates(self, workflow_run_context: WorkflowRunContext) -> None:
if self.spreadsheet_url:
self.spreadsheet_url = self.format_block_parameter_template_from_workflow_run_context(
self.spreadsheet_url, workflow_run_context
)
if self.sheet_name:
self.sheet_name = self.format_block_parameter_template_from_workflow_run_context(
self.sheet_name, workflow_run_context
)
if self.range:
self.range = self.format_block_parameter_template_from_workflow_run_context(
self.range, workflow_run_context
)
if self.credential_id:
self.credential_id = self.format_block_parameter_template_from_workflow_run_context(
self.credential_id, workflow_run_context
)
if self.values:
self.values = self.format_block_parameter_template_from_workflow_run_context(
self.values, workflow_run_context, env=jinja_json_finalize_env
)
def _coerce_values(self, raw: Any, *, column_offset: int = 0) -> list[list[Any]]:
if isinstance(raw, dict):
if isinstance(raw.get("values"), list) and isinstance(raw.get("rows"), list):
LOG.warning("Google Sheets write payload has both 'values' and 'rows'; using 'values'")
for key in ("values", "rows"):
inner = raw.get(key)
if isinstance(inner, list):
raw = inner
break
if not isinstance(raw, list):
raise ValueError("Google Sheets write expects a JSON array of rows")
if not raw:
return []
if all(isinstance(row, list) for row in raw):
return cast(list[list[Any]], raw)
if all(isinstance(row, dict) for row in raw):
if not self.column_mapping:
raise ValueError("column_mapping is required when writing a list of objects to Google Sheets")
indexed: list[tuple[int, str]] = []
seen_columns: set[int] = set()
for field_key, target in self.column_mapping.items():
target_str = str(target).strip().upper()
# str.isalpha accepts non-ASCII letters; restrict to A-Z so we never silently
# map "Ω" or "Α" to a column index.
if not target_str or not re.fullmatch(r"[A-Z]+", target_str):
raise ValueError(f"column_mapping target must be a column letter (A, B, ... ZZ), got: {target!r}")
col_index = column_letters_to_index(target_str)
if col_index > MAX_COLUMN_INDEX:
raise ValueError(f"column_mapping target {target!r} exceeds the Google Sheets column limit (ZZZ)")
if col_index in seen_columns:
raise ValueError(f"column_mapping has duplicate destination column: {target!r}")
pos = col_index - column_offset
if pos < 0:
raise ValueError(
f"column_mapping target {target!r} falls before the range start column; "
f"widen the range or remap this field"
)
seen_columns.add(col_index)
indexed.append((pos, field_key))
width = max(pos for pos, _ in indexed) + 1
coerced: list[list[Any]] = []
for row in raw:
padded: list[Any] = [None] * width
for pos, field_key in indexed:
padded[pos] = row.get(field_key)
coerced.append(padded)
return coerced
raise ValueError("Google Sheets write expects rows to be all lists or all objects")
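Standalone repro of the position math, assuming ``column_mapping={"name": "B", "age": "D"}`` on a range whose leading column is B (so ``column_offset=1``; B maps to absolute index 1, D to 3):

positions = {"name": 0, "age": 2}  # absolute index minus column_offset
width = max(positions.values()) + 1
coerced = []
for row in [{"name": "Ada", "age": 36}, {"name": "Grace"}]:
    padded = [None] * width
    for field_key, pos in positions.items():
        padded[pos] = row.get(field_key)  # missing fields stay None
    coerced.append(padded)
assert coerced == [["Ada", None, 36], ["Grace", None, None]]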
def _parse_values_or_raise(self) -> Any:
"""Parse `self.values` JSON text into raw Python data.
Returns raw parsed JSON so the caller can rich-detect or coerce as needed.
Callers performing a flat write must pass the result through `_coerce_values`.
"""
if not self.values:
return []
snippet = self.values[:200]
try:
return json.loads(self.values)
except (ValueError, json.JSONDecodeError) as e:
stripped = self.values.lstrip()
hint = ""
if stripped.startswith("{'") or stripped.startswith("[{'") or stripped.startswith("['"):
hint = (
" Looks like a Python dict/list repr - wrap your template with | tojson "
"(e.g. {{ block_1.output | tojson }})."
)
raise ValueError(f"{str(e)}.{hint} Rendered values: {snippet!r}") from e
async def execute(
self,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None = None,
browser_session_id: str | None = None,
**kwargs: Any,
) -> BlockResult:
workflow_run_context = self.get_workflow_run_context(workflow_run_id)
try:
self._render_templates(workflow_run_context)
except Exception as e:
return await self.build_block_result(
success=False,
failure_reason=f"Failed to format jinja template: {str(e)}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if not self.credential_id:
return await self.build_block_result(
success=False,
failure_reason="Google credential_id is required",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
try:
spreadsheet_id = extract_spreadsheet_id(self.spreadsheet_url)
except ValueError:
return await self.build_block_result(
success=False,
failure_reason=f"Could not resolve spreadsheet id from: {self.spreadsheet_url}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
a1 = build_a1(self.sheet_name, self.range)
if not a1:
return await self.build_block_result(
success=False,
failure_reason="Either sheet_name or range must be provided",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
try:
parsed_values: Any = self._parse_values_or_raise()
except ValueError as e:
return await self.build_block_result(
success=False,
failure_reason=f"Invalid values payload: {str(e)}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
effective_org_id = organization_id or workflow_run_context.organization_id
if not effective_org_id:
return await self.build_block_result(
success=False,
failure_reason="organization_id is required to load Google Sheets credentials",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
access_token = await app.AGENT_FUNCTION.get_google_sheets_credentials(
organization_id=effective_org_id,
credential_id=self.credential_id,
)
if not access_token:
return await self.build_block_result(
success=False,
failure_reason="Reconnect the Google account: no valid access token",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
created_sheet_id: int | None = None
target_sheet_title = self.sheet_name or extract_a1_sheet_prefix(a1)
if self.create_sheet_if_missing and target_sheet_title:
try:
existing_sheet_id = await self._resolve_sheet_id(
spreadsheet_id=spreadsheet_id,
access_token=access_token,
sheet_title=target_sheet_title,
)
except GoogleSheetsAPIError as e:
failure_reason = _failure_reason_from_sheets_error("lookup", e)
error_data = {"status_code": e.status, "code": e.code, "error": e.message}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=failure_reason,
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if existing_sheet_id is None:
try:
created_sheet_id = await app.AGENT_FUNCTION.ensure_sheet_tab(
access_token=access_token,
spreadsheet_id=spreadsheet_id,
title=target_sheet_title,
)
except Exception as e:
return await self.build_block_result(
success=False,
failure_reason=f"Failed to create sheet '{target_sheet_title}': {str(e)}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
else:
created_sheet_id = existing_sheet_id
rich = _try_rich_sheets_input(parsed_values)
if rich is not None:
return await self._execute_rich(
spreadsheet_id=spreadsheet_id,
a1=a1,
access_token=access_token,
workflow_run_context=workflow_run_context,
workflow_run_id=workflow_run_id,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
rich=rich,
known_sheet_id=created_sheet_id,
)
try:
rows = self._coerce_values(parsed_values, column_offset=leading_column_offset(a1))
except ValueError as e:
snippet = self.values[:200] if self.values else ""
extra = ""
if isinstance(parsed_values, dict):
extra = (
" Rendered a single object instead of a list; reference a list field "
"(e.g. {{ block_1.output.rows | tojson }}) or wrap the object in an array."
)
return await self.build_block_result(
success=False,
failure_reason=f"Invalid values payload: {str(e)}.{extra} Rendered values: {snippet!r}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
try:
if self.write_mode == "append":
payload = await app.AGENT_FUNCTION.google_sheets_values_append(
access_token=access_token,
spreadsheet_id=spreadsheet_id,
range_=a1,
values=rows,
)
else:
payload = await app.AGENT_FUNCTION.google_sheets_values_update(
access_token=access_token,
spreadsheet_id=spreadsheet_id,
range_=a1,
values=rows,
)
except GoogleSheetsAPIError as e:
failure_reason = _failure_reason_from_sheets_error("write", e)
error_data = {"status_code": e.status, "code": e.code, "error": e.message}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=failure_reason,
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
except Exception as e:
error_data = {"error": str(e), "error_type": "unknown"}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=f"Google Sheets write failed: {str(e)}",
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if payload is None:
error_data = {"error": "Google Sheets write returned no payload"}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason="Google Sheets runtime is not available in this build",
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
output_data: dict[str, Any] = {
"spreadsheet_id": spreadsheet_id,
"write_mode": self.write_mode,
"rows_written": len(rows),
"response": payload,
}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_data)
return await self.build_block_result(
success=True,
failure_reason=None,
output_parameter_value=output_data,
status=BlockStatus.completed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
async def _execute_rich(
self,
*,
spreadsheet_id: str,
a1: str,
access_token: str,
workflow_run_context: WorkflowRunContext,
workflow_run_id: str,
workflow_run_block_id: str,
organization_id: str | None,
rich: RichSheetsInput,
known_sheet_id: int | None = None,
) -> BlockResult:
sheet_id: int | None = known_sheet_id
try:
if sheet_id is None and self.sheet_name:
sheet_id = await self._resolve_sheet_id(
spreadsheet_id=spreadsheet_id,
access_token=access_token,
sheet_title=self.sheet_name,
)
if sheet_id is None:
# Honor an explicit sheet prefix in the configured A1 (e.g. "'Target'!B2:C3")
# before falling back to the rich payload's sheet metadata.
a1_prefix = extract_a1_sheet_prefix(a1)
if a1_prefix:
sheet_id = await self._resolve_sheet_id(
spreadsheet_id=spreadsheet_id,
access_token=access_token,
sheet_title=a1_prefix,
)
if sheet_id is None and rich.sheet_title:
# rich.sheet_id is local to the source spreadsheet; resolve by title against the destination.
sheet_id = await self._resolve_sheet_id(
spreadsheet_id=spreadsheet_id,
access_token=access_token,
sheet_title=rich.sheet_title,
)
except GoogleSheetsAPIError as e:
failure_reason = _failure_reason_from_sheets_error("lookup", e)
error_data = {"status_code": e.status, "code": e.code, "error": e.message}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=failure_reason,
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if sheet_id is None:
return await self.build_block_result(
success=False,
failure_reason=(
"Could not resolve sheet_id for batchUpdate; "
"set sheet_name on the Write block or include sheet_title in the input"
),
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
fields_mask = "userEnteredValue,userEnteredFormat,note,hyperlink"
requests: list[dict[str, Any]] = []
if self.write_mode == "append":
append_col_offset = leading_column_offset(a1)
padded_cells: list[list[dict[str, Any]]]
if append_col_offset:
padded_cells = [[{}] * append_col_offset + row for row in rich.cells]
else:
padded_cells = list(rich.cells)
rows_payload = [{"values": row} for row in padded_cells]
requests.append(
{
"appendCells": {
"sheetId": sheet_id,
"rows": rows_payload,
"fields": fields_mask,
}
}
)
merge_origin_row: int | None = None
merge_origin_col = append_col_offset
else:
rows_payload = [{"values": row} for row in rich.cells]
try:
grid_range = a1_to_grid_range(a1, sheet_id)
except ValueError as e:
return await self.build_block_result(
success=False,
failure_reason=f"Update mode requires a fully-qualified A1 range: {e}",
output_parameter_value=None,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
requests.append(
{
"updateCells": {
"range": grid_range,
"rows": rows_payload,
"fields": fields_mask,
}
}
)
merge_origin_row = grid_range["startRowIndex"]
merge_origin_col = grid_range["startColumnIndex"]
for merge in rich.merges:
if self.write_mode == "append":
# Append mode appends after the last table row; we'd need the response's
# updatedRange to shift merges correctly. Skip for now.
continue
row_offset = merge_origin_row or 0
requests.append(
{
"mergeCells": {
"range": {
"sheetId": sheet_id,
"startRowIndex": merge["start_row_index"] + row_offset,
"endRowIndex": merge["end_row_index"] + row_offset,
"startColumnIndex": merge["start_column_index"] + merge_origin_col,
"endColumnIndex": merge["end_column_index"] + merge_origin_col,
},
"mergeType": "MERGE_ALL",
}
}
)
try:
payload = await app.AGENT_FUNCTION.google_sheets_batch_update(
access_token=access_token,
spreadsheet_id=spreadsheet_id,
requests=requests,
)
except GoogleSheetsAPIError as e:
failure_reason = _failure_reason_from_sheets_error("batchUpdate", e)
error_data = {"status_code": e.status, "code": e.code, "error": e.message}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=failure_reason,
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
except Exception as e:
error_data = {"error": str(e), "error_type": "unknown"}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason=f"Google Sheets batchUpdate failed: {str(e)}",
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
if payload is None:
error_data = {"error": "Google Sheets batchUpdate returned no payload"}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, error_data)
return await self.build_block_result(
success=False,
failure_reason="Google Sheets runtime is not available in this build",
output_parameter_value=error_data,
status=BlockStatus.failed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
output_data: dict[str, Any] = {
"spreadsheet_id": spreadsheet_id,
"write_mode": self.write_mode,
"rows_written": len(rich.cells),
"response": payload,
}
await self.record_output_parameter_value(workflow_run_context, workflow_run_id, output_data)
return await self.build_block_result(
success=True,
failure_reason=None,
output_parameter_value=output_data,
status=BlockStatus.completed,
workflow_run_block_id=workflow_run_block_id,
organization_id=organization_id,
)
async def _resolve_sheet_id(
self,
*,
spreadsheet_id: str,
access_token: str,
sheet_title: str | None,
) -> int | None:
if not sheet_title:
return None
return await app.AGENT_FUNCTION.google_sheets_get_sheet_id(
access_token=access_token,
spreadsheet_id=spreadsheet_id,
sheet_title=sheet_title,
)

View file

@ -157,6 +157,17 @@ class WorkflowRunStatus(StrEnum):
WorkflowRunStatus.completed,
]
def is_final_excluding_canceled(self) -> bool:
"""Like :meth:`is_final` but excludes ``canceled``.
Intended for callers that can't distinguish a legitimate user/block cancel
from a synthetic ``canceled`` written as a last-resort fallback, e.g. the
copilot tool reading the row AFTER ``mark_workflow_run_as_canceled_if_not_final``
has run. Callers that want to trust a legitimate ``canceled`` must read
the row BEFORE invoking any cancel helper.
"""
return self.is_final() and self is not WorkflowRunStatus.canceled
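Spot-check under the two statuses named in this diff (other terminal statuses behave like ``completed``):

assert WorkflowRunStatus.completed.is_final_excluding_canceled() is True
assert WorkflowRunStatus.canceled.is_final_excluding_canceled() is False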
class WorkflowRun(BaseModel):
workflow_run_id: str
@ -244,6 +255,7 @@ class WorkflowRunResponseBase(BaseModel):
workflow_run_id: str
status: WorkflowRunStatus
failure_reason: str | None = None
failure_category: list[dict[str, Any]] | None = None
proxy_location: ProxyLocationInput = None
webhook_callback_url: str | None = None
webhook_failure_reason: str | None = None

View file

@ -915,7 +915,7 @@ class WorkflowService:
return None
@traced(name="skyvern.workflow.execute")
@traced(name="skyvern.workflow.execute", role="wrapper")
async def execute_workflow(
self,
workflow_run_id: str,
@ -924,6 +924,7 @@ class WorkflowService:
block_labels: list[str] | None = None,
block_outputs: dict[str, Any] | None = None,
browser_session_id: str | None = None,
need_call_webhook: bool = True,
) -> WorkflowRun:
"""Execute a workflow."""
organization_id = organization.organization_id
@ -1014,6 +1015,7 @@ class WorkflowService:
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
need_call_webhook=need_call_webhook,
)
return workflow_run
@ -1069,6 +1071,7 @@ class WorkflowService:
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
need_call_webhook=need_call_webhook,
)
return workflow_run
# Start background task to periodically renew the browser session
@ -1256,6 +1259,7 @@ class WorkflowService:
api_key=api_key,
browser_session_id=browser_session_id,
close_browser_on_completion=close_browser_on_completion,
need_call_webhook=need_call_webhook,
)
return workflow_run
@ -4526,6 +4530,7 @@ class WorkflowService:
workflow_run_id=workflow_run_id,
status=workflow_run.status,
failure_reason=workflow_run.failure_reason,
failure_category=workflow_run.failure_category,
proxy_location=workflow_run.proxy_location,
webhook_callback_url=workflow_run.webhook_callback_url,
webhook_failure_reason=workflow_run.webhook_failure_reason,

View file

@ -58,6 +58,10 @@ from skyvern.forge.sdk.workflow.models.block import (
WaitBlock,
WorkflowTriggerBlock,
)
from skyvern.forge.sdk.workflow.models.google_sheets_blocks import (
GoogleSheetsReadBlock,
GoogleSheetsWriteBlock,
)
from skyvern.forge.sdk.workflow.models.parameter import (
PARAMETER_TYPE,
RESERVED_PARAMETER_KEYS,
@ -747,6 +751,31 @@ def block_yaml_to_block(
use_parent_browser_session=block_yaml.use_parent_browser_session,
parameters=workflow_trigger_block_parameters,
)
elif block_yaml.block_type == BlockType.GOOGLE_SHEETS_READ:
google_sheets_read_parameters = _resolve_block_parameters(block_yaml, parameters)
return GoogleSheetsReadBlock(
**base_kwargs,
spreadsheet_url=block_yaml.spreadsheet_url,
sheet_name=block_yaml.sheet_name,
range=block_yaml.range,
credential_id=block_yaml.credential_id,
has_header_row=block_yaml.has_header_row,
parameters=google_sheets_read_parameters,
)
elif block_yaml.block_type == BlockType.GOOGLE_SHEETS_WRITE:
google_sheets_write_parameters = _resolve_block_parameters(block_yaml, parameters)
return GoogleSheetsWriteBlock(
**base_kwargs,
spreadsheet_url=block_yaml.spreadsheet_url,
sheet_name=block_yaml.sheet_name,
range=block_yaml.range,
credential_id=block_yaml.credential_id,
write_mode=block_yaml.write_mode,
values=block_yaml.values,
column_mapping=block_yaml.column_mapping,
create_sheet_if_missing=block_yaml.create_sheet_if_missing,
parameters=google_sheets_write_parameters,
)
raise ValueError(f"Invalid block type {block_yaml.block_type}")

View file

@ -2,6 +2,8 @@ from typing import Any, Pattern
from playwright.async_api import Locator
from skyvern.forge.sdk.event.factory import EventStrategyFactory
class SkyvernLocator:
"""Locator for finding and interacting with elements on a page.
@ -16,7 +18,12 @@ class SkyvernLocator:
# Action methods
async def click(self, **kwargs: Any) -> None:
"""Click the element."""
await self._locator.click(**kwargs)
timeout = kwargs.pop("timeout", None)
if kwargs:
# Extra kwargs (e.g. position) — use raw Playwright click
await self._locator.click(timeout=timeout, **kwargs)
else:
await EventStrategyFactory.click(self._locator.page, self._locator, timeout=timeout)
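# Editor's note on the resulting dispatch (illustrative calls):
#   await locator.click(timeout=5000)                             # strategy-aware path
#   await locator.click(timeout=5000, position={"x": 0, "y": 0})  # raw Playwright path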
async def fill(self, value: str, **kwargs: Any) -> None:
"""Fill an input element with text."""

View file

@ -0,0 +1,128 @@
import re
_SPREADSHEET_URL_RE = re.compile(r"/spreadsheets(?:/u/\d+)?/d/([a-zA-Z0-9-_]+)")
_BARE_ID_RE = re.compile(r"^[a-zA-Z0-9-_]{20,}$")
_A1_CELL_RE = re.compile(r"^([A-Z]+)(\d+)$")
_A1_COLUMN_ONLY_RE = re.compile(r"^([A-Z]+)$")
MAX_COLUMN_INDEX = 18277
class GoogleSheetsAPIError(RuntimeError):
"""Raised when the Drive or Sheets API returns a non-2xx response."""
def __init__(self, status: int, code: str | None, message: str) -> None:
super().__init__(message)
self.status = status
self.code = code
self.message = message
def extract_spreadsheet_id(url_or_id: str) -> str:
if not url_or_id:
raise ValueError("Empty spreadsheet reference")
match = _SPREADSHEET_URL_RE.search(url_or_id)
if match:
return match.group(1)
if _BARE_ID_RE.match(url_or_id):
return url_or_id
raise ValueError(f"Could not extract spreadsheet id from: {url_or_id}")
def quote_sheet_name(name: str) -> str:
return "'" + name.replace("'", "''") + "'"
def build_a1(sheet_name: str | None, cell_range: str | None) -> str | None:
quoted = quote_sheet_name(sheet_name) if sheet_name else None
if quoted and cell_range:
return f"{quoted}!{cell_range}"
if quoted:
return quoted
if cell_range:
return cell_range
return None
def column_letters_to_index(letters: str) -> int:
index = 0
for ch in letters:
index = index * 26 + (ord(ch) - ord("A") + 1)
return index - 1
def strip_a1_sheet_prefix(a1: str) -> str:
if a1.startswith("'"):
idx = 1
while idx < len(a1):
if a1[idx] == "'":
if idx + 1 < len(a1) and a1[idx + 1] == "'":
idx += 2
continue
break
idx += 1
if idx < len(a1) and idx + 1 < len(a1) and a1[idx + 1] == "!":
return a1[idx + 2 :]
if "!" in a1:
return a1.rsplit("!", 1)[1]
return a1
def extract_a1_sheet_prefix(a1: str | None) -> str | None:
"""Return the unquoted sheet title from an A1 string (e.g. ``'Target'!B2:C3`` -> ``Target``)."""
if not a1:
return None
if a1.startswith("'"):
idx = 1
while idx < len(a1):
if a1[idx] == "'":
if idx + 1 < len(a1) and a1[idx + 1] == "'":
idx += 2
continue
break
idx += 1
if idx < len(a1) and idx + 1 < len(a1) and a1[idx + 1] == "!":
return a1[1:idx].replace("''", "'") or None
return None
if "!" in a1:
sheet_part = a1.partition("!")[0]
return sheet_part or None
return None
def leading_column_offset(a1: str | None) -> int:
if not a1:
return 0
body = strip_a1_sheet_prefix(a1)
start = body.split(":", 1)[0].upper()
cell_match = _A1_CELL_RE.match(start)
if cell_match:
return column_letters_to_index(cell_match.group(1))
col_match = _A1_COLUMN_ONLY_RE.match(start)
if col_match:
return column_letters_to_index(col_match.group(1))
return 0
def a1_to_grid_range(a1: str, sheet_id: int) -> dict[str, int]:
body = strip_a1_sheet_prefix(a1)
parts = body.split(":")
if len(parts) == 1:
parts = [parts[0], parts[0]]
elif len(parts) != 2:
raise ValueError(f"Unsupported A1 range: {a1!r}")
start_match = _A1_CELL_RE.match(parts[0].upper())
end_match = _A1_CELL_RE.match(parts[1].upper())
if not start_match or not end_match:
raise ValueError(f"Only fully qualified A1 ranges are supported, got: {a1!r}")
start_col = column_letters_to_index(start_match.group(1))
start_row = int(start_match.group(2)) - 1
end_col = column_letters_to_index(end_match.group(1))
end_row = int(end_match.group(2)) - 1
return {
"sheetId": sheet_id,
"startRowIndex": start_row,
"endRowIndex": end_row + 1,
"startColumnIndex": start_col,
"endColumnIndex": end_col + 1,
}
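# Illustrative sanity check of the helpers above (editor's sketch; the
# spreadsheet id is made up, everything else follows from the definitions):
if __name__ == "__main__":
    assert (
        extract_spreadsheet_id("https://docs.google.com/spreadsheets/d/1AbCdEfGhIjKlMnOpQrStUv/edit")
        == "1AbCdEfGhIjKlMnOpQrStUv"
    )
    assert quote_sheet_name("Tom's Sheet") == "'Tom''s Sheet'"
    assert build_a1("Tom's Sheet", "B2:C3") == "'Tom''s Sheet'!B2:C3"
    assert column_letters_to_index("AA") == 26  # zero-based
    assert extract_a1_sheet_prefix("'Target'!B2:C3") == "Target"
    assert leading_column_offset("'Data'!C5:D9") == 2  # column C
    # GridRange end indexes are half-open, hence the +1 on the end row/column.
    assert a1_to_grid_range("B2:C3", sheet_id=0) == {
        "sheetId": 0,
        "startRowIndex": 1,
        "endRowIndex": 3,
        "startColumnIndex": 1,
        "endColumnIndex": 3,
    }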

View file

@ -418,6 +418,8 @@ class BlockType(StrEnum):
HUMAN_INTERACTION = "human_interaction"
PRINT_PAGE = "print_page"
WORKFLOW_TRIGGER = "workflow_trigger"
GOOGLE_SHEETS_READ = "google_sheets_read"
GOOGLE_SHEETS_WRITE = "google_sheets_write"
class BlockStatus(StrEnum):
@ -1029,6 +1031,29 @@ class WorkflowTriggerBlockYAML(BlockYAML):
parameter_keys: list[str] | None = None
class GoogleSheetsReadBlockYAML(BlockYAML):
block_type: Literal[BlockType.GOOGLE_SHEETS_READ] = BlockType.GOOGLE_SHEETS_READ # type: ignore
spreadsheet_url: str
sheet_name: str | None = None
range: str | None = None
credential_id: str | None = None
has_header_row: bool = True
parameter_keys: list[str] | None = None
class GoogleSheetsWriteBlockYAML(BlockYAML):
block_type: Literal[BlockType.GOOGLE_SHEETS_WRITE] = BlockType.GOOGLE_SHEETS_WRITE # type: ignore
spreadsheet_url: str
sheet_name: str | None = None
range: str | None = None
credential_id: str | None = None
write_mode: Literal["append", "update"] = "append"
values: str = ""
column_mapping: dict[str, str] | None = None
create_sheet_if_missing: bool = False
parameter_keys: list[str] | None = None
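# Editor's sketch: constructing the write block programmatically (assumes
# the usual BlockYAML base fields such as ``label``; everything else maps
# onto the fields declared above):
#
#   block = GoogleSheetsWriteBlockYAML(
#       label="write_results",
#       spreadsheet_url="https://docs.google.com/spreadsheets/d/<id>",
#       sheet_name="Results",
#       write_mode="append",
#       values="{{ extract_block.output }}",
#   )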
PARAMETER_YAML_SUBCLASSES = (
AWSSecretParameterYAML
| BitwardenLoginCredentialParameterYAML
@ -1068,6 +1093,8 @@ BLOCK_YAML_SUBCLASSES = (
| ConditionalBlockYAML
| PrintPageBlockYAML
| WorkflowTriggerBlockYAML
| GoogleSheetsReadBlockYAML
| GoogleSheetsWriteBlockYAML
)
BLOCK_YAML_TYPES = Annotated[BLOCK_YAML_SUBCLASSES, Field(discriminator="block_type")]

View file

@ -455,7 +455,7 @@ async def initialize_task_v2_metadata(
return task_v2
@traced()
@traced(name="skyvern.task_v2.run")
async def run_task_v2(
organization: Organization,
task_v2_id: str,

View file

@ -77,7 +77,7 @@ from skyvern.forge.sdk.models import Step
from skyvern.forge.sdk.schemas.tasks import Task
from skyvern.forge.sdk.services.bitwarden import BitwardenConstants
from skyvern.forge.sdk.services.credentials import AzureVaultConstants, OnePasswordConstants
from skyvern.forge.sdk.trace import traced
from skyvern.forge.sdk.trace import apply_context_attrs, traced
from skyvern.services import service_utils
from skyvern.services.action_service import get_action_history
from skyvern.utils.prompt_engine import (
@ -392,7 +392,7 @@ class ActionHandler:
cls._teardown_action_types[action_type] = handler
@staticmethod
@traced(name="skyvern.agent.action")
@traced(name="skyvern.agent.action", role="wrapper")
async def handle_action(
scraped_page: ScrapedPage,
task: Task,
@ -411,14 +411,21 @@ class ActionHandler:
trigger_download_action = (
isinstance(action, (SelectOptionAction, ClickAction, DownloadFileAction)) and action.download
)
# triggers_download splits the bimodal distribution: non-download actions
# finish in ~1s while download actions can burn up to BROWSER_DOWNLOAD_MAX_WAIT_TIME
# (120s) polling for the file. Explains the 36s p95 on this wrapper.
_action_span.set_attribute("triggers_download", trigger_download_action)
_tracer = otel_trace.get_tracer("skyvern")
if not trigger_download_action:
results = await ActionHandler._handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=page,
action=action,
)
with _tracer.start_as_current_span("skyvern.agent.action.handle_inner") as _hi_span:
apply_context_attrs(_hi_span)
results = await ActionHandler._handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=page,
action=action,
)
persisted_action = await app.DATABASE.workflow_params.create_action(action=action)
action.action_id = persisted_action.action_id
return results
@ -449,45 +456,57 @@ class ActionHandler:
download_triggered = False
try:
results = await ActionHandler._handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=page,
action=action,
)
with _tracer.start_as_current_span("skyvern.agent.action.handle_inner") as _hi_span:
apply_context_attrs(_hi_span)
results = await ActionHandler._handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=page,
action=action,
)
if not results:
return results
try:
LOG.info(
"Checking if there is any new files after click",
download_dir=download_dir,
)
async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
while True:
list_files_after = list_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
list_files_after = list_files_after + files_in_browser_session
_download_timeout = task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME
with _tracer.start_as_current_span("skyvern.agent.action.download_wait") as _dl_wait_span:
apply_context_attrs(_dl_wait_span)
_dl_wait_span.set_attribute("timeout_seconds", _download_timeout)
_poll_iterations = 0
try:
LOG.info(
"Checking if there is any new files after click",
download_dir=download_dir,
)
async with asyncio.timeout(_download_timeout):
while True:
_poll_iterations += 1
list_files_after = list_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=task.organization_id,
browser_session_id=task.browser_session_id,
)
list_files_after = list_files_after + files_in_browser_session
if len(list_files_after) > len(list_files_before):
LOG.info(
"Found new files in download directory after action",
num_downloaded_files_after=len(list_files_after),
download_dir=download_dir,
workflow_run_id=task.workflow_run_id,
)
download_triggered = True
break
await asyncio.sleep(1)
if len(list_files_after) > len(list_files_before):
LOG.info(
"Found new files in download directory after action",
num_downloaded_files_after=len(list_files_after),
download_dir=download_dir,
workflow_run_id=task.workflow_run_id,
)
download_triggered = True
break
await asyncio.sleep(1)
except asyncio.TimeoutError:
LOG.warning(
"No file to download after action",
workflow_run_id=task.workflow_run_id,
)
except asyncio.TimeoutError:
LOG.warning(
"No file to download after action",
workflow_run_id=task.workflow_run_id,
)
finally:
_dl_wait_span.set_attribute("download_triggered", download_triggered)
_dl_wait_span.set_attribute("poll_iterations", _poll_iterations)
if not download_triggered:
results[-1].download_triggered = False
@ -701,7 +720,7 @@ def check_for_invalid_web_action(
return []
@traced()
@traced(name="skyvern.agent.action.solve_captcha")
async def handle_solve_captcha_action(
action: actions.SolveCaptchaAction,
page: Page,
@ -717,7 +736,7 @@ async def handle_solve_captcha_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.click")
async def handle_click_action(
action: actions.ClickAction,
page: Page,
@ -880,7 +899,7 @@ async def handle_click_action(
return results
@traced()
@traced(name="skyvern.agent.action.click_dropdown_sequential")
async def handle_sequential_click_for_dropdown(
action: actions.ClickAction,
action_history: list[ActionResult],
@ -1006,7 +1025,7 @@ async def handle_sequential_click_for_dropdown(
)
@traced()
@traced(name="skyvern.agent.action.click_to_download")
async def handle_click_to_download_file_action(
action: actions.ClickAction,
page: Page,
@ -1145,7 +1164,7 @@ async def _handle_multi_field_totp_sequence(
return None # Success
@traced()
@traced(name="skyvern.agent.action.input_text")
async def handle_input_text_action(
action: actions.InputTextAction,
page: Page,
@ -1596,7 +1615,7 @@ async def handle_input_text_action(
await skyvern_element.press_key("Tab")
@traced()
@traced(name="skyvern.agent.action.upload_file")
async def handle_upload_file_action(
action: actions.UploadFileAction,
page: Page,
@ -1682,7 +1701,7 @@ async def handle_upload_file_action(
# This function is deprecated in 'extract-actions' prompt. Downloads are handled by the click action handler now.
# Currently, it's only used for the download action triggered by the code.
@traced()
@traced(name="skyvern.agent.action.download_file")
async def handle_download_file_action(
action: actions.DownloadFileAction,
page: Page,
@ -1742,7 +1761,7 @@ async def handle_download_file_action(
return [ActionFailure(e)]
@traced()
@traced(name="skyvern.agent.action.null")
async def handle_null_action(
action: actions.NullAction,
page: Page,
@ -1753,7 +1772,7 @@ async def handle_null_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.select_option")
async def handle_select_option_action(
action: actions.SelectOptionAction,
page: Page,
@ -2029,7 +2048,7 @@ async def handle_select_option_action(
try:
await EventStrategyFactory.move_to_element(page, skyvern_element.get_locator())
await skyvern_element.get_locator().click(timeout=timeout)
await EventStrategyFactory.click(page, skyvern_element.get_locator(), timeout=timeout)
except Exception:
LOG.info(
"fail to open dropdown by clicking, try to press arrow down to open",
@ -2074,7 +2093,7 @@ async def handle_select_option_action(
await incremental_scraped.stop_listen_dom_increment()
@traced()
@traced(name="skyvern.agent.action.checkbox")
async def handle_checkbox_action(
action: actions.CheckboxAction,
page: Page,
@ -2103,7 +2122,7 @@ async def handle_checkbox_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.wait")
async def handle_wait_action(
action: actions.WaitAction,
page: Page,
@ -2115,7 +2134,7 @@ async def handle_wait_action(
return [ActionFailure(exception=Exception("Wait action is treated as a failure"))]
@traced()
@traced(name="skyvern.agent.action.hover")
async def handle_hover_action(
action: actions.HoverAction,
page: Page,
@ -2154,7 +2173,7 @@ async def handle_hover_action(
return [ActionFailure(FailToHover(skyvern_element.get_id(), msg=str(exc)))]
@traced()
@traced(name="skyvern.agent.action.terminate")
async def handle_terminate_action(
action: actions.TerminateAction,
page: Page,
@ -2186,58 +2205,68 @@ async def handle_complete_action(
task: Task,
step: Step,
) -> list[ActionResult]:
if not action.verified and task.navigation_goal:
LOG.info(
"CompleteAction hasn't been verified, going to verify the user goal",
# verification_path separates the distinct runtime paths that roll up
# to this span — explains the 175x p95/p50 ratio.
_span = otel_trace.get_current_span()
if action.verified or not task.navigation_goal:
_span.set_attribute("verification_path", "already_verified")
return [ActionSuccess()]
LOG.info(
"CompleteAction hasn't been verified, going to verify the user goal",
workflow_run_id=task.workflow_run_id,
)
try:
verification_result = await app.agent.complete_verify(page, scraped_page, task, step)
except Exception as e:
_span.set_attribute("verification_path", "needs_llm_error")
LOG.exception(
"Failed to verify the complete action",
workflow_run_id=task.workflow_run_id,
)
try:
verification_result = await app.agent.complete_verify(page, scraped_page, task, step)
except Exception as e:
LOG.exception(
"Failed to verify the complete action",
workflow_run_id=task.workflow_run_id,
)
return [ActionFailure(exception=e)]
return [ActionFailure(exception=e)]
# Check if we should terminate instead of complete
# Note: This requires the USE_TERMINATION_AWARE_COMPLETE_VERIFICATION experiment to be enabled
if verification_result.is_terminate:
LOG.warning(
"CompleteAction verification determined task should terminate instead (termination-aware experiment)",
workflow_run_id=task.workflow_run_id,
thoughts=verification_result.thoughts,
status=verification_result.status if verification_result.status else "legacy",
)
# Create a TerminateAction and execute it
terminate_action = actions.TerminateAction(
reasoning=verification_result.thoughts,
organization_id=action.organization_id,
workflow_run_id=action.workflow_run_id,
task_id=action.task_id,
step_id=action.step_id,
step_order=action.step_order,
action_order=action.action_order,
)
results = await handle_terminate_action(terminate_action, page, scraped_page, task, step)
action.action_type = ActionType.TERMINATE
action.reasoning = terminate_action.reasoning
action.errors = terminate_action.errors
return results
if not verification_result.is_complete:
return [ActionFailure(exception=IllegitComplete(data={"error": verification_result.thoughts}))]
LOG.info(
"CompleteAction has been verified successfully",
# Check if we should terminate instead of complete
# Note: This requires the USE_TERMINATION_AWARE_COMPLETE_VERIFICATION experiment to be enabled
if verification_result.is_terminate:
_span.set_attribute("verification_path", "terminate_requested")
LOG.warning(
"CompleteAction verification determined task should terminate instead (termination-aware experiment)",
workflow_run_id=task.workflow_run_id,
thoughts=verification_result.thoughts,
status=verification_result.status if verification_result.status else "legacy",
)
action.verified = True
# Create a TerminateAction and execute it
terminate_action = actions.TerminateAction(
reasoning=verification_result.thoughts,
organization_id=action.organization_id,
workflow_run_id=action.workflow_run_id,
task_id=action.task_id,
step_id=action.step_id,
step_order=action.step_order,
action_order=action.action_order,
)
results = await handle_terminate_action(terminate_action, page, scraped_page, task, step)
action.action_type = ActionType.TERMINATE
action.reasoning = terminate_action.reasoning
action.errors = terminate_action.errors
return results
if not verification_result.is_complete:
_span.set_attribute("verification_path", "needs_llm_rejected")
return [ActionFailure(exception=IllegitComplete(data={"error": verification_result.thoughts}))]
_span.set_attribute("verification_path", "needs_llm_verified")
LOG.info(
"CompleteAction has been verified successfully",
workflow_run_id=task.workflow_run_id,
)
action.verified = True
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.extract")
async def handle_extract_action(
action: actions.ExtractAction,
page: Page,
@ -2259,7 +2288,7 @@ async def handle_extract_action(
return [ActionFailure(exception=Exception("No data extraction goal"))]
@traced()
@traced(name="skyvern.agent.action.scroll")
async def handle_scroll_action(
action: actions.ScrollAction,
page: Page,
@ -2373,7 +2402,7 @@ async def handle_scroll_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.keypress")
async def handle_keypress_action(
action: actions.KeypressAction,
page: Page,
@ -2385,7 +2414,7 @@ async def handle_keypress_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.move")
async def handle_move_action(
action: actions.MoveAction,
page: Page,
@ -2397,7 +2426,7 @@ async def handle_move_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.drag")
async def handle_drag_action(
action: actions.DragAction,
page: Page,
@ -2409,7 +2438,7 @@ async def handle_drag_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.verification_code")
async def handle_verification_code_action(
action: actions.VerificationCodeAction,
page: Page,
@ -2426,7 +2455,7 @@ async def handle_verification_code_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.left_mouse")
async def handle_left_mouse_action(
action: actions.LeftMouseAction,
page: Page,
@ -2438,7 +2467,7 @@ async def handle_left_mouse_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.goto_url")
async def handle_goto_url_action(
action: actions.GotoUrlAction,
page: Page,
@ -2450,7 +2479,7 @@ async def handle_goto_url_action(
return [ActionSuccess()]
@traced()
@traced(name="skyvern.agent.action.close_page")
async def handle_close_page_action(
action: actions.ClosePageAction,
page: Page,
@ -2559,7 +2588,7 @@ async def chain_click(
try:
if not await skyvern_element.navigate_to_a_href(page=page):
await EventStrategyFactory.move_to_element(page, locator)
await locator.click(timeout=timeout)
await EventStrategyFactory.click(page, locator, timeout=timeout)
LOG.info("Chain click: main element click succeeded", action=action, locator=locator)
return [ActionSuccess()]
@ -2575,7 +2604,7 @@ async def chain_click(
locator=locator,
)
if bound_element := await skyvern_element.find_label_for(dom=dom):
await bound_element.get_locator().click(timeout=timeout)
await EventStrategyFactory.click(page, bound_element.get_locator(), timeout=timeout)
action_results.append(ActionSuccess())
return action_results
except Exception as e:
@ -2593,7 +2622,7 @@ async def chain_click(
if bound_element := await skyvern_element.find_element_in_label_children(
dom=dom, element_type=InteractiveElement.INPUT
):
await bound_element.get_locator().click(timeout=timeout)
await EventStrategyFactory.click(page, bound_element.get_locator(), timeout=timeout)
action_results.append(ActionSuccess())
return action_results
except Exception as e:
@ -2611,6 +2640,8 @@ async def chain_click(
)
if bound_locator := await skyvern_element.find_bound_label_by_attr_id():
# click on (0, 0) to avoid playwright clicking on the wrong element by accident
# Intentional positional click bypassing EventStrategyFactory — position
# anchoring at (0,0) avoids mis-targeting nested elements inside labels
await bound_locator.click(timeout=timeout, position={"x": 0, "y": 0})
action_results.append(ActionSuccess())
return action_results
@ -2628,6 +2659,8 @@ async def chain_click(
)
if bound_locator := await skyvern_element.find_bound_label_by_direct_parent():
# click on (0, 0) to avoid playwright clicking on the wrong element by accident
# Intentional positional click bypassing EventStrategyFactory — position
# anchoring at (0,0) avoids mis-targeting nested elements inside labels
await bound_locator.click(timeout=timeout, position={"x": 0, "y": 0})
action_results.append(ActionSuccess())
return action_results
@ -2707,7 +2740,7 @@ async def chain_click(
locator=locator,
)
await blocking_element.get_locator().click(timeout=timeout)
await EventStrategyFactory.click(page, blocking_element.get_locator(), timeout=timeout)
action_results.append(ActionSuccess())
return action_results
except Exception as e:
@ -2732,7 +2765,7 @@ async def chain_click(
return [ActionFailure(WrongElementToUploadFile(action.element_id))]
@traced()
@traced(name="skyvern.agent.dropdown.auto_completion")
async def choose_auto_completion_dropdown(
context: InputOrSelectContext,
page: Page,
@ -2818,7 +2851,9 @@ async def choose_auto_completion_dropdown(
input_value=text,
)
try:
await fast_path_locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
await EventStrategyFactory.click(
page, fast_path_locator, timeout=settings.BROWSER_ACTION_TIMEOUT_MS
)
clear_input = False
result.action_result = ActionSuccess()
return result
@ -3122,7 +3157,7 @@ async def input_or_auto_complete_input(
return None
@traced()
@traced(name="skyvern.agent.dropdown.discover_and_select")
async def discover_and_select_from_full_dropdown(
context: InputOrSelectContext,
page: Page,
@ -3150,7 +3185,9 @@ async def discover_and_select_from_full_dropdown(
# Try click first to open the dropdown (most combobox components respond to click)
try:
await skyvern_element.get_locator().click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
await EventStrategyFactory.click(
page, skyvern_element.get_locator(), timeout=settings.BROWSER_ACTION_TIMEOUT_MS
)
except Exception:
LOG.info(
"Click failed in discover fallback, continuing to ArrowDown",
@ -3307,7 +3344,7 @@ async def discover_and_select_from_full_dropdown(
await incremental_scraped.stop_listen_dom_increment()
@traced()
@traced(name="skyvern.agent.dropdown.select_sequential")
async def sequentially_select_from_dropdown(
action: SelectOptionAction,
input_or_select_context: InputOrSelectContext,
@ -3487,7 +3524,7 @@ class CustomSelectPromptOptions(BaseModel):
target_value: str | None = None
@traced()
@traced(name="skyvern.agent.dropdown.select_emerging")
async def select_from_emerging_elements(
current_element_id: str,
options: CustomSelectPromptOptions,
@ -3580,7 +3617,7 @@ async def select_from_emerging_elements(
return ActionSuccess()
@traced()
@traced(name="skyvern.agent.dropdown.select")
async def select_from_dropdown(
context: InputOrSelectContext,
page: Page,
@ -3767,7 +3804,7 @@ async def select_from_dropdown(
value=value,
)
await EventStrategyFactory.move_to_element(page, locator)
await locator.click(timeout=timeout)
await EventStrategyFactory.click(page, locator, timeout=timeout)
single_select_result.action_result = ActionSuccess()
return single_select_result
except Exception as e:
@ -3775,7 +3812,7 @@ async def select_from_dropdown(
return single_select_result
@traced()
@traced(name="skyvern.agent.dropdown.select_by_value")
async def select_from_dropdown_by_value(
value: str,
page: Page,
@ -3796,7 +3833,7 @@ async def select_from_dropdown_by_value(
element_locator = await incremental_scraped.select_one_element_by_value(value=value)
if element_locator is not None:
await element_locator.click(timeout=timeout)
await EventStrategyFactory.click(page, element_locator, timeout=timeout)
return ActionSuccess()
if dropdown_menu_element is None:
@ -3832,7 +3869,7 @@ async def select_from_dropdown_by_value(
element_locator = await incre_scraped.select_one_element_by_value(value=value)
if element_locator is not None:
await element_locator.click(timeout=timeout)
await EventStrategyFactory.click(page, element_locator, timeout=timeout)
nonlocal selected
selected = True
return False
@ -4002,7 +4039,7 @@ async def try_to_find_potential_scrollable_element(
return skyvern_element
@traced()
@traced(name="skyvern.agent.dropdown.scroll_load_options")
async def scroll_down_to_load_all_options(
scrollable_element: SkyvernElement,
page: Page,
@ -4121,9 +4158,7 @@ async def normal_select(
value: str | None = json_response.get("value")
try:
await locator.click(
timeout=settings.BROWSER_ACTION_TIMEOUT_MS,
)
await EventStrategyFactory.click(locator.page, locator, timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
except Exception as e:
LOG.info(
"Failed to click before select action",
@ -4603,7 +4638,31 @@ async def extract_information_for_navigation_goal(
# dict / list / str — the `extract-information` prompt uses
# `force_dict=False`, so root `type: array` or scalar schemas are valid
# return shapes (matches ``ScrapeResult.scraped_data``).
# TEMPORARY INSTRUMENTATION (SKY-8992): the dual-write block below appears
# to never populate Redis in production despite the code being deployed
# and the cloud override verified. Log the gate inputs every call so we
can see which guard is closing the block. Revert after the root cause is
identified.
LOG.info(
"extract_information cache store gate",
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
workflow_permanent_id=task.workflow_permanent_id,
cache_key_present=cache_key is not None,
json_response_type=type(json_response).__name__,
json_response_is_cacheable=isinstance(json_response, (dict, list, str)),
cache_path="handler",
)
if cache_key is not None and isinstance(json_response, (dict, list, str)):
# TEMPORARY INSTRUMENTATION (SKY-8992): confirm the dual-write block is entered.
LOG.info(
"extract_information cache store block entered",
task_id=task.task_id,
workflow_run_id=task.workflow_run_id,
workflow_permanent_id=task.workflow_permanent_id,
cache_key=cache_key,
cache_path="handler",
)
try:
extraction_cache.store(task.workflow_run_id, cache_key, json_response)
except Exception:
@ -4657,7 +4716,7 @@ async def click_listbox_option(
try:
skyvern_element = await dom.get_skyvern_element_by_id(child["id"])
locator = skyvern_element.locator
await locator.click(timeout=1000)
await EventStrategyFactory.click(page, locator, timeout=1000)
return True
except Exception:

View file

@ -795,6 +795,11 @@ async def _connect_to_cdp_browser(
LOG.info("Connecting browser CDP connection", remote_browser_url=remote_browser_url)
browser = await playwright.chromium.connect_over_cdp(remote_browser_url)
# Mark as remote browser so strategies can adapt their dispatch method
ctx = current()
if ctx is not None:
ctx.is_remote_browser = True
if apply_download_behaviour:
await _apply_download_behaviour(browser)

View file

@ -138,7 +138,6 @@ def build_element_dict(
return id_to_css_dict, id_to_element_dict, id_to_frame_dict, id_to_element_hash, hash_to_element_ids
@traced(name="skyvern.agent.scrape_retry")
async def scrape_website(
browser_state: BrowserState,
url: str,
@ -360,7 +359,7 @@ async def scrape_web_unsafe(
LOG.warning("Failed to get current x, y position of the page", exc_info=True)
_tracer = otel_trace.get_tracer("skyvern")
with _tracer.start_as_current_span("skyvern.agent.screenshot") as _ss_span:
with _tracer.start_as_current_span("skyvern.browser.scrape_screenshot") as _ss_span:
apply_context_attrs(_ss_span)
# Hardcoded since this is an inline span, not a @traced method.
# Update if scrape_web_unsafe is renamed.
@ -542,6 +541,9 @@ async def get_interactable_element_tree(
context = skyvern_context.ensure_context()
frames = await get_all_children_frames(page)
frames = await filter_frames(frames, scrape_exclude)
# Iframe-heavy pages dominate the 1.8s p95 here; frame_count lets us
# slice element_tree latency by page complexity.
otel_trace.get_current_span().set_attribute("frame_count", len(frames))
for frame in frames:
frame_index = context.frame_index_map.get(frame, None)
@ -580,7 +582,7 @@ class IncrementalScrapePage(ElementTreeBuilder):
return True
return False
@traced()
@traced(name="skyvern.agent.incremental_element_tree")
async def get_incremental_element_tree(
self,
cleanup_element_tree: CleanupElementTreeFunc,

View file

@ -851,7 +851,7 @@ class SkyvernElement:
await EventStrategyFactory.move_to_element(page, self.get_locator())
try:
await self.get_locator().click(timeout=timeout)
await EventStrategyFactory.click(page, self.get_locator(), timeout=timeout)
return
except Exception:
LOG.info("Failed to click by playwright", exc_info=True, element_id=self.get_id())
@ -863,7 +863,7 @@ class SkyvernElement:
blocking_element, _ = await self.find_blocking_element(dom=dom, incremental_page=incremental_page)
if blocking_element:
LOG.debug("Find the blocking element", element_id=blocking_element.get_id())
await blocking_element.get_locator().click(timeout=timeout)
await EventStrategyFactory.click(page, blocking_element.get_locator(), timeout=timeout)
return
except Exception:
LOG.info("Failed to click on the blocking element", exc_info=True, element_id=self.get_id())

View file

@ -7,6 +7,7 @@ from io import BytesIO
from typing import Any
import structlog
from opentelemetry import trace as otel_trace
from PIL import Image
from playwright._impl._errors import Error as PlaywrightError
from playwright._impl._errors import TimeoutError
@ -15,7 +16,7 @@ from playwright.async_api import ElementHandle, Frame, Page
from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
from skyvern.exceptions import FailedToTakeScreenshot
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.forge.sdk.trace import traced
from skyvern.forge.sdk.trace import apply_context_attrs, traced
LOG = structlog.get_logger()
@ -282,7 +283,7 @@ class SkyvernFrame:
return await SkyvernFrame.evaluate(frame=frame, expression="() => document.location.href")
@staticmethod
@traced()
@traced(name="skyvern.browser.scrolling_screenshot")
async def take_scrolling_screenshot(
page: Page,
file_path: str | None = None,
@ -356,7 +357,7 @@ class SkyvernFrame:
await skyvern_frame.safe_scroll_to_x_y(x, y)
@staticmethod
@traced()
@traced(name="skyvern.browser.split_screenshots")
async def take_split_screenshots(
page: Page,
url: str | None = None,
@ -552,7 +553,7 @@ class SkyvernFrame:
js_script = "() => removeAllUniqueIds()"
await self.evaluate(frame=self.frame, expression=js_script)
@traced()
@traced(name="skyvern.browser.element_tree_from_body")
async def build_tree_from_body(
self,
frame_name: str | None,
@ -569,7 +570,7 @@ class SkyvernFrame:
arg=[frame_name, frame_index, must_included_tags],
)
@traced()
@traced(name="skyvern.browser.incremental_element_tree")
async def get_incremental_element_tree(
self,
wait_until_finished: bool = True,
@ -580,7 +581,7 @@ class SkyvernFrame:
frame=self.frame, expression=js_script, timeout_ms=timeout_ms, arg=[wait_until_finished]
)
@traced()
@traced(name="skyvern.browser.element_tree_from_element")
async def build_tree_from_element(
self,
starter: ElementHandle,
@ -595,11 +596,20 @@ class SkyvernFrame:
@traced(name="skyvern.browser.wait_for_animation")
async def safe_wait_for_animation_end(self, before_wait_sec: float = 0, timeout_ms: float = 3000) -> None:
# Separates the fast finished-quickly path from the timeout/error paths
# that burn the full timeout budget — explains the 124x p95/p50 ratio.
_span = otel_trace.get_current_span()
try:
await asyncio.sleep(before_wait_sec)
await self.frame.wait_for_load_state("load", timeout=timeout_ms)
await self.wait_for_animation_end(timeout_ms=timeout_ms)
_span.set_attribute("animation_result", "finished")
except (TimeoutError, asyncio.TimeoutError):
_span.set_attribute("animation_result", "timeout")
LOG.debug("Timed out waiting for animation end, but ignore it", exc_info=True)
return
except Exception:
_span.set_attribute("animation_result", "error")
LOG.debug("Failed to wait for animation end, but ignore it", exc_info=True)
return
@ -615,6 +625,7 @@ class SkyvernFrame:
return
await asyncio.sleep(0.1)
@traced(name="skyvern.browser.page_ready", role="wrapper")
async def wait_for_page_ready(
self,
network_idle_timeout_ms: float = 3000,
@ -635,70 +646,84 @@ class SkyvernFrame:
before attempting to interact with elements.
"""
total_start_time = time.time()
_tracer = otel_trace.get_tracer("skyvern")
# 1. Wait for loading indicators to disappear (longest timeout first)
loading_indicator_duration_ms = 0.0
step_start_time = time.time()
loading_indicator_result = "success"
try:
await self._wait_for_loading_indicators_gone(timeout_ms=loading_indicator_timeout_ms)
except (TimeoutError, asyncio.TimeoutError):
loading_indicator_result = "timeout"
LOG.warning("Loading indicator timeout - some indicators may still be present, proceeding")
except Exception:
loading_indicator_result = "error"
LOG.warning("Failed to check loading indicators, proceeding", exc_info=True)
finally:
loading_indicator_duration_ms = (time.time() - step_start_time) * 1000
LOG.info(
"page_readiness_check",
step="loading_indicators",
result=loading_indicator_result,
duration_ms=loading_indicator_duration_ms,
timeout_ms=loading_indicator_timeout_ms,
)
with _tracer.start_as_current_span("skyvern.browser.page_ready.loading_indicators") as _li_span:
apply_context_attrs(_li_span)
_li_span.set_attribute("timeout_ms", loading_indicator_timeout_ms)
try:
await self._wait_for_loading_indicators_gone(timeout_ms=loading_indicator_timeout_ms)
except (TimeoutError, asyncio.TimeoutError):
loading_indicator_result = "timeout"
LOG.warning("Loading indicator timeout - some indicators may still be present, proceeding")
except Exception:
loading_indicator_result = "error"
LOG.warning("Failed to check loading indicators, proceeding", exc_info=True)
finally:
loading_indicator_duration_ms = (time.time() - step_start_time) * 1000
_li_span.set_attribute("result", loading_indicator_result)
LOG.info(
"page_readiness_check",
step="loading_indicators",
result=loading_indicator_result,
duration_ms=loading_indicator_duration_ms,
timeout_ms=loading_indicator_timeout_ms,
)
# 2. Wait for network idle (with short timeout - some pages never go idle)
network_idle_duration_ms = 0.0
step_start_time = time.time()
network_idle_result = "success"
try:
await self.frame.wait_for_load_state("networkidle", timeout=network_idle_timeout_ms)
except (TimeoutError, asyncio.TimeoutError):
network_idle_result = "timeout"
LOG.warning("Network idle timeout - page may have constant activity, proceeding")
finally:
network_idle_duration_ms = (time.time() - step_start_time) * 1000
LOG.info(
"page_readiness_check",
step="network_idle",
result=network_idle_result,
duration_ms=network_idle_duration_ms,
timeout_ms=network_idle_timeout_ms,
)
with _tracer.start_as_current_span("skyvern.browser.page_ready.network_idle") as _ni_span:
apply_context_attrs(_ni_span)
_ni_span.set_attribute("timeout_ms", network_idle_timeout_ms)
try:
await self.frame.wait_for_load_state("networkidle", timeout=network_idle_timeout_ms)
except (TimeoutError, asyncio.TimeoutError):
network_idle_result = "timeout"
LOG.warning("Network idle timeout - page may have constant activity, proceeding")
finally:
network_idle_duration_ms = (time.time() - step_start_time) * 1000
_ni_span.set_attribute("result", network_idle_result)
LOG.info(
"page_readiness_check",
step="network_idle",
result=network_idle_result,
duration_ms=network_idle_duration_ms,
timeout_ms=network_idle_timeout_ms,
)
# 3. Wait for DOM to stabilize
dom_stability_duration_ms = 0.0
step_start_time = time.time()
dom_stability_result = "success"
try:
await self._wait_for_dom_stable(stable_ms=dom_stable_ms, timeout_ms=dom_stability_timeout_ms)
except (TimeoutError, asyncio.TimeoutError):
dom_stability_result = "timeout"
LOG.warning("DOM stability timeout - DOM may still be changing, proceeding")
except Exception:
dom_stability_result = "error"
LOG.warning("Failed to check DOM stability, proceeding", exc_info=True)
finally:
dom_stability_duration_ms = (time.time() - step_start_time) * 1000
LOG.info(
"page_readiness_check",
step="dom_stability",
result=dom_stability_result,
duration_ms=dom_stability_duration_ms,
timeout_ms=dom_stability_timeout_ms,
stable_ms=dom_stable_ms,
)
with _tracer.start_as_current_span("skyvern.browser.page_ready.dom_stability") as _ds_span:
apply_context_attrs(_ds_span)
_ds_span.set_attribute("timeout_ms", dom_stability_timeout_ms)
_ds_span.set_attribute("stable_ms", dom_stable_ms)
try:
await self._wait_for_dom_stable(stable_ms=dom_stable_ms, timeout_ms=dom_stability_timeout_ms)
except (TimeoutError, asyncio.TimeoutError):
dom_stability_result = "timeout"
LOG.warning("DOM stability timeout - DOM may still be changing, proceeding")
except Exception:
dom_stability_result = "error"
LOG.warning("Failed to check DOM stability, proceeding", exc_info=True)
finally:
dom_stability_duration_ms = (time.time() - step_start_time) * 1000
_ds_span.set_attribute("result", dom_stability_result)
LOG.info(
"page_readiness_check",
step="dom_stability",
result=dom_stability_result,
duration_ms=dom_stability_duration_ms,
timeout_ms=dom_stability_timeout_ms,
stable_ms=dom_stable_ms,
)
# Log total page readiness check duration
total_duration_ms = (time.time() - total_start_time) * 1000

View file

@ -37,3 +37,25 @@ def _restore_settings_fixture(): # type: ignore[no-untyped-def] # pytest fixtu
os.environ.pop("SKYVERN_API_KEY", None)
else:
os.environ["SKYVERN_API_KEY"] = prev_api_key
@pytest.fixture(autouse=True)
def _reset_mcp_session_contextvar(): # type: ignore[no-untyped-def] # pytest fixture
"""Reset the mcp_session ContextVar in pytest's main context before each test.
pytest-asyncio runs the async browser fixture and the test body in separate
asyncio tasks. Each task gets its own copy of the parent ContextVars context
at creation, so ContextVar.set() in the fixture does NOT propagate to the
test task. The fixture relies on the _global_session module-level fallback
in get_current_session() to bridge the two, but that fallback only fires
when _current_session.get() returns None. Any other test that leaves a
populated SessionState in the pytest main context poisons every later
async test task and defeats the fallback.
"""
from skyvern.cli.core import session_manager
token = session_manager._current_session.set(None)
try:
yield
finally:
session_manager._current_session.reset(token)
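# Editor's sketch of the propagation gap described in the docstring
# (standalone stdlib example, no Skyvern imports):
#
#   import asyncio
#   from contextvars import ContextVar
#
#   var: ContextVar[str | None] = ContextVar("var", default=None)
#
#   async def fixture_task():
#       var.set("session")  # visible only inside this task's own context copy
#
#   async def test_task():
#       assert var.get() is None  # sibling task copied the PARENT context
#
#   async def main():
#       await asyncio.create_task(fixture_task())
#       await asyncio.create_task(test_task())
#
#   asyncio.run(main())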

View file

@ -2,6 +2,8 @@
from __future__ import annotations
import json
from types import SimpleNamespace
from unittest.mock import MagicMock
@ -189,3 +191,126 @@ class TestShouldRestorePersistedWorkflow:
r = self._result(persisted=False, updated_workflow=None)
assert _should_restore_persisted_workflow(True, r) is False
assert _should_restore_persisted_workflow(False, r) is False
def _fake_run_result(payload: dict) -> SimpleNamespace:
"""Minimal shim for ``RunResultStreaming`` — extract_final_text reads ``final_output``."""
return SimpleNamespace(final_output=json.dumps(payload), new_items=[])
def _chat_request() -> SimpleNamespace:
return SimpleNamespace(
workflow_id="wf-1",
workflow_permanent_id="wfp-1",
workflow_copilot_chat_id="chat-1",
)
class TestTranslateToAgentResultGating:
"""Covers the three SKY-9143 invariants that live in _translate_to_agent_result."""
def test_inline_replace_workflow_resets_test_ok_after_prior_pass(self, monkeypatch) -> None:
# A prior run_blocks test passed for the old workflow (ctx.last_test_ok=True,
# ctx.last_workflow=old_wf). The agent then emits inline REPLACE_WORKFLOW
# with a different yaml. The translate helper must invalidate the prior
# test result so _verified_workflow_or_none rejects the untested REPLACE.
from skyvern.forge.sdk.copilot import agent as agent_module
old_wf = SimpleNamespace(name="old")
new_wf = SimpleNamespace(name="new-from-replace")
monkeypatch.setattr(
"skyvern.forge.sdk.copilot.tools._process_workflow_yaml",
lambda **kwargs: new_wf,
)
ctx = _ctx(last_workflow=old_wf, last_workflow_yaml="old: yaml", last_test_ok=True)
result = _fake_run_result(
{"type": "REPLACE_WORKFLOW", "user_response": "Here you go.", "workflow_yaml": "new: yaml"}
)
agent_result = agent_module._translate_to_agent_result(
result, ctx, global_llm_context=None, chat_request=_chat_request(), organization_id="org-1"
)
assert ctx.last_test_ok is None
assert ctx.last_workflow is new_wf
# The REPLACE yaml itself (not the stale snapshot) must land on ctx;
# otherwise a future code path that reads last_workflow_yaml would
# see a string that no longer matches last_workflow.
assert ctx.last_workflow_yaml == "new: yaml"
assert agent_result.updated_workflow is None
assert agent_result.workflow_yaml is None
assert agent_result.response_type == "REPLACE_WORKFLOW"
def test_inline_replace_with_invalid_yaml_keeps_prior_pass(self, monkeypatch) -> None:
# _process_workflow_yaml raising on a malformed REPLACE must leave
# ctx untouched — no spurious last_test_ok reset, no workflow swap —
# so a prior tested workflow remains available.
import yaml as yaml_mod
from skyvern.forge.sdk.copilot import agent as agent_module
tested_wf = SimpleNamespace(name="tested")
def boom(**kwargs):
raise yaml_mod.YAMLError("mangled yaml")
monkeypatch.setattr("skyvern.forge.sdk.copilot.tools._process_workflow_yaml", boom)
ctx = _ctx(last_workflow=tested_wf, last_workflow_yaml="tested: yaml", last_test_ok=True)
result = _fake_run_result(
{"type": "REPLACE_WORKFLOW", "user_response": "here", "workflow_yaml": "::: not yaml"}
)
agent_result = agent_module._translate_to_agent_result(
result, ctx, global_llm_context=None, chat_request=_chat_request(), organization_id="org-1"
)
assert ctx.last_workflow is tested_wf
assert ctx.last_workflow_yaml == "tested: yaml"
assert ctx.last_test_ok is True
assert agent_result.updated_workflow is tested_wf
assert "validation error" in agent_result.user_response.lower()
def test_ask_question_preserves_model_specific_question(self) -> None:
# The new prompt instructs the model to stop and ASK_QUESTION when it
# cannot test an edit. Row-3 of _rewrite_failed_test_response would
# clobber that specific unblocker with "Could you share more context";
# the resp_type==ASK_QUESTION guard must skip the rewrite.
from skyvern.forge.sdk.copilot import agent as agent_module
ctx = _ctx(
last_update_block_count=1,
last_test_ok=None,
last_workflow=SimpleNamespace(name="drafted"),
last_workflow_yaml="drafted: yaml",
)
specific_question = "I need credentials for site.example — can you link one in Settings?"
result = _fake_run_result({"type": "ASK_QUESTION", "user_response": specific_question})
agent_result = agent_module._translate_to_agent_result(
result, ctx, global_llm_context=None, chat_request=_chat_request(), organization_id="org-1"
)
assert agent_result.user_response == specific_question
# Even ASK_QUESTION must obey the strict gate — no verified workflow this turn.
assert agent_result.updated_workflow is None
assert agent_result.response_type == "ASK_QUESTION"
def test_reply_still_rewrites_after_failed_test(self) -> None:
# Guard rail for the above: a plain REPLY after a failed test must
# still flow through the "test failed" rewrite so we don't regress
# the original SKY-9143 behavior.
from skyvern.forge.sdk.copilot import agent as agent_module
ctx = _ctx(
last_update_block_count=2,
last_test_ok=False,
last_test_failure_reason="Failed to navigate to url https://bad.example.",
last_failure_category_top="NAVIGATION_FAILURE",
)
result = _fake_run_result({"type": "REPLY", "user_response": "All done — your workflow is ready."})
agent_result = agent_module._translate_to_agent_result(
result, ctx, global_llm_context=None, chat_request=_chat_request(), organization_id="org-1"
)
assert "test failed" in agent_result.user_response.lower()
assert "All done" not in agent_result.user_response
assert agent_result.updated_workflow is None

View file

@ -7,16 +7,25 @@ Covers:
cancel, but because the row is terminal the real helper would be a no-op.
- An SDK-realistic ``asyncio.wait_for`` timeout around the tool coroutine does
not leave ``run_task`` running in the background.
- ``_trusted_post_drain_status`` accepts post-drain rows that landed in a
non-ambiguous terminal state (``completed``/``failed``/``terminated``/
``timed_out``) and rejects ``canceled``. At post-drain read time a
``canceled`` row can't be told apart from the synthetic ``canceled`` that
``mark_workflow_run_as_canceled_if_not_final`` writes as a last-resort
fallback; callsites that want the legitimate-``canceled`` signal read the
row BEFORE invoking the cancel helper.
"""
from __future__ import annotations
import asyncio
from types import SimpleNamespace
from typing import Any
import pytest
from skyvern.forge.sdk.copilot.tools import _cancel_run_task_if_not_final
from skyvern.forge.sdk.copilot.tools import _cancel_run_task_if_not_final, _trusted_post_drain_status
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRunStatus
class _FakeService:
@ -146,3 +155,238 @@ async def test_sdk_style_wait_for_timeout_does_not_leak_background_work(
assert "run_task" in run_task_ref
assert run_task_ref["run_task"].cancelled() or run_task_ref["run_task"].done()
assert service.mark_calls == ["wr_sdk"]
# ---------------------------------------------------------------------------
# _trusted_post_drain_status
# ---------------------------------------------------------------------------
#
# After the poll loop exhausts its budget, ``_cancel_run_task_if_not_final``
# waits briefly for ``execute_workflow``'s shielded finalize, then writes
# synthetic ``canceled`` as a last-resort fallback when nothing else finalized
# the row. At post-drain read time a row with status ``canceled`` could be
# either that synthetic fallback or a legitimate cancel that the drain
# restored — the two are indistinguishable on disk. ``_trusted_post_drain_status``
# therefore only accepts the unambiguous terminal statuses; the callsite
# reads the row BEFORE the cancel helper runs to catch any legitimate
# ``canceled`` self-finalize.
def _run_with_status(status: str) -> SimpleNamespace:
return SimpleNamespace(status=status)
@pytest.mark.parametrize(
"status",
[
WorkflowRunStatus.completed.value,
WorkflowRunStatus.failed.value,
WorkflowRunStatus.terminated.value,
WorkflowRunStatus.timed_out.value,
],
)
def test_trusted_post_drain_accepts_real_terminal_statuses(status: str) -> None:
"""Statuses reachable via ``_finalize_workflow_run_status`` — the drain's
real finalize step let the tool fall through into the normal result
path so ``run_ok`` / ``failure_reason`` land correctly for the LLM."""
run = _run_with_status(status)
assert _trusted_post_drain_status(run) == status
def test_trusted_post_drain_rejects_canceled() -> None:
"""``canceled`` at post-drain read time is ambiguous: it could be the
synthetic fallback that ``mark_workflow_run_as_canceled_if_not_final``
writes when the run was genuinely stuck, OR a legitimate cancel that
``_finalize_workflow_run_status`` restored. The helper rejects both; the
callsite handles the legitimate case with a pre-cancel read instead."""
run = _run_with_status(WorkflowRunStatus.canceled.value)
assert _trusted_post_drain_status(run) is None
@pytest.mark.parametrize(
"status",
[
WorkflowRunStatus.created.value,
WorkflowRunStatus.queued.value,
WorkflowRunStatus.running.value,
WorkflowRunStatus.paused.value,
],
)
def test_trusted_post_drain_rejects_non_final_statuses(status: str) -> None:
"""A non-final post-drain status means the drain did not manage to
reconcile the row within its grace window. Emit the timeout error."""
run = _run_with_status(status)
assert _trusted_post_drain_status(run) is None
def test_trusted_post_drain_handles_missing_row() -> None:
"""A missing row (DB read returned None) is treated the same as a
non-final row: no trusted status, caller emits the timeout error."""
assert _trusted_post_drain_status(None) is None
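# Editor's sketch of the helper these cases pin down (the real
# implementation lives in skyvern.forge.sdk.copilot.tools; the shape is
# inferred from the tests):
#
#   _TRUSTED_POST_DRAIN_STATUSES = {
#       WorkflowRunStatus.completed.value,
#       WorkflowRunStatus.failed.value,
#       WorkflowRunStatus.terminated.value,
#       WorkflowRunStatus.timed_out.value,
#   }
#
#   def _trusted_post_drain_status(run):
#       if run is None:
#           return None
#       return run.status if run.status in _TRUSTED_POST_DRAIN_STATUSES else None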
# ---------------------------------------------------------------------------
# Reconciliation guard via ``_tool_loop_error``
# ---------------------------------------------------------------------------
#
# When a block-running tool times out and the post-drain read doesn't
# reconcile the row to a trustworthy-final status, the tool sets
# ``pending_reconciliation_run_id`` on ``CopilotContext``. Until a
# ``get_run_results`` call observes that run in a trustworthy-final status,
# ``_tool_loop_error`` rejects further block-running tool calls so the LLM
# cannot re-invoke a mutation block whose side effects may have landed.
from types import SimpleNamespace as _NS # noqa: E402 (grouped with test block)
from skyvern.forge.sdk.copilot.tools import _BLOCK_RUNNING_TOOLS, _tool_loop_error # noqa: E402
def _guard_ctx(pending_run_id: str | None = None) -> _NS:
"""Minimal ctx stub for ``_tool_loop_error``: only the fields it reads."""
return _NS(
consecutive_tool_tracker=[],
repeated_action_fingerprint_streak_count=0,
last_test_non_retriable_nav_error=None,
pending_reconciliation_run_id=pending_run_id,
)
@pytest.mark.parametrize("tool_name", sorted(_BLOCK_RUNNING_TOOLS))
def test_reconciliation_guard_blocks_block_running_tools(tool_name: str) -> None:
"""With a pending run set, every tool in ``_BLOCK_RUNNING_TOOLS`` is
rejected with an error that names the run id and directs the LLM to call
``get_run_results`` first."""
ctx = _guard_ctx(pending_run_id="wr_pending_123")
err = _tool_loop_error(ctx, tool_name)
assert isinstance(err, str)
assert "wr_pending_123" in err
assert "get_run_results" in err
@pytest.mark.parametrize(
"tool_name",
["get_run_results", "update_workflow", "list_credentials"],
)
def test_reconciliation_guard_ignores_non_block_running_tools(tool_name: str) -> None:
"""The guard is scoped to ``_BLOCK_RUNNING_TOOLS``. Planning / metadata
tools (including ``get_run_results`` itself, which is the tool that can
CLEAR the flag) must not be rejected."""
ctx = _guard_ctx(pending_run_id="wr_pending_123")
assert _tool_loop_error(ctx, tool_name) is None
def test_reconciliation_guard_passes_when_flag_empty() -> None:
"""No pending run → `_tool_loop_error` returns None for block-running
tools (assuming no other guard trips)."""
ctx = _guard_ctx(pending_run_id=None)
for name in _BLOCK_RUNNING_TOOLS:
assert _tool_loop_error(ctx, name) is None
def test_reconciliation_guard_rejects_empty_string_run_id() -> None:
"""An empty string is treated the same as None — the flag is considered
not set. Prevents a spurious guard trip if anything ever clears the flag
to ``''`` instead of ``None``."""
ctx = _guard_ctx(pending_run_id="")
for name in _BLOCK_RUNNING_TOOLS:
assert _tool_loop_error(ctx, name) is None
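# Editor's sketch of the guard branch exercised above (other guards and the
# exact error wording elided; only the run id and the get_run_results hint
# are pinned by the tests):
#
#   def _tool_loop_error(ctx, tool_name):
#       ...
#       pending = getattr(ctx, "pending_reconciliation_run_id", None)
#       if pending and tool_name in _BLOCK_RUNNING_TOOLS:
#           return (
#               f"Run {pending} has not been reconciled yet; call "
#               "get_run_results before running more blocks."
#           )
#       return None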
# ---------------------------------------------------------------------------
# Clearing the reconciliation guard: ``_maybe_clear_reconciliation_flag``
# ---------------------------------------------------------------------------
#
# ``get_run_results_tool`` calls this helper after ``_get_run_results``
# returns. Testing the helper directly keeps the tests targeted and avoids
# the Agents-SDK ``@function_tool`` wrapper machinery.
from skyvern.forge.sdk.copilot.tools import _maybe_clear_reconciliation_flag # noqa: E402
def _ctx_with_pending(run_id: str | None) -> _NS:
return _NS(pending_reconciliation_run_id=run_id)
def test_reconciliation_flag_clears_on_matching_trusted_final_status() -> None:
"""Same run_id AND a trustworthy-final status (``completed``/``failed``/
``terminated``/``timed_out``) the flag clears so the LLM can resume."""
ctx = _ctx_with_pending("wr_match")
result = {
"ok": True,
"data": {
"workflow_run_id": "wr_match",
"overall_status": WorkflowRunStatus.completed.value,
},
}
_maybe_clear_reconciliation_flag(ctx, result)
assert ctx.pending_reconciliation_run_id is None
def test_reconciliation_flag_does_not_clear_on_matching_canceled() -> None:
"""The canceled case is the whole reason the guard exists. Even an
explicit read of the pending run that returns ``canceled`` must keep
the guard set; the LLM should report to the user and await input."""
ctx = _ctx_with_pending("wr_match")
result = {
"ok": True,
"data": {
"workflow_run_id": "wr_match",
"overall_status": WorkflowRunStatus.canceled.value,
},
}
_maybe_clear_reconciliation_flag(ctx, result)
assert ctx.pending_reconciliation_run_id == "wr_match"
def test_reconciliation_flag_does_not_clear_on_non_matching_run_id() -> None:
"""``get_run_results(workflow_run_id=None)`` resolves internally to the
most-recent finished run. If that resolves to a different run than the
one the guard is pending on, the flag must NOT clear."""
ctx = _ctx_with_pending("wr_match")
result = {
"ok": True,
"data": {
"workflow_run_id": "wr_other",
"overall_status": WorkflowRunStatus.completed.value,
},
}
_maybe_clear_reconciliation_flag(ctx, result)
assert ctx.pending_reconciliation_run_id == "wr_match"
@pytest.mark.parametrize(
"status",
[
WorkflowRunStatus.running.value,
WorkflowRunStatus.queued.value,
WorkflowRunStatus.paused.value,
],
)
def test_reconciliation_flag_does_not_clear_on_non_final_status(status: str) -> None:
"""A matching run_id but a non-final status (e.g. ``running``) leaves
the flag set; the run hasn't reached a trustworthy outcome yet."""
ctx = _ctx_with_pending("wr_match")
result = {
"ok": True,
"data": {"workflow_run_id": "wr_match", "overall_status": status},
}
_maybe_clear_reconciliation_flag(ctx, result)
assert ctx.pending_reconciliation_run_id == "wr_match"
def test_reconciliation_flag_noop_when_unset() -> None:
"""If no reconciliation is pending, the helper must be a silent no-op —
even when the result shape is unexpected."""
ctx = _ctx_with_pending(None)
_maybe_clear_reconciliation_flag(ctx, {"ok": False, "error": "x"})
assert ctx.pending_reconciliation_run_id is None
def test_reconciliation_flag_noop_on_malformed_result() -> None:
"""A result without a `data` dict (e.g. an error envelope) must leave
the flag untouched; we only clear when we can affirmatively see a
trusted status for the pending run_id."""
ctx = _ctx_with_pending("wr_match")
_maybe_clear_reconciliation_flag(ctx, {"ok": False, "error": "run not found"})
assert ctx.pending_reconciliation_run_id == "wr_match"

View file

@ -305,10 +305,13 @@ async def test_validate_mcp_api_key_retries_transient_failure_without_negative_c
def test_profile_to_mcp_url_normalizes_base_variants() -> None:
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com") == "https://api.skyvern.com/mcp/"
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com/") == "https://api.skyvern.com/mcp/"
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com/mcp") == "https://api.skyvern.com/mcp/"
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com/mcp/") == "https://api.skyvern.com/mcp/"
# Canonical form has no trailing slash, so the advertised MCP resource URI
# matches what clients send during RFC 8707 audience / RFC 9728
# protected-resource comparison.
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com") == "https://api.skyvern.com/mcp"
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com/") == "https://api.skyvern.com/mcp"
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com/mcp") == "https://api.skyvern.com/mcp"
assert mcp_http_auth._canonical_mcp_url("https://api.skyvern.com/mcp/") == "https://api.skyvern.com/mcp"
def test_resource_metadata_url_normalizes_base_variants() -> None:
@ -337,6 +340,87 @@ def test_validate_token_audience_accepts_matching_url() -> None:
)
def test_validate_token_audience_tolerates_trailing_slash_mismatch() -> None:
# Token audience minted against the slashed form must still validate when
# the canonical (slashless) expected_resource is used, and vice versa.
mcp_http_auth._validate_token_audience(
{"aud": ["https://api.skyvern.com/mcp/"]},
"https://api.skyvern.com/mcp",
)
mcp_http_auth._validate_token_audience(
{"aud": ["https://api.skyvern.com/mcp"]},
"https://api.skyvern.com/mcp/",
)
def test_validate_token_resource_claim_tolerates_trailing_slash_mismatch() -> None:
# Same normalization applies to the RFC 8707 `resource` claim.
mcp_http_auth._validate_token_resource_claims(
{"resource": "https://api.skyvern.com/mcp/"},
"https://api.skyvern.com/mcp",
)
mcp_http_auth._validate_token_resource_claims(
{"resource": "https://api.skyvern.com/mcp"},
"https://api.skyvern.com/mcp/",
)
def test_validate_token_audience_rejects_missing_aud() -> None:
# Payload without any `aud` key at all must reject — the `any(...)` check
# on an empty audience list cannot match the expected resource.
with pytest.raises(HTTPException, match="Token audience is not valid for this MCP resource"):
mcp_http_auth._validate_token_audience({}, "https://api.skyvern.com/mcp")
def test_validate_token_audience_rejects_none_aud() -> None:
with pytest.raises(HTTPException, match="Token audience is not valid for this MCP resource"):
mcp_http_auth._validate_token_audience({"aud": None}, "https://api.skyvern.com/mcp")
def test_validate_token_audience_rejects_empty_list_aud() -> None:
with pytest.raises(HTTPException, match="Token audience is not valid for this MCP resource"):
mcp_http_auth._validate_token_audience({"aud": []}, "https://api.skyvern.com/mcp")
def test_validate_token_audience_filters_non_string_list_items() -> None:
# Non-string items inside the `aud` array are silently dropped (per the
# asymmetry documented in _validate_token_audience); with only garbage in
# the list, there is nothing to match against the expected resource.
with pytest.raises(HTTPException, match="Token audience is not valid for this MCP resource"):
mcp_http_auth._validate_token_audience({"aud": [42, None, {}]}, "https://api.skyvern.com/mcp")
def test_validate_token_audience_rejects_different_path_despite_normalization() -> None:
# Guards against a future refactor broadening rstrip normalization into a
# prefix / startswith check. `/mcp-other/` is not a slash-variant of
# `/mcp` and must be rejected.
with pytest.raises(HTTPException, match="Token audience is not valid for this MCP resource"):
mcp_http_auth._validate_token_audience(
{"aud": ["https://api.skyvern.com/mcp-other/"]},
"https://api.skyvern.com/mcp",
)
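Collectively these cases pin down the audience check: slash variants are tolerated via rstrip-style normalization, non-string entries are dropped, and anything else rejects. A sketch under those constraints (the 403 status code and the exact normalization are assumptions; only the accept/reject behavior above is guaranteed):

```python
from fastapi import HTTPException

def _validate_token_audience_sketch(payload: dict, expected_resource: str) -> None:
    expected = expected_resource.rstrip("/")
    aud = payload.get("aud")
    values = [aud] if isinstance(aud, str) else (aud or [])
    # Non-string items are silently dropped (the documented asymmetry);
    # rstrip comparison tolerates slash variants but nothing broader.
    if not any(isinstance(a, str) and a.rstrip("/") == expected for a in values):
        raise HTTPException(
            status_code=403,  # assumption: the tests only match on the detail text
            detail="Token audience is not valid for this MCP resource",
        )
```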
def test_validate_token_resource_claim_rejects_different_path_despite_normalization() -> None:
# Same boundary guard for the `resource` claim.
with pytest.raises(HTTPException, match="Token resource is not valid for this MCP resource"):
mcp_http_auth._validate_token_resource_claims(
{"resource": "https://api.skyvern.com/mcp-other/"},
"https://api.skyvern.com/mcp",
)
def test_validate_token_resource_claim_rejects_non_string_claim() -> None:
# Explicit type guard: a non-string `resource` claim is a malformed token,
# not a slash-variant of the expected value, and gets its own error detail
# so the cause is obvious in logs.
with pytest.raises(HTTPException, match="Token resource claim must be a string"):
mcp_http_auth._validate_token_resource_claims(
{"resource": 42},
"https://api.skyvern.com/mcp",
)
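And the matching sketch for the `resource` claim. The tests above do not exercise a missing claim, so tolerating its absence is an assumption here, as are the status codes:

```python
from fastapi import HTTPException

def _validate_token_resource_claims_sketch(payload: dict, expected_resource: str) -> None:
    resource = payload.get("resource")
    if resource is None:
        return  # assumption: an absent RFC 8707 resource claim is tolerated
    if not isinstance(resource, str):
        # Malformed token, not a slash variant: its own error detail.
        raise HTTPException(status_code=403, detail="Token resource claim must be a string")
    if resource.rstrip("/") != expected_resource.rstrip("/"):
        raise HTTPException(status_code=403, detail="Token resource is not valid for this MCP resource")
```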
def test_looks_like_jwt_rejects_dotted_opaque_token() -> None:
assert mcp_http_auth._looks_like_jwt("opaque.with.dots") is False

View file

@ -14,6 +14,10 @@ from skyvern.cli.mcp_tools import session as mcp_session
@pytest.fixture(autouse=True)
def _reset_singletons() -> None:
# Must leave _current_session as None in the pytest main context; any populated
# SessionState written here would be inherited by every later async test task
# via contextvars.copy_context() and would short-circuit the _global_session
# fallback in get_current_session().
client_mod._skyvern_instance.set(None)
client_mod._api_key_override.set(None)
client_mod._global_skyvern_instance = None
@ -22,7 +26,6 @@ def _reset_singletons() -> None:
session_manager._current_session.set(None)
session_manager._global_session = None
session_manager.set_stateless_http_mode(False)
mcp_session.set_current_session(mcp_session.SessionState())
def test_get_skyvern_reuses_global_instance_across_contexts(monkeypatch: pytest.MonkeyPatch) -> None:

View file

@ -0,0 +1,218 @@
"""Tests for when the first browser block has no starter URL
Covers:
- MissingStarterUrl exception message formatting.
- The fail-early branch in TaskBlock.execute() for a first task block with no URL.
- The negative case: URL=None but the page already navigated (e.g. browser profile
loaded a homepage) must NOT raise.
"""
from __future__ import annotations
from contextlib import contextmanager
from datetime import datetime, timezone
from types import SimpleNamespace
from typing import Any, Iterator
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from skyvern.exceptions import MissingStarterUrl
from skyvern.forge.sdk.schemas.tasks import TaskStatus
from skyvern.forge.sdk.workflow.context_manager import WorkflowRunContext
from skyvern.forge.sdk.workflow.models.block import TaskBlock
from skyvern.forge.sdk.workflow.models.parameter import OutputParameter, ParameterType
def _output_parameter(key: str = "task_output") -> OutputParameter:
now = datetime.now(timezone.utc)
return OutputParameter(
parameter_type=ParameterType.OUTPUT,
key=key,
description="test output",
output_parameter_id="op_missing_starter_url_test",
workflow_id="w_missing_starter_url_test",
created_at=now,
modified_at=now,
)
def _workflow_run_context() -> WorkflowRunContext:
return WorkflowRunContext(
workflow_title="test",
workflow_id="w_missing_starter_url_test",
workflow_permanent_id="wpid_missing_starter_url_test",
workflow_run_id="wr_missing_starter_url_test",
aws_client=MagicMock(),
)
@contextmanager
def _mock_block_execute_deps(working_page_url: str) -> Iterator[dict[str, Any]]:
"""Patch the app-level singletons used by TaskBlock.execute() for a first-task
scenario and hand the test back the mocks it needs to assert on."""
workflow_run = SimpleNamespace(
workflow_run_id="wr_missing_starter_url_test",
workflow_permanent_id="wpid_missing_starter_url_test",
organization_id="o_test",
browser_profile_id=None,
browser_address=None,
)
working_page = SimpleNamespace(url=working_page_url)
browser_state = MagicMock()
browser_state.get_working_page = AsyncMock(return_value=working_page)
browser_state.take_fullpage_screenshot = AsyncMock(return_value=None)
browser_state.navigate_to_url = AsyncMock()
browser_manager = MagicMock()
browser_manager.get_or_create_for_workflow_run = AsyncMock(return_value=browser_state)
workflow_service = MagicMock()
workflow_service.get_workflow_run = AsyncMock(return_value=workflow_run)
workflow_service.get_workflow_by_permanent_id = AsyncMock(
return_value=MagicMock(workflow_id="w_missing_starter_url_test")
)
organization = SimpleNamespace(organization_id="o_test")
organizations_db = MagicMock()
organizations_db.get_organization = AsyncMock(return_value=organization)
tasks_db = MagicMock()
tasks_db.update_task = AsyncMock()
tasks_db.get_last_task_for_workflow_run = AsyncMock(return_value=None)
observer_db = MagicMock()
observer_db.update_workflow_run_block = AsyncMock(return_value=MagicMock())
database = MagicMock()
database.tasks = tasks_db
database.organizations = organizations_db
database.observer = observer_db
task = SimpleNamespace(task_id="tsk_test", status=TaskStatus.failed)
step = SimpleNamespace(step_id="stp_test")
agent = MagicMock()
agent.create_task_and_step_from_block = AsyncMock(return_value=(task, step))
agent.execute_step = AsyncMock()
with (
patch("skyvern.forge.sdk.workflow.models.block.app") as mock_app,
patch(
"skyvern.forge.sdk.workflow.models.block.Block.get_workflow_run_context",
return_value=_workflow_run_context(),
),
patch(
"skyvern.forge.sdk.workflow.models.block.capture_block_download_baseline",
new=AsyncMock(),
),
):
mock_app.BROWSER_MANAGER = browser_manager
mock_app.WORKFLOW_SERVICE = workflow_service
mock_app.DATABASE = database
mock_app.agent = agent
yield {
"task": task,
"tasks_db": tasks_db,
"agent": agent,
"browser_state": browser_state,
}
def test_missing_starter_url_message_uses_block_label() -> None:
exc = MissingStarterUrl(block_label="open_vendor_url")
assert "open_vendor_url" in str(exc)
assert "starting URL" in str(exc)
assert "workflow parameter" in str(exc)
def test_missing_starter_url_message_without_label() -> None:
exc = MissingStarterUrl()
assert "first browser block" in str(exc)
assert "starting URL" in str(exc)
@pytest.mark.asyncio
@pytest.mark.parametrize("blank_url", ["about:blank", "", ":"])
async def test_execute_fails_early_when_first_block_has_no_url(blank_url: str) -> None:
"""A first browser block with no URL landing on any blank-page marker
(``about:blank``, empty string, or the rare ``":"`` Playwright reports for
brand-new pages) should raise MissingStarterUrl before scraping starts,
instead of the confusing downstream ScrapingFailedBlankPage."""
block = TaskBlock(
label="open_vendor_url",
output_parameter=_output_parameter(),
title="Open vendor URL",
url=None,
)
with _mock_block_execute_deps(working_page_url=blank_url) as deps:
with pytest.raises(MissingStarterUrl) as excinfo:
await block.execute(
workflow_run_id="wr_missing_starter_url_test",
workflow_run_block_id="wrb_test",
organization_id="o_test",
)
assert "open_vendor_url" in str(excinfo.value)
deps["tasks_db"].update_task.assert_any_call(
deps["task"].task_id,
status=TaskStatus.failed,
organization_id="o_test",
failure_reason=str(excinfo.value),
)
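The parametrized blank-page markers suggest the fail-early condition reduces to a small predicate along these lines (a sketch; the real check lives inside TaskBlock.execute() and the names here are made up):

```python
_BLANK_PAGE_MARKERS = {"about:blank", "", ":"}  # ":" is what Playwright reports for brand-new pages

def _first_block_needs_starter_url_sketch(block_url: str | None, working_page_url: str) -> bool:
    # Fire only when the block has no URL AND the page never navigated anywhere.
    return block_url is None and working_page_url in _BLANK_PAGE_MARKERS
```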
@pytest.mark.asyncio
async def test_execute_does_not_raise_when_profile_loaded_a_page() -> None:
"""If the browser session/profile navigated the page away from about:blank before
the first task starts, the block has a meaningful page to scrape, so the missing-URL
check must NOT fire."""
block = TaskBlock(
label="use_existing_session",
output_parameter=_output_parameter(),
title="Use existing session",
url=None,
)
with _mock_block_execute_deps(working_page_url="https://example.com/dashboard") as deps:
# execute_step is mocked to no-op; we rely on the task-status lookup below to
# short-circuit the rest of the block runner.
deps["tasks_db"].get_task = AsyncMock(
return_value=SimpleNamespace(
task_id=deps["task"].task_id,
status=TaskStatus.completed,
failure_reason=None,
)
)
# The extra downstream services the block touches for a completed task — stub
# enough of them to let execute() return without crashing.
with (
patch(
"skyvern.forge.sdk.workflow.models.block.app.STORAGE",
new=MagicMock(get_downloaded_files=AsyncMock(return_value=[])),
create=True,
),
patch(
"skyvern.forge.sdk.workflow.models.block.app.ARTIFACT_MANAGER",
new=MagicMock(create_workflow_run_block_artifact=AsyncMock()),
create=True,
),
):
try:
await block.execute(
workflow_run_id="wr_missing_starter_url_test",
workflow_run_block_id="wrb_test",
organization_id="o_test",
)
except MissingStarterUrl:
pytest.fail("MissingStarterUrl should not be raised when the page has already navigated")
except Exception:
# Other downstream failures (e.g. artifact lookup) are fine — we only
# care that MissingStarterUrl is NOT raised in this configuration.
pass

View file

@ -255,6 +255,89 @@ async def test_flag_on_mid_stream_disconnect_restores_when_persisted_and_not_aut
restore_mock.assert_not_awaited()
@pytest.mark.asyncio
@pytest.mark.parametrize(
("auto_accept", "workflow_was_persisted", "has_valid_proposal", "prior_proposal", "expect_clear_call"),
[
# SKY-9143: auto_accept=False + gated-out turn (no proposal) + mid-turn
# persisted draft => we restore AND clear the stale proposed_workflow
# card so reload doesn't resurrect a card the assistant just invalidated.
(False, True, False, {"workflow_id": "stale"}, True),
# Restore fires but there's nothing stale to clear: skip the no-op DB write.
(False, True, False, None, False),
# auto_accept=False chat-only turn (no persisted draft, no proposal)
# must NOT clear a prior turn's proposed_workflow.
(False, False, False, {"workflow_id": "stale"}, False),
# auto_accept=False turn with a valid proposal sets the new card (not clear).
(False, True, True, {"workflow_id": "stale"}, False),
# auto_accept=True never writes proposed_workflow at all.
(True, True, False, {"workflow_id": "stale"}, False),
(True, True, True, {"workflow_id": "stale"}, False),
],
)
async def test_proposed_workflow_cleared_on_restore(
monkeypatch: pytest.MonkeyPatch,
auto_accept: bool,
workflow_was_persisted: bool,
has_valid_proposal: bool,
prior_proposal: dict | None,
expect_clear_call: bool,
) -> None:
monkeypatch.setattr(settings, "ENABLE_WORKFLOW_COPILOT_V2", True)
captured = _install_fake_create(monkeypatch)
chat = SimpleNamespace(
workflow_copilot_chat_id="chat-1",
workflow_permanent_id="wpid-1",
organization_id="org-1",
proposed_workflow=prior_proposal,
auto_accept=auto_accept,
)
original_workflow = SimpleNamespace(
workflow_id="wf-canonical",
title="Original",
description="Original description",
workflow_definition=None,
)
proposal = MagicMock(spec=["model_dump"]) if has_valid_proposal else None
if proposal is not None:
proposal.model_dump.return_value = {"workflow_id": "wf-canonical"}
agent_result = SimpleNamespace(
user_response="done",
updated_workflow=proposal,
global_llm_context=None,
workflow_yaml=None,
workflow_was_persisted=workflow_was_persisted,
clear_proposed_workflow=False,
)
_setup_new_copilot_mocks(monkeypatch, chat, original_workflow, agent_result)
request = MagicMock()
request.headers = {"x-api-key": "sk-test-key"}
organization = SimpleNamespace(organization_id="org-1")
response = await workflow_copilot_chat_post(request, _make_chat_request(), organization)
assert response is captured["sentinel"]
stream = MagicMock()
stream.send = AsyncMock(return_value=True)
stream.is_disconnected = AsyncMock(return_value=False)
handler = captured["handler"]
assert callable(handler)
await handler(stream)
update_calls = app.DATABASE.workflow_params.update_workflow_copilot_chat.await_args_list
clear_calls = [c for c in update_calls if c.kwargs.get("proposed_workflow") is None]
if expect_clear_call:
assert len(clear_calls) == 1, f"expected a proposed_workflow=None clear, got {update_calls!r}"
else:
assert not clear_calls, f"did not expect a clear call, got {update_calls!r}"
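The truth table in the parametrization reduces to a single predicate; a sketch of the decision it encodes (the name is an assumption, the logic is exactly the table above):

```python
def _should_clear_proposed_workflow_sketch(
    auto_accept: bool,
    workflow_was_persisted: bool,
    has_valid_proposal: bool,
    prior_proposal: dict | None,
) -> bool:
    # Clear only on the restore path: a manual-accept chat whose mid-turn
    # persisted draft was rolled back with no fresh proposal, and a stale
    # card that actually needs clearing (otherwise skip the no-op DB write).
    return (
        not auto_accept
        and workflow_was_persisted
        and not has_valid_proposal
        and prior_proposal is not None
    )
```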
@pytest.mark.asyncio
async def test_env_on_short_circuits_posthog(monkeypatch: pytest.MonkeyPatch) -> None:
"""Env var True must skip the PostHog check entirely and route to v2."""

View file

@ -1,86 +0,0 @@
import importlib
import sys
from types import ModuleType, SimpleNamespace
import pytest
def _install_temporal_activity_stubs(monkeypatch: pytest.MonkeyPatch) -> None:
temporalio_package = ModuleType("temporalio")
temporalio_package.__path__ = []
temporalio_activity = ModuleType("temporalio.activity")
temporalio_activity.defn = lambda fn: fn
temporalio_package.activity = temporalio_activity
monkeypatch.setitem(sys.modules, "temporalio", temporalio_package)
monkeypatch.setitem(sys.modules, "temporalio.activity", temporalio_activity)
def import_cron_worker_activities(monkeypatch: pytest.MonkeyPatch):
cloud_package = ModuleType("cloud")
cloud_package.__path__ = []
cloud_clients = ModuleType("cloud.clients")
cloud_yescaptcha = ModuleType("cloud.clients.yescaptcha")
cloud_yescaptcha_client = ModuleType("cloud.clients.yescaptcha.client")
cloud_config = ModuleType("cloud.config")
cloud_db = ModuleType("cloud.db")
cloud_agent_db = ModuleType("cloud.db.cloud_agent_db")
cloud_tasks = ModuleType("cloud.tasks")
class YescaptchaClient:
def __init__(self, client_key: str) -> None:
self.client_key = client_key
async def update_stuck_tasks_to_timed_out() -> None:
return None
async def update_stuck_workflow_runs_to_timed_out() -> None:
return None
cloud_yescaptcha_client.YescaptchaClient = YescaptchaClient
cloud_config.settings = SimpleNamespace(
ENABLE_YESCAPTCHA_BALANCE_ALERT=False,
YESCAPTCHA_API_KEY="",
)
cloud_agent_db.cloud_db = SimpleNamespace()
cloud_tasks.update_stuck_tasks_to_timed_out = update_stuck_tasks_to_timed_out
cloud_tasks.update_stuck_workflow_runs_to_timed_out = update_stuck_workflow_runs_to_timed_out
monkeypatch.setitem(sys.modules, "cloud", cloud_package)
monkeypatch.setitem(sys.modules, "cloud.clients", cloud_clients)
monkeypatch.setitem(sys.modules, "cloud.clients.yescaptcha", cloud_yescaptcha)
monkeypatch.setitem(sys.modules, "cloud.clients.yescaptcha.client", cloud_yescaptcha_client)
monkeypatch.setitem(sys.modules, "cloud.config", cloud_config)
monkeypatch.setitem(sys.modules, "cloud.db", cloud_db)
monkeypatch.setitem(sys.modules, "cloud.db.cloud_agent_db", cloud_agent_db)
monkeypatch.setitem(sys.modules, "cloud.tasks", cloud_tasks)
_install_temporal_activity_stubs(monkeypatch)
sys.modules.pop("workers.cron_worker.activities", None)
return importlib.import_module("workers.cron_worker.activities")
def import_temporal_v2_worker_activities(monkeypatch: pytest.MonkeyPatch):
cloud_package = ModuleType("cloud")
cloud_package.__path__ = []
cloud_services = ModuleType("cloud.services")
data_scrubber_module = ModuleType("cloud.services.data_scrubber_service")
worker_utils_module = ModuleType("workers.worker_utils")
class DataScrubber:
pass
async def activity_teardown() -> None:
return None
data_scrubber_module.DataScrubber = DataScrubber
worker_utils_module.activity_teardown = activity_teardown
monkeypatch.setitem(sys.modules, "cloud", cloud_package)
monkeypatch.setitem(sys.modules, "cloud.services", cloud_services)
monkeypatch.setitem(sys.modules, "cloud.services.data_scrubber_service", data_scrubber_module)
monkeypatch.setitem(sys.modules, "workers.worker_utils", worker_utils_module)
_install_temporal_activity_stubs(monkeypatch)
sys.modules.pop("workers.temporal_v2_worker.activities", None)
return importlib.import_module("workers.temporal_v2_worker.activities")

uv.lock generated
View file

@ -14,7 +14,7 @@ resolution-markers = [
]
[options]
exclude-newer = "2026-04-10T02:43:47.326099Z"
exclude-newer = "2026-04-14T19:31:07.927062Z"
exclude-newer-span = "P7D"
[manifest]
@ -5606,6 +5606,8 @@ dependencies = [
{ name = "fastapi" },
{ name = "fastmcp" },
{ name = "filetype" },
{ name = "google-auth" },
{ name = "google-auth-oauthlib" },
{ name = "google-cloud-aiplatform" },
{ name = "greenlet", version = "3.0.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.12'" },
{ name = "greenlet", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.12'" },
@ -5731,6 +5733,8 @@ requires-dist = [
{ name = "fastapi", specifier = ">=0.121.0,<0.136" },
{ name = "fastmcp", specifier = ">=3.2.0,<4" },
{ name = "filetype", specifier = ">=1.2.0,<2" },
{ name = "google-auth", specifier = ">=2.49.2,<3" },
{ name = "google-auth-oauthlib", specifier = ">=1.3.1,<2" },
{ name = "google-cloud-aiplatform", specifier = ">=1.90.0,<2" },
{ name = "greenlet", marker = "python_full_version == '3.11.*'", specifier = "==3.0.3" },
{ name = "greenlet", marker = "python_full_version >= '3.12' and python_full_version < '3.14'", specifier = ">3.0.3" },