Fix script detail page to only show cached runs + add avg fallbacks (#5220)

2026-04-28 11:40:32 +00:00 · 2026-03-24 13:53:59 -07:00 · 2026-03-24 13:53:59 -07:00 · 85e621364c
commit 85e621364c
parent bb033ee713
5 changed files with 68 additions and 10 deletions
--- a/skyvern-frontend/src/routes/workflows/WorkflowScriptDetailPage.tsx
+++ b/skyvern-frontend/src/routes/workflows/WorkflowScriptDetailPage.tsx
@ -140,6 +140,8 @@ function WorkflowScriptDetailPage() {
  const totalCount = runsData?.total_count ?? 0;
  const successRate =
    totalCount > 0 ? (statusCounts["completed"] ?? 0) / totalCount : null;
+  const avgFallbacks = runsData?.avg_fallbacks_per_run ?? null;
+  const blockCount = codeData?.blocks ? Object.keys(codeData.blocks).length : 0;
  const MAX_RUNS_SHOWN = 50;

  if (!workflowPermanentId || !scriptId) return null;
@ -209,7 +211,7 @@ function WorkflowScriptDetailPage() {
        )}
      </header>

-      <div className="grid grid-cols-4 gap-4">
+      <div className="grid grid-cols-5 gap-4">
        <div className="rounded-md border p-4">
          <p className="text-sm text-muted-foreground">Viewing Revision</p>
          <div className="flex flex-col gap-1">
@ -288,6 +290,29 @@ function WorkflowScriptDetailPage() {
            )}
          </p>
        </div>
+        <div className="rounded-md border p-4">
+          <p className="text-sm text-muted-foreground">Avg AI Fallbacks</p>
+          <p className="text-2xl font-semibold">
+            {runsLoading ? (
+              <Skeleton className="h-8 w-12" />
+            ) : avgFallbacks != null ? (
+              <span
+                className={(() => {
+                  // Color based on fallback ratio relative to block count
+                  const ratio =
+                    blockCount > 0 ? avgFallbacks / blockCount : avgFallbacks;
+                  if (ratio < 0.1) return "text-green-500";
+                  if (ratio < 0.3) return "text-yellow-500";
+                  return "text-red-500";
+                })()}
+              >
+                {avgFallbacks}
+              </span>
+            ) : (
+              "N/A"
+            )}
+          </p>
+        </div>
      </div>

      <div className="space-y-3">
--- a/skyvern-frontend/src/routes/workflows/types/scriptTypes.ts
+++ b/skyvern-frontend/src/routes/workflows/types/scriptTypes.ts
@ -132,4 +132,5 @@ export type ScriptRunsResponse = {
  runs: ScriptRunSummary[];
  total_count: number;
  status_counts: Record<string, number>;
+  avg_fallbacks_per_run: number | null;
 };
--- a/skyvern/forge/sdk/db/agent_db.py
+++ b/skyvern/forge/sdk/db/agent_db.py
@ -7423,12 +7423,17 @@ class AgentDB(BaseAlchemyDB):
        page_size: int = 50,
        created_after: datetime | None = None,
        created_before: datetime | None = None,
-    ) -> tuple[list[WorkflowRunModel], int, dict[str, int]]:
-        """Get workflow runs associated with a script, with total count and status counts.
+    ) -> tuple[list[WorkflowRunModel], int, dict[str, int], float | None]:
+        """Get workflow runs associated with a script, with total count, status counts,
+        and average AI fallbacks per run.

-        Returns (runs, total_count, status_counts) where runs is limited by page_size,
-        total_count is derived from the status_counts GROUP BY, and status_counts is a
-        GROUP BY aggregation of statuses across all runs.
+        Only includes actual script runs (run_with in 'code', 'code_v2'), excluding
+        agent runs that generated the script.
+
+        Returns (runs, total_count, status_counts, avg_fallbacks_per_run) where runs
+        is limited by page_size, total_count is derived from the status_counts GROUP BY,
+        status_counts is a GROUP BY aggregation of statuses across all runs, and
+        avg_fallbacks_per_run averages fallback episodes over the 20 most recent runs (None if no runs).

        If created_after/created_before are provided, filters by the workflow_script
        entry's created_at (not the run's created_at), scoping to the version that
@ -7455,10 +7460,11 @@ class AgentDB(BaseAlchemyDB):
                        WorkflowScriptModel.created_at < created_before,
                    )

-                # Base filter for workflow runs
+                # Base filter for workflow runs — only include actual script runs
                base_filters = [
                    WorkflowRunModel.workflow_run_id.in_(run_ids_subquery),
                    WorkflowRunModel.organization_id == organization_id,
+                    WorkflowRunModel.run_with.in_(["code", "code_v2"]),
                ]

                # Count statuses via GROUP BY (also gives us total_count)
@ -7471,7 +7477,7 @@ class AgentDB(BaseAlchemyDB):
                total_count = sum(status_counts.values())

                if total_count == 0:
-                    return [], 0, {}
+                    return [], 0, {}, None

                # Get the actual workflow runs (paginated)
                runs_query = (
@ -7482,7 +7488,27 @@ class AgentDB(BaseAlchemyDB):
                )
                runs = list((await session.scalars(runs_query)).all())

-                return runs, total_count, status_counts
+                # Compute average AI fallbacks per run over the last 20 runs.
+                max_fallback_sample = 20
+                recent_run_ids = (
+                    select(WorkflowRunModel.workflow_run_id)
+                    .filter(*base_filters)
+                    .order_by(WorkflowRunModel.created_at.desc())
+                    .limit(max_fallback_sample)
+                )
+                total_fallbacks_result = await session.execute(
+                    select(func.count())
+                    .select_from(ScriptFallbackEpisodeModel)
+                    .filter(
+                        ScriptFallbackEpisodeModel.workflow_run_id.in_(recent_run_ids),
+                        ScriptFallbackEpisodeModel.organization_id == organization_id,
+                    )
+                )
+                total_fallbacks = total_fallbacks_result.scalar() or 0
+                sample_size = min(total_count, max_fallback_sample)
+                avg_fallbacks_per_run = round(total_fallbacks / sample_size, 2)
+
+                return runs, total_count, status_counts, avg_fallbacks_per_run
        except SQLAlchemyError:
            LOG.error("SQLAlchemyError", exc_info=True)
            raise
@ -7524,6 +7550,7 @@ class AgentDB(BaseAlchemyDB):
                        WorkflowScriptModel.deleted_at.is_(None),
                        WorkflowScriptModel.workflow_run_id.isnot(None),
                        WorkflowRunModel.organization_id == organization_id,
+                        WorkflowRunModel.run_with.in_(["code", "code_v2"]),
                    )
                    .group_by(WorkflowScriptModel.script_id, WorkflowRunModel.status)
                )
--- a/skyvern/forge/sdk/routes/scripts.py
+++ b/skyvern/forge/sdk/routes/scripts.py
@ -1006,7 +1006,7 @@ async def get_script_runs(
        if not version_found:
            raise HTTPException(status_code=404, detail=f"Script version {version} not found")

-    runs, total_count, status_counts = await app.DATABASE.get_workflow_runs_for_script(
+    runs, total_count, status_counts, avg_fallbacks_per_run = await app.DATABASE.get_workflow_runs_for_script(
        organization_id=organization_id,
        script_id=script_id,
        page_size=page_size,
@ -1028,6 +1028,7 @@ async def get_script_runs(
        ],
        total_count=total_count,
        status_counts=status_counts,
+        avg_fallbacks_per_run=avg_fallbacks_per_run,
    )


--- a/skyvern/schemas/scripts.py
+++ b/skyvern/schemas/scripts.py
@ -257,6 +257,10 @@ class ScriptRunsResponse(BaseModel):
    runs: list[ScriptRunSummary]
    total_count: int
    status_counts: dict[str, int] = Field(default_factory=dict)
+    avg_fallbacks_per_run: float | None = Field(
+        default=None,
+        description="Average number of AI fallback episodes per script run",
+    )


 class ClearCacheResponse(BaseModel):