Skyvern/skyvern/forge/sdk/db/mixins/schedules.py
Aaron Perez d3bba42792
Some checks are pending
Run tests and pre-commit / Run tests and pre-commit hooks (push) Waiting to run
Run tests and pre-commit / Frontend Lint and Build (push) Waiting to run
Publish Fern Docs / run (push) Waiting to run
refactor(SKY-62): consolidate repository pattern follow-up PRs (#5283)
2026-03-28 14:27:43 -07:00

654 lines
25 KiB
Python

from __future__ import annotations
import asyncio
from datetime import datetime
from typing import TYPE_CHECKING, Any
import structlog
from sqlalchemy import func, or_, select, text, update
from skyvern.forge.sdk.db._error_handling import db_operation
from skyvern.forge.sdk.db.exceptions import ScheduleLimitExceededError
from skyvern.forge.sdk.db.models import (
WorkflowModel,
WorkflowRunModel,
WorkflowScheduleModel,
)
from skyvern.forge.sdk.db.utils import convert_to_workflow_schedule
from skyvern.forge.sdk.schemas.workflow_schedules import OrganizationScheduleItem, WorkflowSchedule
from skyvern.forge.sdk.workflow.schedules import compute_next_run
if TYPE_CHECKING:
from sqlalchemy.ext.asyncio import AsyncEngine
from skyvern.forge.sdk.db.base_alchemy_db import _SessionFactory
from skyvern.forge.sdk.db._sentinels import _UNSET
LOG = structlog.get_logger()
class SchedulesMixin:
    """Database operations for workflow schedules.

    .. deprecated::
        This mixin is part of the legacy database layer. New code should use the
        repository classes in ``skyvern.forge.sdk.db.repositories`` instead.

    Cross-mixin migrations already completed:

    - ``soft_delete_workflow_and_schedules_by_permanent_id`` → ``WorkflowsRepository``
      (operates on workflows as the primary entity, schedules are a side-effect).
    """

    # Called as ``self.Session()`` and entered with ``async with`` by the methods of this mixin.
    Session: _SessionFactory
    # Declared for the host class; not referenced by the methods visible in this mixin.
    engine: AsyncEngine
    # Passed as the second argument to ``convert_to_workflow_schedule`` on every conversion.
    debug_enabled: bool
    # Read via ``getattr`` in ``create_workflow_schedule_with_limit``; when it is not None
    # the limit-checked create is serialized with this lock instead of a PostgreSQL advisory lock.
    _sqlite_schedule_lock: asyncio.Lock | None
@db_operation("create_workflow_schedule")
async def create_workflow_schedule(
    self,
    organization_id: str,
    workflow_permanent_id: str,
    cron_expression: str,
    timezone: str,
    enabled: bool,
    parameters: dict[str, Any] | None = None,
    temporal_schedule_id: str | None = None,
    name: str | None = None,
    description: str | None = None,
) -> WorkflowSchedule:
    """Insert a new schedule row and return it as a ``WorkflowSchedule``.

    No schedule-count limit is checked here; ``create_workflow_schedule_with_limit``
    is the limit-enforcing variant.
    """
    column_values = {
        "organization_id": organization_id,
        "workflow_permanent_id": workflow_permanent_id,
        "cron_expression": cron_expression,
        "timezone": timezone,
        "enabled": enabled,
        "parameters": parameters,
        "temporal_schedule_id": temporal_schedule_id,
        "name": name,
        "description": description,
    }
    async with self.Session() as db:
        new_row = WorkflowScheduleModel(**column_values)
        db.add(new_row)
        await db.commit()
        # Re-read the row after the commit before converting it.
        await db.refresh(new_row)
        return convert_to_workflow_schedule(new_row, self.debug_enabled)
@db_operation("create_workflow_schedule_with_limit")
async def create_workflow_schedule_with_limit(
    self,
    organization_id: str,
    workflow_permanent_id: str,
    max_schedules: int | None,
    cron_expression: str,
    timezone: str,
    enabled: bool,
    parameters: dict[str, Any] | None = None,
    name: str | None = None,
    description: str | None = None,
) -> tuple[WorkflowSchedule, int]:
    """Create a schedule atomically with limit enforcement.

    On PostgreSQL, an advisory lock serializes concurrent creates for the same
    workflow, preventing TOCTOU races on the schedule count.

    On SQLite, an ``asyncio.Lock`` (set on ``AgentDB.__init__``) is used instead,
    since SQLite is single-writer and has no advisory lock support.

    Returns:
        ``(created_schedule, count_before_insert)``.

    Raises:
        ScheduleLimitExceededError: if ``count >= max_schedules``.
    """
    forwarded = (
        organization_id,
        workflow_permanent_id,
        max_schedules,
        cron_expression,
        timezone,
        enabled,
        parameters,
        name,
        description,
    )
    python_lock = getattr(self, "_sqlite_schedule_lock", None)

    if python_lock is None:
        # No Python-level lock configured: rely on the database advisory lock.
        return await self._create_schedule_with_limit_inner(*forwarded, use_advisory_lock=True)

    # SQLite: the lock is held across the count-check + insert to prevent TOCTOU.
    async with python_lock:
        return await self._create_schedule_with_limit_inner(*forwarded, use_advisory_lock=False)
# Deliberately left without @db_operation: errors surface through the outer
# create_workflow_schedule_with_limit, which owns the operation name.
async def _create_schedule_with_limit_inner(
    self,
    organization_id: str,
    workflow_permanent_id: str,
    max_schedules: int | None,
    cron_expression: str,
    timezone: str,
    enabled: bool,
    parameters: dict[str, Any] | None,
    name: str | None,
    description: str | None,
    *,
    use_advisory_lock: bool,
) -> tuple[WorkflowSchedule, int]:
    """Count the workflow's non-deleted schedules, enforce the cap, then insert.

    The count and the insert run in the same session. When ``use_advisory_lock``
    is true a PostgreSQL advisory lock keyed on organization + workflow is taken
    first.

    Returns:
        ``(created_schedule, count_before_insert)``.

    Raises:
        ScheduleLimitExceededError: if ``max_schedules`` is not None and the
            current count is already ``>= max_schedules``.
    """
    async with self.Session() as db:
        if use_advisory_lock:
            await db.execute(
                text("SELECT pg_advisory_xact_lock(hashtext(:key))"),
                {"key": f"schedule:{organization_id}:{workflow_permanent_id}"},
            )

        active_count_stmt = select(func.count()).where(
            WorkflowScheduleModel.organization_id == organization_id,
            WorkflowScheduleModel.workflow_permanent_id == workflow_permanent_id,
            WorkflowScheduleModel.deleted_at.is_(None),
        )
        existing = (await db.execute(active_count_stmt)).scalar_one()

        limit_reached = max_schedules is not None and existing >= max_schedules
        if limit_reached:
            raise ScheduleLimitExceededError(
                organization_id=organization_id,
                workflow_permanent_id=workflow_permanent_id,
                current_count=existing,
                max_allowed=max_schedules,
            )

        new_row = WorkflowScheduleModel(
            organization_id=organization_id,
            workflow_permanent_id=workflow_permanent_id,
            cron_expression=cron_expression,
            timezone=timezone,
            enabled=enabled,
            parameters=parameters,
            name=name,
            description=description,
        )
        db.add(new_row)
        await db.commit()
        await db.refresh(new_row)
        return convert_to_workflow_schedule(new_row, self.debug_enabled), existing
@db_operation("set_temporal_schedule_id")
async def set_temporal_schedule_id(
    self,
    workflow_schedule_id: str,
    organization_id: str,
    temporal_schedule_id: str,
) -> WorkflowSchedule | None:
    """Store ``temporal_schedule_id`` on a non-deleted schedule.

    Returns the updated schedule, or None when no non-deleted schedule matches
    the given id and organization.
    """
    lookup = select(WorkflowScheduleModel).where(
        WorkflowScheduleModel.workflow_schedule_id == workflow_schedule_id,
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    async with self.Session() as db:
        row = (await db.scalars(lookup)).first()
        if not row:
            return None

        row.temporal_schedule_id = temporal_schedule_id
        row.modified_at = datetime.utcnow()
        await db.commit()
        await db.refresh(row)
        return convert_to_workflow_schedule(row, self.debug_enabled)
@db_operation("update_workflow_schedule")
async def update_workflow_schedule(
    self,
    workflow_schedule_id: str,
    organization_id: str,
    cron_expression: str,
    timezone: str,
    enabled: bool,
    parameters: dict[str, Any] | None = None,
    temporal_schedule_id: str | None | object = _UNSET,
    name: str | None | object = _UNSET,
    description: str | None | object = _UNSET,
) -> WorkflowSchedule | None:
    """Overwrite the editable fields of a non-deleted schedule.

    ``cron_expression``, ``timezone``, ``enabled`` and ``parameters`` are always
    written (so omitting ``parameters`` stores None). ``temporal_schedule_id``,
    ``name`` and ``description`` are written only when the caller passes
    something other than the ``_UNSET`` sentinel, which lets None be stored
    explicitly.

    Returns the updated schedule, or None when no non-deleted schedule matches.
    """
    lookup = select(WorkflowScheduleModel).where(
        WorkflowScheduleModel.workflow_schedule_id == workflow_schedule_id,
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    async with self.Session() as db:
        row = (await db.scalars(lookup)).first()
        if not row:
            return None

        row.cron_expression = cron_expression
        row.timezone = timezone
        row.enabled = enabled
        row.parameters = parameters

        sentinel_guarded = (
            ("temporal_schedule_id", temporal_schedule_id),
            ("name", name),
            ("description", description),
        )
        for attribute, value in sentinel_guarded:
            if value is not _UNSET:
                setattr(row, attribute, value)

        row.modified_at = datetime.utcnow()
        await db.commit()
        await db.refresh(row)
        return convert_to_workflow_schedule(row, self.debug_enabled)
@db_operation("get_workflow_schedule_by_id")
async def get_workflow_schedule_by_id(
    self,
    workflow_schedule_id: str,
    organization_id: str,
) -> WorkflowSchedule | None:
    """Return the non-deleted schedule with this id in the organization, or None."""
    lookup = select(WorkflowScheduleModel).where(
        WorkflowScheduleModel.workflow_schedule_id == workflow_schedule_id,
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    async with self.Session() as db:
        row = (await db.scalars(lookup)).first()
        return convert_to_workflow_schedule(row, self.debug_enabled) if row else None
@db_operation("get_workflow_schedules")
async def get_workflow_schedules(
    self,
    workflow_permanent_id: str,
    organization_id: str,
) -> list[WorkflowSchedule]:
    """Return every non-deleted schedule of one workflow within an organization."""
    stmt = select(WorkflowScheduleModel).where(
        WorkflowScheduleModel.workflow_permanent_id == workflow_permanent_id,
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    async with self.Session() as db:
        found = (await db.scalars(stmt)).all()
        return [convert_to_workflow_schedule(model, self.debug_enabled) for model in found]
@db_operation("get_all_enabled_schedules")
async def get_all_enabled_schedules(
    self,
    organization_id: str | None = None,
) -> list[WorkflowSchedule]:
    """Fetch all enabled, non-deleted schedules, optionally filtered by org.

    Args:
        organization_id: When None (the default) schedules of every organization
            are returned. Any other value — including an empty string — is
            applied as an exact-match filter.

    Returns:
        The matching schedules converted to ``WorkflowSchedule``.
    """
    async with self.Session() as session:
        stmt = select(WorkflowScheduleModel).where(
            WorkflowScheduleModel.enabled.is_(True),
            WorkflowScheduleModel.deleted_at.is_(None),
        )
        # Compare against None rather than truthiness: an empty organization id
        # must not silently widen the query to every organization.
        if organization_id is not None:
            stmt = stmt.where(WorkflowScheduleModel.organization_id == organization_id)
        rows = (await session.scalars(stmt)).all()
        return [convert_to_workflow_schedule(r, self.debug_enabled) for r in rows]
@db_operation("has_schedule_fired_since")
async def has_schedule_fired_since(
    self,
    workflow_schedule_id: str,
    since: datetime,
) -> bool:
    """Report whether any workflow run of this schedule was created at or after ``since``."""
    from sqlalchemy import exists as sa_exists

    run_exists = sa_exists().where(
        WorkflowRunModel.workflow_schedule_id == workflow_schedule_id,
        WorkflowRunModel.created_at >= since,
    )
    async with self.Session() as db:
        outcome = await db.execute(select(run_exists))
        found = outcome.scalar()
    return bool(found)
@db_operation("update_workflow_schedule_enabled")
async def update_workflow_schedule_enabled(
    self,
    workflow_schedule_id: str,
    organization_id: str,
    enabled: bool,
) -> WorkflowSchedule | None:
    """Set the ``enabled`` flag of a non-deleted schedule.

    Returns the updated schedule, or None when no non-deleted schedule matches.
    """
    lookup = select(WorkflowScheduleModel).where(
        WorkflowScheduleModel.workflow_schedule_id == workflow_schedule_id,
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    async with self.Session() as db:
        row = (await db.scalars(lookup)).first()
        if not row:
            return None

        row.enabled = enabled
        row.modified_at = datetime.utcnow()
        await db.commit()
        await db.refresh(row)
        return convert_to_workflow_schedule(row, self.debug_enabled)
@db_operation("delete_workflow_schedule")
async def delete_workflow_schedule(
    self,
    workflow_schedule_id: str,
    organization_id: str,
) -> WorkflowSchedule | None:
    """Soft-delete a schedule by stamping ``deleted_at``.

    The row is kept; only ``deleted_at`` and ``modified_at`` are set.

    Returns:
        The schedule as it looks after deletion, or None when no non-deleted
        schedule matches the given id and organization.
    """
    async with self.Session() as session:
        workflow_schedule = (
            await session.scalars(
                select(WorkflowScheduleModel).filter_by(
                    workflow_schedule_id=workflow_schedule_id,
                    organization_id=organization_id,
                    deleted_at=None,
                )
            )
        ).first()
        if not workflow_schedule:
            return None

        # Capture the clock once so both columns carry the same instant for
        # this single logical event.
        now = datetime.utcnow()
        workflow_schedule.deleted_at = now
        workflow_schedule.modified_at = now
        await session.commit()
        await session.refresh(workflow_schedule)
        return convert_to_workflow_schedule(workflow_schedule, self.debug_enabled)
@db_operation("restore_workflow_schedule")
async def restore_workflow_schedule(
    self,
    workflow_schedule_id: str,
    organization_id: str,
) -> WorkflowSchedule | None:
    """Undo a soft-delete by clearing ``deleted_at``.

    Only rows that currently have ``deleted_at`` set are considered.

    Returns the restored schedule, or None when no soft-deleted schedule matches
    the given id and organization.
    """
    lookup = select(WorkflowScheduleModel).where(
        WorkflowScheduleModel.workflow_schedule_id == workflow_schedule_id,
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.deleted_at.isnot(None),
    )
    async with self.Session() as db:
        row = (await db.scalars(lookup)).first()
        if not row:
            return None

        row.deleted_at = None
        row.modified_at = datetime.utcnow()
        await db.commit()
        await db.refresh(row)
        return convert_to_workflow_schedule(row, self.debug_enabled)
@db_operation("count_workflow_schedules")
async def count_workflow_schedules(
    self,
    organization_id: str,
    workflow_permanent_id: str,
) -> int:
    """Return how many non-deleted schedules a workflow has within an organization."""
    count_stmt = select(func.count()).where(
        WorkflowScheduleModel.organization_id == organization_id,
        WorkflowScheduleModel.workflow_permanent_id == workflow_permanent_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    async with self.Session() as db:
        outcome = await db.execute(count_stmt)
        return outcome.scalar_one()
@db_operation("list_organization_schedules")
async def list_organization_schedules(
    self,
    organization_id: str,
    page: int = 1,
    page_size: int = 10,
    enabled_filter: bool | None = None,
    search: str | None = None,
) -> tuple[list[OrganizationScheduleItem], int]:
    """List all schedules for an organization, joined with workflow titles.

    Args:
        organization_id: Organization whose non-deleted schedules are listed.
        page: 1-based page number.
        page_size: Maximum number of schedules returned for the page.
        enabled_filter: When not None, keep only schedules whose ``enabled``
            equals this value.
        search: When truthy, keep only schedules whose workflow title or
            schedule name contains this text (case-insensitive).

    Returns:
        ``(schedules, total_count)`` where ``total_count`` is the number of
        schedules matching the filters across all pages. ``next_run`` is
        computed only for enabled schedules and is None if it cannot be computed.

    Raises:
        ValueError: if ``page`` is less than 1.
    """
    if page < 1:
        raise ValueError(f"Page must be greater than 0, got {page}")
    db_page = page - 1

    async with self.Session() as session:
        # Subquery to get the latest version title per workflow_permanent_id
        latest_version_sq = (
            select(
                WorkflowModel.workflow_permanent_id,
                func.max(WorkflowModel.version).label("max_version"),
            )
            .where(WorkflowModel.organization_id == organization_id)
            .where(WorkflowModel.deleted_at.is_(None))
            .group_by(WorkflowModel.workflow_permanent_id)
            .subquery()
        )
        workflow_title_sq = (
            select(
                WorkflowModel.workflow_permanent_id,
                WorkflowModel.title,
            )
            .join(
                latest_version_sq,
                (WorkflowModel.workflow_permanent_id == latest_version_sq.c.workflow_permanent_id)
                & (WorkflowModel.version == latest_version_sq.c.max_version),
            )
            .subquery()
        )

        # Base query: schedules joined with workflow titles. The outer join keeps
        # schedules whose workflow has no matching title row.
        base_filter = (
            select(WorkflowScheduleModel, workflow_title_sq.c.title.label("workflow_title"))
            .outerjoin(
                workflow_title_sq,
                WorkflowScheduleModel.workflow_permanent_id == workflow_title_sq.c.workflow_permanent_id,
            )
            .where(WorkflowScheduleModel.organization_id == organization_id)
            .where(WorkflowScheduleModel.deleted_at.is_(None))
        )
        if enabled_filter is not None:
            base_filter = base_filter.where(WorkflowScheduleModel.enabled == enabled_filter)
        if search:
            base_filter = base_filter.where(
                or_(
                    workflow_title_sq.c.title.icontains(search, autoescape=True),
                    WorkflowScheduleModel.name.icontains(search, autoescape=True),
                )
            )

        # Count query
        count_query = select(func.count()).select_from(base_filter.subquery())
        total_count = (await session.execute(count_query)).scalar_one()

        # Data query with pagination. created_at alone is not unique, so the
        # schedule id is added as a tie-breaker; without it rows sharing a
        # timestamp could repeat or go missing across LIMIT/OFFSET pages.
        data_query = (
            base_filter.order_by(
                WorkflowScheduleModel.created_at.desc(),
                WorkflowScheduleModel.workflow_schedule_id.desc(),
            )
            .limit(page_size)
            .offset(db_page * page_size)
        )
        rows = (await session.execute(data_query)).all()

        # Materialize row data while session is open
        raw_schedules: list[dict[str, Any]] = [
            {
                "workflow_schedule_id": schedule_model.workflow_schedule_id,
                "organization_id": schedule_model.organization_id,
                "workflow_permanent_id": schedule_model.workflow_permanent_id,
                "workflow_title": workflow_title or "Untitled Workflow",
                "cron_expression": schedule_model.cron_expression,
                "timezone": schedule_model.timezone,
                "enabled": schedule_model.enabled,
                "parameters": schedule_model.parameters,
                "name": schedule_model.name,
                "description": schedule_model.description,
                "created_at": schedule_model.created_at,
                "modified_at": schedule_model.modified_at,
            }
            for schedule_model, workflow_title in rows
        ]

    # Compute next_run outside session scope (pure CPU, no DB needed)
    schedules: list[OrganizationScheduleItem] = []
    for fields in raw_schedules:
        next_run = None
        if fields["enabled"]:
            try:
                next_run = compute_next_run(fields["cron_expression"], fields["timezone"])
            except Exception:
                # Best effort: one uncomputable schedule must not fail the whole
                # listing, so log it and leave next_run as None.
                LOG.warning(
                    "Failed to compute next_run for schedule",
                    workflow_schedule_id=fields["workflow_schedule_id"],
                    exc_info=True,
                )
        schedules.append(OrganizationScheduleItem(**fields, next_run=next_run))
    return schedules, total_count
@db_operation("soft_delete_workflow_and_schedules_by_permanent_id")
async def soft_delete_workflow_and_schedules_by_permanent_id(
    self,
    workflow_permanent_id: str,
    organization_id: str | None = None,
) -> list[str]:
    """Soft-delete a workflow and its active schedules in a single DB transaction.

    .. deprecated::
        Moved to ``WorkflowsRepository.soft_delete_workflow_and_schedules_by_permanent_id``
        (skyvern/forge/sdk/db/repositories/workflows.py). The primary entity is the
        workflow, not the schedule, so it belongs in the workflows repository.
        This copy remains for backward compatibility with the legacy mixin layer.

    When ``organization_id`` is not None, both the schedule lookup and the
    workflow update are restricted to that organization.

    Returns the ids of the schedules that were soft-deleted (possibly empty).
    """
    active_ids_stmt = select(WorkflowScheduleModel.workflow_schedule_id).where(
        WorkflowScheduleModel.workflow_permanent_id == workflow_permanent_id,
        WorkflowScheduleModel.deleted_at.is_(None),
    )
    workflow_stmt = update(WorkflowModel).where(
        WorkflowModel.workflow_permanent_id == workflow_permanent_id,
        WorkflowModel.deleted_at.is_(None),
    )
    if organization_id is not None:
        active_ids_stmt = active_ids_stmt.where(WorkflowScheduleModel.organization_id == organization_id)
        workflow_stmt = workflow_stmt.filter_by(organization_id=organization_id)

    async with self.Session() as db:
        schedule_ids = list((await db.execute(active_ids_stmt)).scalars().all())

        # One timestamp shared by the schedules and the workflow rows.
        stamp = datetime.utcnow()
        if schedule_ids:
            await db.execute(
                update(WorkflowScheduleModel)
                .where(WorkflowScheduleModel.workflow_schedule_id.in_(schedule_ids))
                .values(deleted_at=stamp)
            )
        await db.execute(workflow_stmt.values(deleted_at=stamp))
        await db.commit()
        return schedule_ids
@db_operation("soft_delete_orphaned_schedules")
async def soft_delete_orphaned_schedules(self, limit: int = 500) -> list[tuple[str, str]]:
    """Soft-delete orphaned schedules and return their identities.

    A schedule counts as orphaned when it is not deleted itself and no
    non-deleted workflow row shares its ``workflow_permanent_id``. At most
    ``limit`` schedules are selected per call.

    Uses a single UPDATE ... RETURNING statement so orphan detection and
    soft-deletion happen atomically in one DB round-trip.

    Returns a list of ``(workflow_schedule_id, workflow_permanent_id)`` pairs for
    the rows that were updated.
    """
    # Correlated EXISTS: is there a live workflow for the schedule's permanent id?
    has_live_workflow = (
        select(WorkflowModel.workflow_permanent_id)
        .where(
            WorkflowModel.workflow_permanent_id == WorkflowScheduleModel.workflow_permanent_id,
            WorkflowModel.deleted_at.is_(None),
        )
        .correlate(WorkflowScheduleModel)
        .exists()
    )
    orphan_cte = (
        select(
            WorkflowScheduleModel.workflow_schedule_id.label("workflow_schedule_id"),
            WorkflowScheduleModel.workflow_permanent_id.label("workflow_permanent_id"),
        )
        .where(
            WorkflowScheduleModel.deleted_at.is_(None),
            ~has_live_workflow,
        )
        .limit(limit)
        .cte("orphaned_schedules")
    )
    soft_delete_stmt = (
        update(WorkflowScheduleModel)
        .where(
            WorkflowScheduleModel.workflow_schedule_id.in_(select(orphan_cte.c.workflow_schedule_id)),
            WorkflowScheduleModel.deleted_at.is_(None),
        )
        .values(deleted_at=datetime.utcnow())
        .returning(
            WorkflowScheduleModel.workflow_schedule_id,
            WorkflowScheduleModel.workflow_permanent_id,
        )
    )

    async with self.Session() as db:
        outcome = await db.execute(soft_delete_stmt)
        await db.commit()
        return [(schedule_id, permanent_id) for schedule_id, permanent_id in outcome.all()]