# Mirror of https://github.com/Skyvern-AI/skyvern.git
# Synced 2025-09-02 02:30:07 +00:00
# 78 lines, 3.1 KiB, Python
import asyncio
|
|
import json
|
|
from datetime import datetime
|
|
from typing import Optional
|
|
from uuid import uuid4
|
|
|
|
import typer
|
|
from dotenv import load_dotenv
|
|
|
|
from evaluation.core import Evaluator, SkyvernClient
|
|
from evaluation.core.utils import load_webvoyager_case_from_json
|
|
from skyvern.forge import app
|
|
from skyvern.forge.prompts import prompt_engine
|
|
from skyvern.forge.sdk.workflow.models.workflow import WorkflowRequestBody
|
|
from skyvern.schemas.runs import ProxyLocation
|
|
|
|
# Runs at import time, after the imports above and before anything below is called.
# NOTE(review): python-dotenv's load_dotenv() reads a .env file into the process
# environment — presumably for Skyvern / LLM settings; confirm which ones are needed.
load_dotenv()
|
|
|
|
|
|
async def create_workflow_run(
    base_url: str,
    cred: str,
    workflow_pid: str,
    proxy_location: ProxyLocation | None = None,
) -> None:
    """Queue one Skyvern workflow run per WebVoyager case and record each run.

    Every case loaded from ``evaluation/datasets/webvoyager_tasks.jsonl`` has
    its question passed through the ``check-evaluation-goal`` prompt. The
    resulting question is queued as a workflow run, and the case together with
    its ``workflow_run_id`` is written as one JSON line to
    ``<group_id>-webvoyager-record.jsonl`` (relative to the working directory).

    Args:
        base_url: Passed to ``SkyvernClient`` as ``base_url``.
        cred: Passed to ``SkyvernClient`` as ``credentials``.
        workflow_pid: Workflow to queue for every case.
        proxy_location: Forwarded unchanged to ``WorkflowRequestBody``.
    """
    client = SkyvernClient(base_url=base_url, credentials=cred)
    # One fresh id groups all cases of this invocation; it also names the record file.
    group_id = uuid4()

    cnt = 0
    record_file_path = f"{group_id}-webvoyager-record.jsonl"
    with open(record_file_path, "w", encoding="utf-8") as f:
        for case_data in load_webvoyager_case_from_json(
            file_path="evaluation/datasets/webvoyager_tasks.jsonl", group_id=str(group_id)
        ):
            prompt = prompt_engine.load_prompt(
                "check-evaluation-goal", user_goal=case_data.question, local_datetime=datetime.now().isoformat()
            )
            response = await app.LLM_API_HANDLER(prompt=prompt, prompt_name="check-evaluation-goal")
            # The LLM reply may lack the key (or carry an empty value); keep the
            # original question in that case instead of queueing a run whose
            # instruction is None and marking the case as updated.
            tweaked_user_goal = response.get("tweaked_user_goal") or case_data.question
            case_data.is_updated = tweaked_user_goal != case_data.question
            case_data.question = tweaked_user_goal

            evaluator = Evaluator(client=client, artifact_folder=f"test/artifacts/{case_data.group_id}/{case_data.id}")
            request_body = WorkflowRequestBody(
                data={
                    "url": case_data.url,
                    "instruction": case_data.question,
                    "answer": case_data.answer,
                },
                proxy_location=proxy_location,
            )
            workflow_run_id = evaluator.queue_skyvern_workflow(
                workflow_pid=workflow_pid, workflow_request=request_body, max_step=case_data.max_steps
            )
            # Persist the case alongside the run id so the queued run can be matched to it later.
            dumped_data = case_data.model_dump()
            dumped_data.update({"workflow_run_id": workflow_run_id})
            print(f"Queued {workflow_run_id} for {case_data.model_dump_json()}")
            f.write(json.dumps(dumped_data) + "\n")
            cnt += 1

    print(f"Queued {cnt} workflows to launch webvoyager evaluation test. saving the records file in {record_file_path}")
|
|
|
|
|
|
def main(
    base_url: str = typer.Option(..., "--base-url", help="base url for Skyvern client"),
    cred: str = typer.Option(..., "--cred", help="credential for Skyvern organization"),
    workflow_pid: str = typer.Option(..., "--workflow-pid", help="workflow pid to execute the evaluation test"),
    proxy_location: Optional[ProxyLocation] = typer.Option(
        None, "--proxy-location", help="overwrite the workflow proxy location"
    ),
) -> None:
    # CLI entry point: hands the parsed options to create_workflow_run and
    # runs that coroutine to completion via asyncio.run.
    # NOTE: documented with comments instead of a docstring on purpose, since
    # Typer uses a command function's docstring as its --help text.
    queue_runs = create_workflow_run(
        base_url=base_url,
        cred=cred,
        workflow_pid=workflow_pid,
        proxy_location=proxy_location,
    )
    asyncio.run(queue_runs)
|
|
|
|
|
|
# Script entry: let Typer build the command line from main's signature and run it.
if __name__ == "__main__":
    typer.run(main)
|