mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2025-09-10 15:35:51 +00:00
task v2 refactor part 6 - observer_cruise_id -> task_v2_id (#1817)
This commit is contained in:
parent
2d24055c36
commit
ffbc95e1b4
22 changed files with 238 additions and 250 deletions
66
evaluation/script/create_webvoyager_task_v2.py
Normal file
66
evaluation/script/create_webvoyager_task_v2.py
Normal file
|
@ -0,0 +1,66 @@
|
|||
import asyncio
|
||||
import json
|
||||
from datetime import datetime
|
||||
from uuid import uuid4
|
||||
|
||||
import typer
|
||||
|
||||
from evaluation.core import Evaluator, SkyvernClient
|
||||
from evaluation.core.utils import load_webvoyager_case_from_json
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.forge.sdk.schemas.task_v2 import ObserverTaskRequest
|
||||
|
||||
|
||||
async def create_task_v2(
    base_url: str,
    cred: str,
) -> None:
    """Queue one Skyvern run per WebVoyager case and record what was queued.

    For every case loaded from ``evaluation/datasets/webvoyager_tasks.jsonl``:

    1. The case question is sent through the ``check-evaluation-goal`` prompt
       and replaced by the ``tweaked_user_goal`` returned by the LLM.
    2. An ``ObserverTaskRequest`` built from the case URL and the (possibly
       tweaked) question is queued through ``Evaluator.queue_skyvern_cruise``.
    3. The dumped case, extended with the identifiers of the queued run, is
       appended as one JSON line to ``<group_id>-webvoyager-record.jsonl``
       (a path relative to the current working directory).

    Args:
        base_url: Base URL handed to ``SkyvernClient``.
        cred: Credentials handed to ``SkyvernClient``.
    """
    client = SkyvernClient(base_url=base_url, credentials=cred)
    # A fresh id per invocation groups all cases queued by this run and also
    # names the record file.
    group_id = uuid4()

    cnt = 0
    record_file_path = f"{group_id}-webvoyager-record.jsonl"
    with open(record_file_path, "w", encoding="utf-8") as f:
        for case_data in load_webvoyager_case_from_json(
            file_path="evaluation/datasets/webvoyager_tasks.jsonl", group_id=str(group_id)
        ):
            prompt = prompt_engine.load_prompt(
                "check-evaluation-goal", user_goal=case_data.question, local_datetime=datetime.now().isoformat()
            )
            response = await app.LLM_API_HANDLER(prompt=prompt, prompt_name="check-evaluation-goal")
            # The LLM response may omit the key or return an empty goal. Keep
            # the original question in that case instead of overwriting it with
            # None and flagging the case as updated.
            tweaked_user_goal = response.get("tweaked_user_goal") or case_data.question
            case_data.is_updated = tweaked_user_goal != case_data.question
            case_data.question = tweaked_user_goal

            evaluator = Evaluator(client=client, artifact_folder=f"test/artifacts/{case_data.group_id}/{case_data.id}")
            request_body = ObserverTaskRequest(
                url=case_data.url,
                user_prompt=case_data.question,
            )
            cruise = evaluator.queue_skyvern_cruise(cruise_request=request_body, max_step=case_data.max_steps)
            dumped_data = case_data.model_dump()
            dumped_data.update(
                {
                    "task_v2_id": cruise.observer_cruise_id,
                    "workflow_run_id": cruise.workflow_run_id,
                    "workflow_permanent_id": cruise.workflow_permanent_id,
                    # Stringify the URL so json.dumps can serialize it; a falsy
                    # value is written through unchanged.
                    "cruise_url": str(cruise.url) if cruise.url else cruise.url,
                }
            )
            print(f"Queued {cruise.observer_cruise_id} for {case_data.model_dump_json()}")
            f.write(json.dumps(dumped_data) + "\n")
            cnt += 1

    print(f"Queued {cnt} cruises to launch webvoyager evaluation test. saving the records file in {record_file_path}")
|
||||
|
||||
|
||||
def main(
    base_url: str = typer.Option(..., "--base-url", help="base url for Skyvern client"),
    cred: str = typer.Option(..., "--cred", help="credential for Skyvern organization"),
) -> None:
    # CLI entry point: both options are required and are forwarded unchanged to
    # the async worker, which is driven to completion on a new event loop.
    # (Kept as a comment rather than a docstring so the CLI help output is
    # not altered.)
    queue_job = create_task_v2(base_url=base_url, cred=cred)
    asyncio.run(queue_job)
|
||||
|
||||
|
||||
# Run the Typer CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    typer.run(main)
|
Loading…
Add table
Add a link
Reference in a new issue