Remove old claude prompt for action extraction (#682)

2025-09-02 02:30:07 +00:00 · 2024-08-07 23:46:13 +03:00 · 2024-08-07 23:46:13 +03:00 · b8752b0f7d
commit b8752b0f7d
parent feddade56e
2 changed files with 1 addition and 89 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -960,14 +960,6 @@ class ForgeAgent:
        current_url = (
            await browser_state.page.evaluate("() => document.location.href") if browser_state.page else starting_url
        )
-        prompt_template = "extract-action"
-        if app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
-            "USE_CLAUDE3_SONNET",
-            task.workflow_run_id or task.task_id,
-            properties={"organization_id": task.organization_id},
-        ):
-            LOG.info("Using Claude3 Sonnet prompt template for action extraction")
-            prompt_template = "extract-action-claude3-sonnet"

        # TODO: we only use HTML element for now, introduce a way to switch in the future
        element_tree_format = ElementTreeFormat.HTML
@@ -981,7 +973,7 @@ class ForgeAgent:
        element_tree_in_prompt: str = scraped_page.build_element_tree(element_tree_format)
        final_navigation_payload = self._build_navigation_payload(task)
        extract_action_prompt = prompt_engine.load_prompt(
-            prompt_template,
+            "extract-action",
            navigation_goal=navigation_goal,
            navigation_payload_str=json.dumps(final_navigation_payload),
            starting_url=starting_url,
--- a/skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2
+++ b/skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2
@@ -1,80 +0,0 @@
-Identify actions to help user progress towards the user goal using the DOM elements given in the list and the screenshot of the website.
-Include only the elements that are relevant to the user goal, without altering or imagining new elements.
-Accurately interpret and understand the functional significance of SVG elements based on their shapes and context within the webpage.
-Use the details from the user details to fill in necessary values. Always satisfy required fields if the field isn't already filled in. Don't return any action for the same field, if this field is already filled in and the value is the same as the one you would have filled in.
-MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc.
-Each interactable element is tagged with an ID.
-If you see any information in red in the page screenshot, this means a condition wasn't satisfied. prioritize actions with the red information.
-If you see a popup in the page screenshot, prioritize actions on the popup.
-
-Reply in JSON format with the following keys:
-{
-    "user_goal_achieved": str, // A string that describes if user goal has been completed with reasoning.
-    "action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in, and how that moves you towards your overall goal. Output "COMPLETE" action in the "actions" if user goal has been achieved.
-    "actions": array // An array of actions. Here's the format of each action:
-    [{
-        "reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
-        "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
-        "action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. "COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless you confirm user goal is achieved through the elements or the screenshots. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. If you are returning "COMPLETE" or "TERMINATE", never return any other action in the same response. The "COMPLETE" and "TERMINATE" actions can only be returned once in the whole task. When they are returned, they have to be the only action in the response.
-        "id": str, // The id of the element to take action on. The id has to be one from the elements list
-        "text": str, // Text for INPUT_TEXT action only
-        "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
-        "option": {  // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
-            "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
-            "index": int, // the index corresponding to the option index under the the select element.
-            "value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE
-        },
-{% if error_code_mapping_str %}
-        "errors": array // A list of errors. This is used to surface any errors that matches the current situation for COMPLETE and TERMINATE actions. For other actions or if no error description suits the current situation on the screenshots, return an empty list. You are allowed to return multiple errors if there are multiple errors on the page.
-        [{
-            "error_code": str, // The error code from the user's error code list
-            "reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point.
-            "confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence
-        }]
-{% endif %}
-    }],
-}
-
-{% if action_history %}
-Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations.
-{% endif %}
-
-Clickable elements from `{{ current_url }}`:
-```
-{{ elements }}
-```
-
-The URL of the page you're on right now is `{{ current_url }}`.
-
-User goal:
-```
-{{ navigation_goal }}
-```
-
-{% if error_code_mapping_str %}
-Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. User defined errors:
-{{ error_code_mapping_str }}
-{% endif %}
-
-{% if data_extraction_goal %}
-
-User Data Extraction Goal:
-```
-{{ data_extraction_goal }}
-```
-{% endif %}
-
-User details:
-```
-{{ navigation_payload_str }}
-```
-{% if action_history %}
-
-Action results from previous steps: (note: even if the action history suggests goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved)
-{{ action_history }}
-{% endif %}
-
-Current datetime in UTC, YYYY-MM-DD HH:MM format:
-```
-{{ utc_datetime }}
-```