import base64
import re
from typing import Any

import commentjson
import litellm

from skyvern.forge.sdk.api.llm.exceptions import EmptyLLMResponseError, InvalidLLMResponseFormat


async def llm_messages_builder(
    prompt: str,
    screenshots: list[bytes] | None = None,
    add_assistant_prefix: bool = False,
) -> list[dict[str, Any]]:
    messages: list[dict[str, Any]] = [
        {
            "type": "text",
            "text": prompt,
        }
    ]
    if screenshots:
        for screenshot in screenshots:
            encoded_image = base64.b64encode(screenshot).decode("utf-8")
            messages.append(
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{encoded_image}",
                    },
                }
            )
    # Anthropic models seem to struggle to consistently output a valid JSON object,
    # so we prefill the assistant response with "{" to force it:
    if add_assistant_prefix:
        return [
            {"role": "user", "content": messages},
            {"role": "assistant", "content": "{"},
        ]
    return [{"role": "user", "content": messages}]


def parse_api_response(response: litellm.ModelResponse, add_assistant_prefix: bool = False) -> dict[str, str]:
    try:
        content = response.choices[0].message.content
        # Since we prefilled the Anthropic response with "{", add it back so the
        # content is a valid JSON object again:
        if add_assistant_prefix:
            content = "{" + content
        content = try_to_extract_json_from_markdown_format(content)
        content = replace_useless_text_around_json(content)
        if not content:
            raise EmptyLLMResponseError(str(response))
        return commentjson.loads(content)
    except Exception as e:
        raise InvalidLLMResponseFormat(str(response)) from e


def replace_useless_text_around_json(input_string: str) -> str:
    # Keep only the substring between the first "{" and the last "}" so that any
    # extra prose the model emitted around the JSON object is dropped.
    first_occurrence_of_brace = input_string.find("{")
    last_occurrence_of_brace = input_string.rfind("}")
    return input_string[first_occurrence_of_brace : last_occurrence_of_brace + 1]


def try_to_extract_json_from_markdown_format(text: str) -> str:
    # If the model wrapped its answer in a ```json ... ``` fence, return only the
    # fenced body; otherwise return the text unchanged.
    pattern = r"```json\s*(.*?)\s*```"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        return match.group(1)
    else:
        return text
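

# ---------------------------------------------------------------------------
# Illustrative usage (a sketch, not part of the original module): shows how the
# helpers above are intended to compose around a litellm call. The function
# name, model name, and prompt below are placeholders chosen for the example;
# the actual call site in Skyvern may differ.
# ---------------------------------------------------------------------------
async def _example_round_trip() -> dict[str, str]:
    # Build the message list, prefilling "{" for Anthropic-style models.
    messages = await llm_messages_builder(
        prompt='Respond with a JSON object such as {"answer": "..."}',
        screenshots=None,
        add_assistant_prefix=True,
    )
    # Call the model through litellm; the model name here is only a placeholder.
    response = await litellm.acompletion(
        model="claude-3-opus-20240229",
        messages=messages,
    )
    # Strip markdown fences and surrounding prose, re-add the prefilled "{",
    # and parse the JSON payload.
    return parse_api_response(response, add_assistant_prefix=True)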