mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2025-09-16 02:09:40 +00:00
More visibility into attached files and duplicate status (#776)
This commit is contained in:
parent
73227963dd
commit
be1c8ba060
2 changed files with 33 additions and 1 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
import zipfile
|
import zipfile
|
||||||
|
@ -86,3 +87,12 @@ def get_number_of_files_in_directory(directory: Path, recursive: bool = False) -
|
||||||
break
|
break
|
||||||
count += len(files)
|
count += len(files)
|
||||||
return count
|
return count
|
||||||
|
|
||||||
|
|
||||||
|
def calculate_sha256(file_path: str, *, chunk_size: int = 65536) -> str:
    """Return the hex-encoded SHA-256 digest of a file's contents.

    The file is opened in binary mode and read in fixed-size chunks, so the
    whole file is never held in memory at once.

    Args:
        file_path: Path of the file to hash.
        chunk_size: Number of bytes to read per iteration. Must be positive.
            The digest does not depend on this value.

    Returns:
        The SHA-256 digest as a lowercase hexadecimal string.

    Raises:
        ValueError: If ``chunk_size`` is not a positive integer.
        OSError: If the file cannot be opened or read.
    """
    # read(0) returns b"" immediately, which would end the loop at once and
    # silently yield the digest of empty input; reject it up front.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    sha256_hash = hashlib.sha256()
    with open(file_path, "rb") as f:
        # An empty bytes object signals end of file.
        while byte_block := f.read(chunk_size):
            sha256_hash.update(byte_block)
    return sha256_hash.hexdigest()
|
||||||
|
|
|
@ -6,6 +6,7 @@ import os
|
||||||
import smtplib
|
import smtplib
|
||||||
import textwrap
|
import textwrap
|
||||||
import uuid
|
import uuid
|
||||||
|
from collections import defaultdict
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from email.message import EmailMessage
|
from email.message import EmailMessage
|
||||||
from enum import StrEnum
|
from enum import StrEnum
|
||||||
|
@ -30,7 +31,12 @@ from skyvern.exceptions import (
|
||||||
from skyvern.forge import app
|
from skyvern.forge import app
|
||||||
from skyvern.forge.prompts import prompt_engine
|
from skyvern.forge.prompts import prompt_engine
|
||||||
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
||||||
from skyvern.forge.sdk.api.files import download_file, download_from_s3, get_path_for_workflow_download_directory
|
from skyvern.forge.sdk.api.files import (
|
||||||
|
calculate_sha256,
|
||||||
|
download_file,
|
||||||
|
download_from_s3,
|
||||||
|
get_path_for_workflow_download_directory,
|
||||||
|
)
|
||||||
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
|
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
|
||||||
from skyvern.forge.sdk.schemas.tasks import TaskOutput, TaskStatus
|
from skyvern.forge.sdk.schemas.tasks import TaskOutput, TaskStatus
|
||||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||||
|
@ -905,6 +911,8 @@ class SendEmailBlock(Block):
|
||||||
else:
|
else:
|
||||||
msg.set_content(self.body)
|
msg.set_content(self.body)
|
||||||
|
|
||||||
|
file_names_by_hash: dict[str, list[str]] = defaultdict(list)
|
||||||
|
|
||||||
for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
|
for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
|
||||||
path = None
|
path = None
|
||||||
try:
|
try:
|
||||||
|
@ -961,10 +969,24 @@ class SendEmailBlock(Block):
|
||||||
subtype=subtype,
|
subtype=subtype,
|
||||||
filename=attachment_filename,
|
filename=attachment_filename,
|
||||||
)
|
)
|
||||||
|
file_hash = calculate_sha256(path)
|
||||||
|
file_names_by_hash[file_hash].append(path)
|
||||||
finally:
|
finally:
|
||||||
if path:
|
if path:
|
||||||
os.unlink(path)
|
os.unlink(path)
|
||||||
|
|
||||||
|
# Calculate file stats based on content hashes
|
||||||
|
total_files = sum(len(files) for files in file_names_by_hash.values())
|
||||||
|
unique_files = len(file_names_by_hash)
|
||||||
|
duplicate_files_list = [files for files in file_names_by_hash.values() if len(files) > 1]
|
||||||
|
|
||||||
|
# Log file statistics
|
||||||
|
LOG.info("SendEmailBlock: Total files attached", total_files=total_files)
|
||||||
|
LOG.info("SendEmailBlock: Unique files (based on content) attached", unique_files=unique_files)
|
||||||
|
LOG.info(
|
||||||
|
"SendEmailBlock: Duplicate files (based on content) attached", duplicate_files_list=duplicate_files_list
|
||||||
|
)
|
||||||
|
|
||||||
return msg
|
return msg
|
||||||
|
|
||||||
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
|
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue