mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2025-09-16 18:29:40 +00:00
More visibility into attached files and duplicate status (#776)
This commit is contained in:
parent
73227963dd
commit
be1c8ba060
2 changed files with 33 additions and 1 deletion
|
@ -1,3 +1,4 @@
|
|||
import hashlib
|
||||
import os
|
||||
import tempfile
|
||||
import zipfile
|
||||
|
@ -86,3 +87,12 @@ def get_number_of_files_in_directory(directory: Path, recursive: bool = False) -
|
|||
break
|
||||
count += len(files)
|
||||
return count
|
||||
|
||||
|
||||
def calculate_sha256(file_path: str) -> str:
    """Return the hex-encoded SHA256 digest of the file at ``file_path``.

    The file is opened in binary mode and fed to the hasher in 4096-byte
    reads, so the whole content is never held in memory at once.
    """
    read_size = 4096
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # An empty read signals end-of-file and ends the loop.
        while chunk := stream.read(read_size):
            digest.update(chunk)
    return digest.hexdigest()
|
||||
|
|
|
@ -6,6 +6,7 @@ import os
|
|||
import smtplib
|
||||
import textwrap
|
||||
import uuid
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from email.message import EmailMessage
|
||||
from enum import StrEnum
|
||||
|
@ -30,7 +31,12 @@ from skyvern.exceptions import (
|
|||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.forge.sdk.api.aws import AsyncAWSClient
|
||||
from skyvern.forge.sdk.api.files import download_file, download_from_s3, get_path_for_workflow_download_directory
|
||||
from skyvern.forge.sdk.api.files import (
|
||||
calculate_sha256,
|
||||
download_file,
|
||||
download_from_s3,
|
||||
get_path_for_workflow_download_directory,
|
||||
)
|
||||
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
|
||||
from skyvern.forge.sdk.schemas.tasks import TaskOutput, TaskStatus
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
|
@ -905,6 +911,8 @@ class SendEmailBlock(Block):
|
|||
else:
|
||||
msg.set_content(self.body)
|
||||
|
||||
file_names_by_hash: dict[str, list[str]] = defaultdict(list)
|
||||
|
||||
for filename in self._get_file_paths(workflow_run_context, workflow_run_id):
|
||||
path = None
|
||||
try:
|
||||
|
@ -961,10 +969,24 @@ class SendEmailBlock(Block):
|
|||
subtype=subtype,
|
||||
filename=attachment_filename,
|
||||
)
|
||||
file_hash = calculate_sha256(path)
|
||||
file_names_by_hash[file_hash].append(path)
|
||||
finally:
|
||||
if path:
|
||||
os.unlink(path)
|
||||
|
||||
# Calculate file stats based on content hashes
|
||||
total_files = sum(len(files) for files in file_names_by_hash.values())
|
||||
unique_files = len(file_names_by_hash)
|
||||
duplicate_files_list = [files for files in file_names_by_hash.values() if len(files) > 1]
|
||||
|
||||
# Log file statistics
|
||||
LOG.info("SendEmailBlock: Total files attached", total_files=total_files)
|
||||
LOG.info("SendEmailBlock: Unique files (based on content) attached", unique_files=unique_files)
|
||||
LOG.info(
|
||||
"SendEmailBlock: Duplicate files (based on content) attached", duplicate_files_list=duplicate_files_list
|
||||
)
|
||||
|
||||
return msg
|
||||
|
||||
async def execute(self, workflow_run_id: str, **kwargs: dict) -> BlockResult:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue