add async content processing

This commit is contained in:
LUIS NOVO 2024-11-11 17:32:35 -03:00
parent ac2ea9e554
commit 00f070a644
10 changed files with 541 additions and 395 deletions

View file

@ -1,11 +1,13 @@
import asyncio
from loguru import logger
from open_notebook.graphs.content_processing.state import ContentState
def extract_txt(state: ContentState):
async def extract_txt(state: ContentState):
"""
Parse the text file and print its content.
Parse the text file and extract its content asynchronously.
"""
return_dict = {}
if (
@ -14,12 +16,22 @@ def extract_txt(state: ContentState):
):
logger.debug(f"Extracting text from {state.get('file_path')}")
file_path = state.get("file_path")
if file_path is not None:
try:
with open(file_path, "r", encoding="utf-8") as file:
content = file.read()
logger.debug(f"Extracted: {content[:100]}")
return_dict["content"] = content
def _read_file():
with open(file_path, "r", encoding="utf-8") as file:
return file.read()
# Run file I/O in thread pool
content = await asyncio.get_event_loop().run_in_executor(
None, _read_file
)
logger.debug(f"Extracted: {content[:100]}")
return_dict["content"] = content
except FileNotFoundError:
raise FileNotFoundError(f"File not found at {file_path}")
except Exception as e: