mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-04 19:49:09 +00:00
31 lines
852 B
Python
31 lines
852 B
Python
"""
|
|
Base functionality and shared imports for document processors.
|
|
"""
|
|
|
|
from langchain_community.document_transformers import MarkdownifyTransformer
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy.future import select
|
|
|
|
from app.db import Document
|
|
|
|
# Module-level MarkdownifyTransformer instance, built once when this module is imported
md = MarkdownifyTransformer()
|
|
|
|
|
|
async def check_duplicate_document(
    session: AsyncSession, content_hash: str
) -> Document | None:
    """
    Look up a previously stored document by its content hash.

    Args:
        session: Async database session used to run the query
        content_hash: Content hash to match against stored documents

    Returns:
        The first matching Document, or None when no row matches
    """
    # Build the lookup up front so the awaited call stays short.
    duplicate_query = select(Document).where(Document.content_hash == content_hash)
    query_result = await session.execute(duplicate_query)
    # first() yields None on an empty result and does not raise when
    # several rows share the same hash.
    return query_result.scalars().first()
|