fix: Resolve merge conflict in documents_routes.py

- Integrated Docling ETL service with new task logging system
- Maintained consistent logging pattern across all ETL services
- Added progress and success/failure logging for Docling processing
Abdullah 3li 2025-07-21 10:43:15 +03:00
commit f117d94ef7
34 changed files with 4160 additions and 520 deletions


@@ -0,0 +1,71 @@
"""Add LogLevel and LogStatus enums and logs table
Revision ID: 12
Revises: 11
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSON
# revision identifiers, used by Alembic.
revision: str = "12"
down_revision: Union[str, None] = "11"
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
"""Upgrade schema - add LogLevel and LogStatus enums and logs table."""
# Create LogLevel enum
op.execute("""
CREATE TYPE loglevel AS ENUM ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
""")
# Create LogStatus enum
op.execute("""
CREATE TYPE logstatus AS ENUM ('IN_PROGRESS', 'SUCCESS', 'FAILED')
""")
# Create logs table
op.execute("""
CREATE TABLE logs (
id SERIAL PRIMARY KEY,
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
level loglevel NOT NULL,
status logstatus NOT NULL,
message TEXT NOT NULL,
source VARCHAR(200),
log_metadata JSONB DEFAULT '{}',
search_space_id INTEGER NOT NULL REFERENCES searchspaces(id) ON DELETE CASCADE
)
""")
# Create indexes
op.create_index(op.f('ix_logs_id'), 'logs', ['id'], unique=False)
op.create_index(op.f('ix_logs_created_at'), 'logs', ['created_at'], unique=False)
op.create_index(op.f('ix_logs_level'), 'logs', ['level'], unique=False)
op.create_index(op.f('ix_logs_status'), 'logs', ['status'], unique=False)
op.create_index(op.f('ix_logs_source'), 'logs', ['source'], unique=False)
def downgrade() -> None:
"""Downgrade schema - remove logs table and enums."""
# Drop indexes
op.drop_index(op.f('ix_logs_source'), table_name='logs')
op.drop_index(op.f('ix_logs_status'), table_name='logs')
op.drop_index(op.f('ix_logs_level'), table_name='logs')
op.drop_index(op.f('ix_logs_created_at'), table_name='logs')
op.drop_index(op.f('ix_logs_id'), table_name='logs')
# Drop logs table
op.drop_table('logs')
# Drop enums
op.execute("DROP TYPE IF EXISTS logstatus")
op.execute("DROP TYPE IF EXISTS loglevel")

File diff suppressed because it is too large


@@ -91,6 +91,18 @@ class LiteLLMProvider(str, Enum):
ALEPH_ALPHA = "ALEPH_ALPHA"
PETALS = "PETALS"
CUSTOM = "CUSTOM"
class LogLevel(str, Enum):
DEBUG = "DEBUG"
INFO = "INFO"
WARNING = "WARNING"
ERROR = "ERROR"
CRITICAL = "CRITICAL"
class LogStatus(str, Enum):
IN_PROGRESS = "IN_PROGRESS"
SUCCESS = "SUCCESS"
FAILED = "FAILED"
class Base(DeclarativeBase):
pass
@@ -163,6 +175,7 @@ class SearchSpace(BaseModel, TimestampMixin):
documents = relationship("Document", back_populates="search_space", order_by="Document.id", cascade="all, delete-orphan")
podcasts = relationship("Podcast", back_populates="search_space", order_by="Podcast.id", cascade="all, delete-orphan")
chats = relationship('Chat', back_populates='search_space', order_by='Chat.id', cascade="all, delete-orphan")
logs = relationship("Log", back_populates="search_space", order_by="Log.id", cascade="all, delete-orphan")
class SearchSourceConnector(BaseModel, TimestampMixin):
__tablename__ = "search_source_connectors"
@@ -196,6 +209,18 @@ class LLMConfig(BaseModel, TimestampMixin):
user_id = Column(UUID(as_uuid=True), ForeignKey("user.id", ondelete='CASCADE'), nullable=False)
user = relationship("User", back_populates="llm_configs", foreign_keys=[user_id])
class Log(BaseModel, TimestampMixin):
__tablename__ = "logs"
level = Column(SQLAlchemyEnum(LogLevel), nullable=False, index=True)
status = Column(SQLAlchemyEnum(LogStatus), nullable=False, index=True)
message = Column(Text, nullable=False)
source = Column(String(200), nullable=True, index=True) # Service/component that generated the log
log_metadata = Column(JSON, nullable=True, default={}) # Additional context data
search_space_id = Column(Integer, ForeignKey("searchspaces.id", ondelete='CASCADE'), nullable=False)
search_space = relationship("SearchSpace", back_populates="logs")
if config.AUTH_TYPE == "GOOGLE": if config.AUTH_TYPE == "GOOGLE":
class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base): class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, Base):
pass pass
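For context, a minimal sketch of how the new Log model is written and queried through the ORM, assuming Log, LogLevel, and LogStatus are importable from app.db and an async session factory like the one used elsewhere in the codebase (the helper name and field values are illustrative):

    from sqlalchemy import select
    from app.db import Log, LogLevel, LogStatus

    async def record_and_list_failures(async_session_maker, search_space_id: int):
        async with async_session_maker() as session:
            # Write one log row tied to a search space.
            entry = Log(
                level=LogLevel.ERROR,
                status=LogStatus.FAILED,
                message="Example failure",
                source="example_source",
                log_metadata={"task_name": "example_task"},
                search_space_id=search_space_id,
            )
            session.add(entry)
            await session.commit()

            # Read back the failed logs for that search space, newest first.
            result = await session.execute(
                select(Log)
                .where(Log.search_space_id == search_space_id, Log.status == LogStatus.FAILED)
                .order_by(Log.created_at.desc())
            )
            return result.scalars().all()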


@@ -5,6 +5,7 @@ from .podcasts_routes import router as podcasts_router
from .chats_routes import router as chats_router
from .search_source_connectors_routes import router as search_source_connectors_router
from .llm_config_routes import router as llm_config_router
from .logs_routes import router as logs_router
router = APIRouter()
@@ -14,3 +15,4 @@ router.include_router(podcasts_router)
router.include_router(chats_router)
router.include_router(search_source_connectors_router)
router.include_router(llm_config_router)
router.include_router(logs_router)


@@ -54,32 +54,23 @@ async def handle_chat_data(
if message['role'] == "user":
langchain_chat_history.append(HumanMessage(content=message['content']))
elif message['role'] == "assistant":
-# Find the last "ANSWER" annotation specifically
-answer_annotation = None
-for annotation in reversed(message['annotations']):
-if annotation['type'] == "ANSWER":
-answer_annotation = annotation
-break
-if answer_annotation:
-answer_text = answer_annotation['content']
-# If content is a list, join it into a single string
-if isinstance(answer_text, list):
-answer_text = "\n".join(answer_text)
-langchain_chat_history.append(AIMessage(content=answer_text))
langchain_chat_history.append(AIMessage(content=message['content']))
-response = StreamingResponse(stream_connector_search_results(
-user_query,
-user.id,
-search_space_id, # Already converted to int in lines 32-37
-session,
-research_mode,
-selected_connectors,
-langchain_chat_history,
-search_mode_str,
-document_ids_to_add_in_context
-))
-response.headers['x-vercel-ai-data-stream'] = 'v1'
response = StreamingResponse(
stream_connector_search_results(
user_query,
user.id,
search_space_id,
session,
research_mode,
selected_connectors,
langchain_chat_history,
search_mode_str,
document_ids_to_add_in_context,
)
)
response.headers["x-vercel-ai-data-stream"] = "v1"
return response


@@ -135,11 +135,19 @@ async def process_file_in_background(
filename: str,
search_space_id: int,
user_id: str,
-session: AsyncSession
session: AsyncSession,
task_logger: 'TaskLoggingService',
log_entry: 'Log'
):
try:
# Check if the file is a markdown or text file
if filename.lower().endswith(('.md', '.markdown', '.txt')):
await task_logger.log_task_progress(
log_entry,
f"Processing markdown/text file: {filename}",
{"file_type": "markdown", "processing_stage": "reading_file"}
)
# For markdown files, read the content directly
with open(file_path, 'r', encoding='utf-8') as f:
markdown_content = f.read()
@@ -151,16 +159,42 @@
except:
pass
await task_logger.log_task_progress(
log_entry,
f"Creating document from markdown content: {filename}",
{"processing_stage": "creating_document", "content_length": len(markdown_content)}
)
# Process markdown directly through specialized function
-await add_received_markdown_file_document(
result = await add_received_markdown_file_document(
session,
filename,
markdown_content,
search_space_id,
user_id
)
if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed markdown file: {filename}",
{"document_id": result.id, "content_hash": result.content_hash, "file_type": "markdown"}
)
else:
await task_logger.log_task_success(
log_entry,
f"Markdown file already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "markdown"}
)
# Check if the file is an audio file
elif filename.lower().endswith(('.mp3', '.mp4', '.mpeg', '.mpga', '.m4a', '.wav', '.webm')):
await task_logger.log_task_progress(
log_entry,
f"Processing audio file for transcription: {filename}",
{"file_type": "audio", "processing_stage": "starting_transcription"}
)
# Open the audio file for transcription
with open(file_path, "rb") as audio_file:
# Use LiteLLM for audio transcription
@@ -184,6 +218,12 @@
# Add metadata about the transcription
transcribed_text = f"# Transcription of {filename}\n\n{transcribed_text}"
await task_logger.log_task_progress(
log_entry,
f"Transcription completed, creating document: {filename}",
{"processing_stage": "transcription_complete", "transcript_length": len(transcribed_text)}
)
# Clean up the temp file
try:
os.unlink(file_path)
@@ -191,15 +231,35 @@
pass
# Process transcription as markdown document
-await add_received_markdown_file_document(
result = await add_received_markdown_file_document(
session,
filename,
transcribed_text,
search_space_id,
user_id
)
if result:
await task_logger.log_task_success(
log_entry,
f"Successfully transcribed and processed audio file: {filename}",
{"document_id": result.id, "content_hash": result.content_hash, "file_type": "audio", "transcript_length": len(transcribed_text)}
)
else:
await task_logger.log_task_success(
log_entry,
f"Audio file transcript already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "audio"}
)
else:
if app_config.ETL_SERVICE == "UNSTRUCTURED":
await task_logger.log_task_progress(
log_entry,
f"Processing file with Unstructured ETL: {filename}",
{"file_type": "document", "etl_service": "UNSTRUCTURED", "processing_stage": "loading"}
)
from langchain_unstructured import UnstructuredLoader
# Process the file
@@ -215,6 +275,12 @@
docs = await loader.aload()
await task_logger.log_task_progress(
log_entry,
f"Unstructured ETL completed, creating document: {filename}",
{"processing_stage": "etl_complete", "elements_count": len(docs)}
)
# Clean up the temp file
import os
try:
@@ -223,14 +289,34 @@
pass
# Pass the documents to the existing background task
-await add_received_file_document_using_unstructured(
result = await add_received_file_document_using_unstructured(
session,
filename,
docs,
search_space_id,
user_id
)
if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed file with Unstructured: {filename}",
{"document_id": result.id, "content_hash": result.content_hash, "file_type": "document", "etl_service": "UNSTRUCTURED"}
)
else:
await task_logger.log_task_success(
log_entry,
f"Document already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "document", "etl_service": "UNSTRUCTURED"}
)
elif app_config.ETL_SERVICE == "LLAMACLOUD": elif app_config.ETL_SERVICE == "LLAMACLOUD":
await task_logger.log_task_progress(
log_entry,
f"Processing file with LlamaCloud ETL: {filename}",
{"file_type": "document", "etl_service": "LLAMACLOUD", "processing_stage": "parsing"}
)
from llama_cloud_services import LlamaParse
from llama_cloud_services.parse.utils import ResultType
@@ -257,19 +343,45 @@
# Get markdown documents from the result
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
await task_logger.log_task_progress(
log_entry,
f"LlamaCloud parsing completed, creating documents: {filename}",
{"processing_stage": "parsing_complete", "documents_count": len(markdown_documents)}
)
for doc in markdown_documents:
# Extract text content from the markdown documents
markdown_content = doc.text
# Process the documents using our LlamaCloud background task
-await add_received_file_document_using_llamacloud(
doc_result = await add_received_file_document_using_llamacloud(
session,
filename,
llamacloud_markdown_document=markdown_content,
search_space_id=search_space_id,
user_id=user_id
)
if doc_result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed file with LlamaCloud: {filename}",
{"document_id": doc_result.id, "content_hash": doc_result.content_hash, "file_type": "document", "etl_service": "LLAMACLOUD"}
)
else:
await task_logger.log_task_success(
log_entry,
f"Document already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "document", "etl_service": "LLAMACLOUD"}
)
elif app_config.ETL_SERVICE == "DOCLING": elif app_config.ETL_SERVICE == "DOCLING":
await task_logger.log_task_progress(
log_entry,
f"Processing file with Docling ETL: {filename}",
{"file_type": "document", "etl_service": "DOCLING", "processing_stage": "parsing"}
)
# Use Docling service for document processing
from app.services.document_processing.docling_service import create_docling_service
@@ -286,17 +398,43 @@
except:
pass
await task_logger.log_task_progress(
log_entry,
f"Docling parsing completed, creating document: {filename}",
{"processing_stage": "parsing_complete", "content_length": len(result['content'])}
)
# Process the document using our Docling background task
-await add_received_file_document_using_docling(
doc_result = await add_received_file_document_using_docling(
session,
filename,
docling_markdown_document=result['content'],
search_space_id=search_space_id,
user_id=user_id
)
if doc_result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed file with Docling: {filename}",
{"document_id": doc_result.id, "content_hash": doc_result.content_hash, "file_type": "document", "etl_service": "DOCLING"}
)
else:
await task_logger.log_task_success(
log_entry,
f"Document already exists (duplicate): {filename}",
{"duplicate_detected": True, "file_type": "document", "etl_service": "DOCLING"}
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process file: {filename}",
str(e),
{"error_type": type(e).__name__, "filename": filename}
)
import logging
logging.error(f"Error processing file in background: {str(e)}")
raise # Re-raise so the wrapper can also handle it
@router.get("/documents/", response_model=List[DocumentRead]) @router.get("/documents/", response_model=List[DocumentRead])
@ -467,11 +605,47 @@ async def process_extension_document_with_new_session(
): ):
"""Create a new session and process extension document.""" """Create a new session and process extension document."""
from app.db import async_session_maker from app.db import async_session_maker
from app.services.task_logging_service import TaskLoggingService
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_extension_document",
source="document_processor",
message=f"Starting processing of extension document from {individual_document.metadata.VisitedWebPageTitle}",
metadata={
"document_type": "EXTENSION",
"url": individual_document.metadata.VisitedWebPageURL,
"title": individual_document.metadata.VisitedWebPageTitle,
"user_id": user_id
}
)
try:
-await add_extension_received_document(session, individual_document, search_space_id, user_id)
result = await add_extension_received_document(session, individual_document, search_space_id, user_id)
if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed extension document: {individual_document.metadata.VisitedWebPageTitle}",
{"document_id": result.id, "content_hash": result.content_hash}
)
else:
await task_logger.log_task_success(
log_entry,
f"Extension document already exists (duplicate): {individual_document.metadata.VisitedWebPageTitle}",
{"duplicate_detected": True}
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process extension document: {individual_document.metadata.VisitedWebPageTitle}",
str(e),
{"error_type": type(e).__name__}
)
import logging
logging.error(f"Error processing extension document: {str(e)}")
@@ -483,11 +657,46 @@ async def process_crawled_url_with_new_session(
):
"""Create a new session and process crawled URL."""
from app.db import async_session_maker
from app.services.task_logging_service import TaskLoggingService
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_crawled_url",
source="document_processor",
message=f"Starting URL crawling and processing for: {url}",
metadata={
"document_type": "CRAWLED_URL",
"url": url,
"user_id": user_id
}
)
try:
-await add_crawled_url_document(session, url, search_space_id, user_id)
result = await add_crawled_url_document(session, url, search_space_id, user_id)
if result:
await task_logger.log_task_success(
log_entry,
f"Successfully crawled and processed URL: {url}",
{"document_id": result.id, "title": result.title, "content_hash": result.content_hash}
)
else:
await task_logger.log_task_success(
log_entry,
f"URL document already exists (duplicate): {url}",
{"duplicate_detected": True}
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to crawl URL: {url}",
str(e),
{"error_type": type(e).__name__}
)
import logging
logging.error(f"Error processing crawled URL: {str(e)}")
@@ -500,9 +709,38 @@ async def process_file_in_background_with_new_session(
):
"""Create a new session and process file."""
from app.db import async_session_maker
from app.services.task_logging_service import TaskLoggingService
async with async_session_maker() as session:
-await process_file_in_background(file_path, filename, search_space_id, user_id, session)
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_file_upload",
source="document_processor",
message=f"Starting file processing for: {filename}",
metadata={
"document_type": "FILE",
"filename": filename,
"file_path": file_path,
"user_id": user_id
}
)
try:
await process_file_in_background(file_path, filename, search_space_id, user_id, session, task_logger, log_entry)
# Note: success/failure logging is handled within process_file_in_background
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process file: {filename}",
str(e),
{"error_type": type(e).__name__}
)
import logging
logging.error(f"Error processing file: {str(e)}")
async def process_youtube_video_with_new_session(
@@ -512,11 +750,46 @@
):
"""Create a new session and process YouTube video."""
from app.db import async_session_maker
from app.services.task_logging_service import TaskLoggingService
async with async_session_maker() as session:
# Initialize task logging service
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="process_youtube_video",
source="document_processor",
message=f"Starting YouTube video processing for: {url}",
metadata={
"document_type": "YOUTUBE_VIDEO",
"url": url,
"user_id": user_id
}
)
try:
-await add_youtube_video_document(session, url, search_space_id, user_id)
result = await add_youtube_video_document(session, url, search_space_id, user_id)
if result:
await task_logger.log_task_success(
log_entry,
f"Successfully processed YouTube video: {result.title}",
{"document_id": result.id, "video_id": result.document_metadata.get("video_id"), "content_hash": result.content_hash}
)
else:
await task_logger.log_task_success(
log_entry,
f"YouTube video document already exists (duplicate): {url}",
{"duplicate_detected": True}
)
except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to process YouTube video: {url}",
str(e),
{"error_type": type(e).__name__}
)
import logging
logging.error(f"Error processing YouTube video: {str(e)}")


@@ -0,0 +1,280 @@
from fastapi import APIRouter, Depends, HTTPException, Query
from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.future import select
from sqlalchemy import and_, desc
from typing import List, Optional
from datetime import datetime, timedelta
from app.db import get_async_session, User, SearchSpace, Log, LogLevel, LogStatus
from app.schemas import LogCreate, LogUpdate, LogRead, LogFilter
from app.users import current_active_user
from app.utils.check_ownership import check_ownership
router = APIRouter()
@router.post("/logs/", response_model=LogRead)
async def create_log(
log: LogCreate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
):
"""Create a new log entry."""
try:
# Check if the user owns the search space
await check_ownership(session, SearchSpace, log.search_space_id, user)
db_log = Log(**log.model_dump())
session.add(db_log)
await session.commit()
await session.refresh(db_log)
return db_log
except HTTPException:
raise
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to create log: {str(e)}"
)
@router.get("/logs/", response_model=List[LogRead])
async def read_logs(
skip: int = 0,
limit: int = 100,
search_space_id: Optional[int] = None,
level: Optional[LogLevel] = None,
status: Optional[LogStatus] = None,
source: Optional[str] = None,
start_date: Optional[datetime] = None,
end_date: Optional[datetime] = None,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
):
"""Get logs with optional filtering."""
try:
# Build base query - only logs from user's search spaces
query = (
select(Log)
.join(SearchSpace)
.filter(SearchSpace.user_id == user.id)
.order_by(desc(Log.created_at)) # Most recent first
)
# Apply filters
filters = []
if search_space_id is not None:
await check_ownership(session, SearchSpace, search_space_id, user)
filters.append(Log.search_space_id == search_space_id)
if level is not None:
filters.append(Log.level == level)
if status is not None:
filters.append(Log.status == status)
if source is not None:
filters.append(Log.source.ilike(f"%{source}%"))
if start_date is not None:
filters.append(Log.created_at >= start_date)
if end_date is not None:
filters.append(Log.created_at <= end_date)
if filters:
query = query.filter(and_(*filters))
# Apply pagination
result = await session.execute(query.offset(skip).limit(limit))
return result.scalars().all()
except HTTPException:
raise
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to fetch logs: {str(e)}"
)
@router.get("/logs/{log_id}", response_model=LogRead)
async def read_log(
log_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
):
"""Get a specific log by ID."""
try:
# Get log and verify user owns the search space
result = await session.execute(
select(Log)
.join(SearchSpace)
.filter(Log.id == log_id, SearchSpace.user_id == user.id)
)
log = result.scalars().first()
if not log:
raise HTTPException(status_code=404, detail="Log not found")
return log
except HTTPException:
raise
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to fetch log: {str(e)}"
)
@router.put("/logs/{log_id}", response_model=LogRead)
async def update_log(
log_id: int,
log_update: LogUpdate,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
):
"""Update a log entry."""
try:
# Get log and verify user owns the search space
result = await session.execute(
select(Log)
.join(SearchSpace)
.filter(Log.id == log_id, SearchSpace.user_id == user.id)
)
db_log = result.scalars().first()
if not db_log:
raise HTTPException(status_code=404, detail="Log not found")
# Update only provided fields
update_data = log_update.model_dump(exclude_unset=True)
for field, value in update_data.items():
setattr(db_log, field, value)
await session.commit()
await session.refresh(db_log)
return db_log
except HTTPException:
raise
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to update log: {str(e)}"
)
@router.delete("/logs/{log_id}")
async def delete_log(
log_id: int,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
):
"""Delete a log entry."""
try:
# Get log and verify user owns the search space
result = await session.execute(
select(Log)
.join(SearchSpace)
.filter(Log.id == log_id, SearchSpace.user_id == user.id)
)
db_log = result.scalars().first()
if not db_log:
raise HTTPException(status_code=404, detail="Log not found")
await session.delete(db_log)
await session.commit()
return {"message": "Log deleted successfully"}
except HTTPException:
raise
except Exception as e:
await session.rollback()
raise HTTPException(
status_code=500,
detail=f"Failed to delete log: {str(e)}"
)
@router.get("/logs/search-space/{search_space_id}/summary")
async def get_logs_summary(
search_space_id: int,
hours: int = 24,
session: AsyncSession = Depends(get_async_session),
user: User = Depends(current_active_user)
):
"""Get a summary of logs for a search space in the last X hours."""
try:
# Check ownership
await check_ownership(session, SearchSpace, search_space_id, user)
# Calculate time window
since = datetime.utcnow().replace(microsecond=0) - timedelta(hours=hours)
# Get logs from the time window
result = await session.execute(
select(Log)
.filter(
and_(
Log.search_space_id == search_space_id,
Log.created_at >= since
)
)
.order_by(desc(Log.created_at))
)
logs = result.scalars().all()
# Create summary
summary = {
"total_logs": len(logs),
"time_window_hours": hours,
"by_status": {},
"by_level": {},
"by_source": {},
"active_tasks": [],
"recent_failures": []
}
# Count by status and level
for log in logs:
# Status counts
status_str = log.status.value
summary["by_status"][status_str] = summary["by_status"].get(status_str, 0) + 1
# Level counts
level_str = log.level.value
summary["by_level"][level_str] = summary["by_level"].get(level_str, 0) + 1
# Source counts
if log.source:
summary["by_source"][log.source] = summary["by_source"].get(log.source, 0) + 1
# Active tasks (IN_PROGRESS)
if log.status == LogStatus.IN_PROGRESS:
task_name = log.log_metadata.get("task_name", "Unknown") if log.log_metadata else "Unknown"
summary["active_tasks"].append({
"id": log.id,
"task_name": task_name,
"message": log.message,
"started_at": log.created_at,
"source": log.source
})
# Recent failures
if log.status == LogStatus.FAILED and len(summary["recent_failures"]) < 10:
task_name = log.log_metadata.get("task_name", "Unknown") if log.log_metadata else "Unknown"
summary["recent_failures"].append({
"id": log.id,
"task_name": task_name,
"message": log.message,
"failed_at": log.created_at,
"source": log.source,
"error_details": log.log_metadata.get("error_details") if log.log_metadata else None
})
return summary
except HTTPException:
raise
except Exception as e:
raise HTTPException(
status_code=500,
detail=f"Failed to generate logs summary: {str(e)}"
)
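A minimal client-side sketch of the summary endpoint above (the base URL, any API prefix, and the bearer-token auth header are assumptions for illustration):

    import httpx

    async def fetch_logs_summary(base_url: str, token: str, search_space_id: int, hours: int = 24):
        # GET /logs/search-space/{search_space_id}/summary?hours=24
        async with httpx.AsyncClient(base_url=base_url) as client:
            resp = await client.get(
                f"/logs/search-space/{search_space_id}/summary",
                params={"hours": hours},
                headers={"Authorization": f"Bearer {token}"},
            )
            resp.raise_for_status()
            # Returns counts by status/level/source plus active_tasks and recent_failures.
            return resp.json()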


@@ -14,6 +14,7 @@ from .podcasts import PodcastBase, PodcastCreate, PodcastUpdate, PodcastRead, Po
from .chats import ChatBase, ChatCreate, ChatUpdate, ChatRead, AISDKChatRequest
from .search_source_connector import SearchSourceConnectorBase, SearchSourceConnectorCreate, SearchSourceConnectorUpdate, SearchSourceConnectorRead
from .llm_config import LLMConfigBase, LLMConfigCreate, LLMConfigUpdate, LLMConfigRead
from .logs import LogBase, LogCreate, LogUpdate, LogRead, LogFilter
__all__ = [
"AISDKChatRequest",
@@ -53,4 +54,9 @@ __all__ = [
"LLMConfigCreate",
"LLMConfigUpdate",
"LLMConfigRead",
"LogBase",
"LogCreate",
"LogUpdate",
"LogRead",
"LogFilter",
]


@@ -0,0 +1,44 @@
from datetime import datetime
from typing import Optional, Dict, Any
from pydantic import BaseModel, ConfigDict
from .base import IDModel, TimestampModel
from app.db import LogLevel, LogStatus
class LogBase(BaseModel):
level: LogLevel
status: LogStatus
message: str
source: Optional[str] = None
log_metadata: Optional[Dict[str, Any]] = None
class LogCreate(BaseModel):
level: LogLevel
status: LogStatus
message: str
source: Optional[str] = None
log_metadata: Optional[Dict[str, Any]] = None
search_space_id: int
class LogUpdate(BaseModel):
level: Optional[LogLevel] = None
status: Optional[LogStatus] = None
message: Optional[str] = None
source: Optional[str] = None
log_metadata: Optional[Dict[str, Any]] = None
class LogRead(LogBase, IDModel, TimestampModel):
id: int
created_at: datetime
search_space_id: int
model_config = ConfigDict(from_attributes=True)
class LogFilter(BaseModel):
level: Optional[LogLevel] = None
status: Optional[LogStatus] = None
source: Optional[str] = None
search_space_id: Optional[int] = None
start_date: Optional[datetime] = None
end_date: Optional[datetime] = None
model_config = ConfigDict(from_attributes=True)


@@ -23,17 +23,138 @@ class StreamingService:
"content": []
}
]
-# It is used to send annotations to the frontend
# DEPRECATED: This sends the full annotation array every time (inefficient)
def _format_annotations(self) -> str:
"""
Format the annotations as a string
DEPRECATED: This method sends the full annotation state every time.
Use the delta formatters instead for optimal streaming.
Returns:
str: The formatted annotations string
"""
return f'8:{json.dumps(self.message_annotations)}\n'
-# It is used to end Streaming
def format_terminal_info_delta(self, text: str, message_type: str = "info") -> str:
"""
Format a single terminal info message as a delta annotation
Args:
text: The terminal message text
message_type: The message type (info, error, success, etc.)
Returns:
str: The formatted annotation delta string
"""
message = {"id": self.terminal_idx, "text": text, "type": message_type}
self.terminal_idx += 1
# Update internal state for reference
self.message_annotations[0]["content"].append(message)
# Return only the delta annotation
annotation = {"type": "TERMINAL_INFO", "content": [message]}
return f"8:[{json.dumps(annotation)}]\n"
def format_sources_delta(self, sources: List[Dict[str, Any]]) -> str:
"""
Format sources as a delta annotation
Args:
sources: List of source objects
Returns:
str: The formatted annotation delta string
"""
# Update internal state
self.message_annotations[1]["content"] = sources
# Return only the delta annotation
annotation = {"type": "SOURCES", "content": sources}
return f"8:[{json.dumps(annotation)}]\n"
def format_answer_delta(self, answer_chunk: str) -> str:
"""
Format a single answer chunk as a delta annotation
Args:
answer_chunk: The new answer chunk to add
Returns:
str: The formatted annotation delta string
"""
# Update internal state by appending the chunk
if isinstance(self.message_annotations[2]["content"], list):
self.message_annotations[2]["content"].append(answer_chunk)
else:
self.message_annotations[2]["content"] = [answer_chunk]
# Return only the delta annotation with the new chunk
annotation = {"type": "ANSWER", "content": [answer_chunk]}
return f"8:[{json.dumps(annotation)}]\n"
def format_answer_annotation(self, answer_lines: List[str]) -> str:
"""
Format the complete answer as a replacement annotation
Args:
answer_lines: Complete list of answer lines
Returns:
str: The formatted annotation string
"""
# Update internal state
self.message_annotations[2]["content"] = answer_lines
# Return the full answer annotation
annotation = {"type": "ANSWER", "content": answer_lines}
return f"8:[{json.dumps(annotation)}]\n"
def format_further_questions_delta(
self, further_questions: List[Dict[str, Any]]
) -> str:
"""
Format further questions as a delta annotation
Args:
further_questions: List of further question objects
Returns:
str: The formatted annotation delta string
"""
# Update internal state
self.message_annotations[3]["content"] = further_questions
# Return only the delta annotation
annotation = {"type": "FURTHER_QUESTIONS", "content": further_questions}
return f"8:[{json.dumps(annotation)}]\n"
def format_text_chunk(self, text: str) -> str:
"""
Format a text chunk using the text stream part
Args:
text: The text chunk to stream
Returns:
str: The formatted text part string
"""
return f"0:{json.dumps(text)}\n"
def format_error(self, error_message: str) -> str:
"""
Format an error using the error stream part
Args:
error_message: The error message
Returns:
str: The formatted error part string
"""
return f"3:{json.dumps(error_message)}\n"
def format_completion(self, prompt_tokens: int = 156, completion_tokens: int = 204) -> str:
"""
Format a completion message
@@ -56,7 +177,12 @@
}
return f'd:{json.dumps(completion_data)}\n'
# DEPRECATED METHODS: Keep for backward compatibility but mark as deprecated
def only_update_terminal(self, text: str, message_type: str = "info") -> str:
"""
DEPRECATED: Use format_terminal_info_delta() instead for optimal streaming
"""
self.message_annotations[0]["content"].append({
"id": self.terminal_idx,
"text": text,
@@ -66,17 +192,23 @@
return self.message_annotations
def only_update_sources(self, sources: List[Dict[str, Any]]) -> str:
"""
DEPRECATED: Use format_sources_delta() instead for optimal streaming
"""
self.message_annotations[1]["content"] = sources
return self.message_annotations
def only_update_answer(self, answer: List[str]) -> str:
"""
DEPRECATED: Use format_answer_delta() or format_answer_annotation() instead for optimal streaming
"""
self.message_annotations[2]["content"] = answer
return self.message_annotations
def only_update_further_questions(self, further_questions: List[Dict[str, Any]]) -> str:
"""
-Update the further questions annotation
DEPRECATED: Use format_further_questions_delta() instead for optimal streaming
Args:
further_questions: List of further question objects with id and question fields
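Taken together, the delta formatters above let a response generator emit only incremental frames on the data-stream wire format shown in the methods (the 8:, 0:, 3:, and d: prefixes). A minimal sketch of how they might be consumed, assuming an already constructed StreamingService instance and an async iterable of answer chunks (both are assumptions for illustration):

    async def example_stream(streaming_service, answer_chunks):
        # Emit a terminal status line, stream each answer chunk as a delta,
        # then close the stream with a completion frame.
        yield streaming_service.format_terminal_info_delta("Starting search", "info")
        async for chunk in answer_chunks:
            yield streaming_service.format_answer_delta(chunk)
        yield streaming_service.format_completion()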


@@ -0,0 +1,204 @@
from typing import Optional, Dict, Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.db import Log, LogLevel, LogStatus
import logging
import json
from datetime import datetime
logger = logging.getLogger(__name__)
class TaskLoggingService:
"""Service for logging background tasks using the database Log model"""
def __init__(self, session: AsyncSession, search_space_id: int):
self.session = session
self.search_space_id = search_space_id
async def log_task_start(
self,
task_name: str,
source: str,
message: str,
metadata: Optional[Dict[str, Any]] = None
) -> Log:
"""
Log the start of a task with IN_PROGRESS status
Args:
task_name: Name/identifier of the task
source: Source service/component (e.g., 'document_processor', 'slack_indexer')
message: Human-readable message about the task
metadata: Additional context data
Returns:
Log: The created log entry
"""
log_metadata = metadata or {}
log_metadata.update({
"task_name": task_name,
"started_at": datetime.utcnow().isoformat()
})
log_entry = Log(
level=LogLevel.INFO,
status=LogStatus.IN_PROGRESS,
message=message,
source=source,
log_metadata=log_metadata,
search_space_id=self.search_space_id
)
self.session.add(log_entry)
await self.session.commit()
await self.session.refresh(log_entry)
logger.info(f"Started task {task_name}: {message}")
return log_entry
async def log_task_success(
self,
log_entry: Log,
message: str,
additional_metadata: Optional[Dict[str, Any]] = None
) -> Log:
"""
Update a log entry to SUCCESS status
Args:
log_entry: The original log entry to update
message: Success message
additional_metadata: Additional metadata to merge
Returns:
Log: The updated log entry
"""
# Update the existing log entry
log_entry.status = LogStatus.SUCCESS
log_entry.message = message
# Merge additional metadata
if additional_metadata:
if log_entry.log_metadata is None:
log_entry.log_metadata = {}
log_entry.log_metadata.update(additional_metadata)
log_entry.log_metadata["completed_at"] = datetime.utcnow().isoformat()
await self.session.commit()
await self.session.refresh(log_entry)
task_name = log_entry.log_metadata.get("task_name", "unknown") if log_entry.log_metadata else "unknown"
logger.info(f"Completed task {task_name}: {message}")
return log_entry
async def log_task_failure(
self,
log_entry: Log,
error_message: str,
error_details: Optional[str] = None,
additional_metadata: Optional[Dict[str, Any]] = None
) -> Log:
"""
Update a log entry to FAILED status
Args:
log_entry: The original log entry to update
error_message: Error message
error_details: Detailed error information
additional_metadata: Additional metadata to merge
Returns:
Log: The updated log entry
"""
# Update the existing log entry
log_entry.status = LogStatus.FAILED
log_entry.level = LogLevel.ERROR
log_entry.message = error_message
# Merge additional metadata
if log_entry.log_metadata is None:
log_entry.log_metadata = {}
log_entry.log_metadata.update({
"failed_at": datetime.utcnow().isoformat(),
"error_details": error_details
})
if additional_metadata:
log_entry.log_metadata.update(additional_metadata)
await self.session.commit()
await self.session.refresh(log_entry)
task_name = log_entry.log_metadata.get("task_name", "unknown") if log_entry.log_metadata else "unknown"
logger.error(f"Failed task {task_name}: {error_message}")
if error_details:
logger.error(f"Error details: {error_details}")
return log_entry
async def log_task_progress(
self,
log_entry: Log,
progress_message: str,
progress_metadata: Optional[Dict[str, Any]] = None
) -> Log:
"""
Update a log entry with progress information while keeping IN_PROGRESS status
Args:
log_entry: The log entry to update
progress_message: Progress update message
progress_metadata: Additional progress metadata
Returns:
Log: The updated log entry
"""
log_entry.message = progress_message
if progress_metadata:
if log_entry.log_metadata is None:
log_entry.log_metadata = {}
log_entry.log_metadata.update(progress_metadata)
log_entry.log_metadata["last_progress_update"] = datetime.utcnow().isoformat()
await self.session.commit()
await self.session.refresh(log_entry)
task_name = log_entry.log_metadata.get("task_name", "unknown") if log_entry.log_metadata else "unknown"
logger.info(f"Progress update for task {task_name}: {progress_message}")
return log_entry
async def log_simple_event(
self,
level: LogLevel,
source: str,
message: str,
metadata: Optional[Dict[str, Any]] = None
) -> Log:
"""
Log a simple event (not a long-running task)
Args:
level: Log level
source: Source service/component
message: Log message
metadata: Additional context data
Returns:
Log: The created log entry
"""
log_entry = Log(
level=level,
status=LogStatus.SUCCESS, # Simple events are immediately complete
message=message,
source=source,
log_metadata=metadata or {},
search_space_id=self.search_space_id
)
self.session.add(log_entry)
await self.session.commit()
await self.session.refresh(log_entry)
logger.info(f"Logged event from {source}: {message}")
return log_entry
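A minimal usage sketch of the service above, assuming an AsyncSession and a valid search_space_id (the task name, source, and metadata values are illustrative):

    from app.services.task_logging_service import TaskLoggingService

    async def run_example_task(session, search_space_id: int):
        task_logger = TaskLoggingService(session, search_space_id)
        # Create the IN_PROGRESS entry first so failures can always be attached to it.
        log_entry = await task_logger.log_task_start(
            task_name="example_task",
            source="example_source",
            message="Starting example task",
            metadata={"items_expected": 3},
        )
        try:
            await task_logger.log_task_progress(log_entry, "Halfway there", {"stage": "midpoint"})
            # ... do the actual work here ...
            await task_logger.log_task_success(log_entry, "Example task finished", {"items_processed": 3})
        except Exception as e:
            await task_logger.log_task_failure(
                log_entry, "Example task failed", str(e), {"error_type": type(e).__name__}
            )
            raise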


@@ -8,6 +8,7 @@ from app.config import config
from app.prompts import SUMMARY_PROMPT_TEMPLATE
from app.utils.document_converters import convert_document_to_markdown, generate_content_hash
from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
from langchain_core.documents import Document as LangChainDocument
from langchain_community.document_loaders import FireCrawlLoader, AsyncChromiumLoader
from langchain_community.document_transformers import MarkdownifyTransformer
@@ -22,10 +23,34 @@ md = MarkdownifyTransformer()
async def add_crawled_url_document(
session: AsyncSession, url: str, search_space_id: int, user_id: str
) -> Optional[Document]:
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="crawl_url_document",
source="background_task",
message=f"Starting URL crawling process for: {url}",
metadata={"url": url, "user_id": str(user_id)}
)
try:
# URL validation step
await task_logger.log_task_progress(
log_entry,
f"Validating URL: {url}",
{"stage": "validation"}
)
if not validators.url(url):
raise ValueError(f"Url {url} is not a valid URL address")
# Set up crawler
await task_logger.log_task_progress(
log_entry,
f"Setting up crawler for URL: {url}",
{"stage": "crawler_setup", "firecrawl_available": bool(config.FIRECRAWL_API_KEY)}
)
if config.FIRECRAWL_API_KEY:
crawl_loader = FireCrawlLoader(
url=url,
@@ -39,6 +64,13 @@ async def add_crawled_url_document(
else:
crawl_loader = AsyncChromiumLoader(urls=[url], headless=True)
# Perform crawling
await task_logger.log_task_progress(
log_entry,
f"Crawling URL content: {url}",
{"stage": "crawling", "crawler_type": type(crawl_loader).__name__}
)
url_crawled = await crawl_loader.aload()
if type(crawl_loader) == FireCrawlLoader:
@@ -46,6 +78,13 @@
elif type(crawl_loader) == AsyncChromiumLoader:
content_in_markdown = md.transform_documents(url_crawled)[0].page_content
# Format document
await task_logger.log_task_progress(
log_entry,
f"Processing crawled content from: {url}",
{"stage": "content_processing", "content_length": len(content_in_markdown)}
)
# Format document metadata in a more maintainable way
metadata_sections = [
(
@@ -74,6 +113,13 @@
combined_document_string = "\n".join(document_parts)
content_hash = generate_content_hash(combined_document_string, search_space_id)
# Check for duplicates
await task_logger.log_task_progress(
log_entry,
f"Checking for duplicate content: {url}",
{"stage": "duplicate_check", "content_hash": content_hash}
)
# Check if document with this content hash already exists
existing_doc_result = await session.execute(
select(Document).where(Document.content_hash == content_hash)
@@ -81,15 +127,33 @@
existing_document = existing_doc_result.scalars().first()
if existing_document:
await task_logger.log_task_success(
log_entry,
f"Document already exists for URL: {url}",
{"duplicate_detected": True, "existing_document_id": existing_document.id}
)
logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.") logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.")
return existing_document return existing_document
# Get LLM for summary generation
await task_logger.log_task_progress(
log_entry,
f"Preparing for summary generation: {url}",
{"stage": "llm_setup"}
)
# Get user's long context LLM
user_llm = await get_user_long_context_llm(session, user_id)
if not user_llm:
raise RuntimeError(f"No long context LLM configured for user {user_id}")
# Generate summary
await task_logger.log_task_progress(
log_entry,
f"Generating summary for URL content: {url}",
{"stage": "summary_generation"}
)
summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm
summary_result = await summary_chain.ainvoke(
{"document": combined_document_string}
@@ -98,6 +162,12 @@
summary_embedding = config.embedding_model_instance.embed(summary_content)
# Process chunks
await task_logger.log_task_progress(
log_entry,
f"Processing content chunks for URL: {url}",
{"stage": "chunk_processing"}
)
chunks = [
Chunk(
content=chunk.text,
@@ -107,6 +177,12 @@
]
# Create and store document
await task_logger.log_task_progress(
log_entry,
f"Creating document in database for URL: {url}",
{"stage": "document_creation", "chunks_count": len(chunks)}
)
document = Document(
search_space_id=search_space_id,
title=url_crawled[0].metadata["title"]
@@ -124,13 +200,38 @@
await session.commit()
await session.refresh(document)
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully crawled and processed URL: {url}",
{
"document_id": document.id,
"title": document.title,
"content_hash": content_hash,
"chunks_count": len(chunks),
"summary_length": len(summary_content)
}
)
return document
except SQLAlchemyError as db_error:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error while processing URL: {url}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
raise db_error
except Exception as e:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to crawl URL: {url}",
str(e),
{"error_type": type(e).__name__}
)
raise RuntimeError(f"Failed to crawl URL: {str(e)}") raise RuntimeError(f"Failed to crawl URL: {str(e)}")
@ -148,6 +249,20 @@ async def add_extension_received_document(
Returns: Returns:
Document object if successful, None if failed Document object if successful, None if failed
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="extension_document",
source="background_task",
message=f"Processing extension document: {content.metadata.VisitedWebPageTitle}",
metadata={
"url": content.metadata.VisitedWebPageURL,
"title": content.metadata.VisitedWebPageTitle,
"user_id": str(user_id)
}
)
try:
# Format document metadata in a more maintainable way
metadata_sections = [
@@ -188,6 +303,11 @@
existing_document = existing_doc_result.scalars().first()
if existing_document:
await task_logger.log_task_success(
log_entry,
f"Extension document already exists: {content.metadata.VisitedWebPageTitle}",
{"duplicate_detected": True, "existing_document_id": existing_document.id}
)
logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.") logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.")
return existing_document return existing_document
@ -229,19 +349,52 @@ async def add_extension_received_document(
await session.commit() await session.commit()
await session.refresh(document) await session.refresh(document)
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully processed extension document: {content.metadata.VisitedWebPageTitle}",
{
"document_id": document.id,
"content_hash": content_hash,
"url": content.metadata.VisitedWebPageURL
}
)
return document
except SQLAlchemyError as db_error:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error processing extension document: {content.metadata.VisitedWebPageTitle}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
raise db_error
except Exception as e:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to process extension document: {content.metadata.VisitedWebPageTitle}",
str(e),
{"error_type": type(e).__name__}
)
raise RuntimeError(f"Failed to process extension document: {str(e)}") raise RuntimeError(f"Failed to process extension document: {str(e)}")
async def add_received_markdown_file_document( async def add_received_markdown_file_document(
session: AsyncSession, file_name: str, file_in_markdown: str, search_space_id: int, user_id: str session: AsyncSession, file_name: str, file_in_markdown: str, search_space_id: int, user_id: str
) -> Optional[Document]: ) -> Optional[Document]:
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="markdown_file_document",
source="background_task",
message=f"Processing markdown file: {file_name}",
metadata={"filename": file_name, "user_id": str(user_id), "content_length": len(file_in_markdown)}
)
try:
content_hash = generate_content_hash(file_in_markdown, search_space_id)
@@ -252,6 +405,11 @@
existing_document = existing_doc_result.scalars().first()
if existing_document:
await task_logger.log_task_success(
log_entry,
f"Markdown file document already exists: {file_name}",
{"duplicate_detected": True, "existing_document_id": existing_document.id}
)
logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.") logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.")
return existing_document return existing_document
@ -293,12 +451,36 @@ async def add_received_markdown_file_document(
await session.commit() await session.commit()
await session.refresh(document) await session.refresh(document)
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully processed markdown file: {file_name}",
{
"document_id": document.id,
"content_hash": content_hash,
"chunks_count": len(chunks),
"summary_length": len(summary_content)
}
)
return document
except SQLAlchemyError as db_error:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error processing markdown file: {file_name}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
raise db_error raise db_error
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to process markdown file: {file_name}",
str(e),
{"error_type": type(e).__name__}
)
raise RuntimeError(f"Failed to process file document: {str(e)}") raise RuntimeError(f"Failed to process file document: {str(e)}")
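
Both document paths above deduplicate on generate_content_hash(content, search_space_id), but that helper is outside this diff. Below is a minimal sketch of what it could look like, assuming a SHA-256 digest scoped to the search space; the project's actual helper may construct the hash differently.

```python
# Hypothetical sketch only - generate_content_hash is imported elsewhere in the
# codebase and is not shown in this diff; the hashing scheme here is assumed.
import hashlib


def generate_content_hash(content: str, search_space_id: int) -> str:
    # Scope the digest to the search space so identical content saved in two
    # different search spaces is not flagged as a cross-space duplicate.
    return hashlib.sha256(f"{search_space_id}:{content}".encode("utf-8")).hexdigest()
```
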
@ -566,8 +748,24 @@ async def add_youtube_video_document(
SQLAlchemyError: If there's a database error SQLAlchemyError: If there's a database error
RuntimeError: If the video processing fails RuntimeError: If the video processing fails
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="youtube_video_document",
source="background_task",
message=f"Starting YouTube video processing for: {url}",
metadata={"url": url, "user_id": str(user_id)}
)
try: try:
# Extract video ID from URL # Extract video ID from URL
await task_logger.log_task_progress(
log_entry,
f"Extracting video ID from URL: {url}",
{"stage": "video_id_extraction"}
)
def get_youtube_video_id(url: str): def get_youtube_video_id(url: str):
parsed_url = urlparse(url) parsed_url = urlparse(url)
hostname = parsed_url.hostname hostname = parsed_url.hostname
@ -589,7 +787,19 @@ async def add_youtube_video_document(
if not video_id: if not video_id:
raise ValueError(f"Could not extract video ID from URL: {url}") raise ValueError(f"Could not extract video ID from URL: {url}")
# Get video metadata using async HTTP client await task_logger.log_task_progress(
log_entry,
f"Video ID extracted: {video_id}",
{"stage": "video_id_extracted", "video_id": video_id}
)
# Get video metadata
await task_logger.log_task_progress(
log_entry,
f"Fetching video metadata for: {video_id}",
{"stage": "metadata_fetch"}
)
params = { params = {
"format": "json", "format": "json",
"url": f"https://www.youtube.com/watch?v={video_id}", "url": f"https://www.youtube.com/watch?v={video_id}",
@ -600,7 +810,19 @@ async def add_youtube_video_document(
async with http_session.get(oembed_url, params=params) as response: async with http_session.get(oembed_url, params=params) as response:
video_data = await response.json() video_data = await response.json()
await task_logger.log_task_progress(
log_entry,
f"Video metadata fetched: {video_data.get('title', 'Unknown')}",
{"stage": "metadata_fetched", "title": video_data.get('title'), "author": video_data.get('author_name')}
)
# Get video transcript # Get video transcript
await task_logger.log_task_progress(
log_entry,
f"Fetching transcript for video: {video_id}",
{"stage": "transcript_fetch"}
)
try: try:
captions = YouTubeTranscriptApi.get_transcript(video_id) captions = YouTubeTranscriptApi.get_transcript(video_id)
# Include complete caption information with timestamps # Include complete caption information with timestamps
@ -612,8 +834,26 @@ async def add_youtube_video_document(
timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]" timestamp = f"[{start_time:.2f}s-{start_time + duration:.2f}s]"
transcript_segments.append(f"{timestamp} {text}") transcript_segments.append(f"{timestamp} {text}")
transcript_text = "\n".join(transcript_segments) transcript_text = "\n".join(transcript_segments)
await task_logger.log_task_progress(
log_entry,
f"Transcript fetched successfully: {len(captions)} segments",
{"stage": "transcript_fetched", "segments_count": len(captions), "transcript_length": len(transcript_text)}
)
except Exception as e: except Exception as e:
transcript_text = f"No captions available for this video. Error: {str(e)}" transcript_text = f"No captions available for this video. Error: {str(e)}"
await task_logger.log_task_progress(
log_entry,
f"No transcript available for video: {video_id}",
{"stage": "transcript_unavailable", "error": str(e)}
)
# Format document
await task_logger.log_task_progress(
log_entry,
f"Processing video content: {video_data.get('title', 'YouTube Video')}",
{"stage": "content_processing"}
)
# Format document metadata in a more maintainable way # Format document metadata in a more maintainable way
metadata_sections = [ metadata_sections = [
@ -646,6 +886,13 @@ async def add_youtube_video_document(
combined_document_string = "\n".join(document_parts) combined_document_string = "\n".join(document_parts)
content_hash = generate_content_hash(combined_document_string, search_space_id) content_hash = generate_content_hash(combined_document_string, search_space_id)
# Check for duplicates
await task_logger.log_task_progress(
log_entry,
f"Checking for duplicate video content: {video_id}",
{"stage": "duplicate_check", "content_hash": content_hash}
)
# Check if document with this content hash already exists # Check if document with this content hash already exists
existing_doc_result = await session.execute( existing_doc_result = await session.execute(
select(Document).where(Document.content_hash == content_hash) select(Document).where(Document.content_hash == content_hash)
@ -653,15 +900,33 @@ async def add_youtube_video_document(
existing_document = existing_doc_result.scalars().first() existing_document = existing_doc_result.scalars().first()
if existing_document: if existing_document:
await task_logger.log_task_success(
log_entry,
f"YouTube video document already exists: {video_data.get('title', 'YouTube Video')}",
{"duplicate_detected": True, "existing_document_id": existing_document.id, "video_id": video_id}
)
logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.") logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.")
return existing_document return existing_document
# Get LLM for summary generation
await task_logger.log_task_progress(
log_entry,
f"Preparing for summary generation: {video_data.get('title', 'YouTube Video')}",
{"stage": "llm_setup"}
)
# Get user's long context LLM # Get user's long context LLM
user_llm = await get_user_long_context_llm(session, user_id) user_llm = await get_user_long_context_llm(session, user_id)
if not user_llm: if not user_llm:
raise RuntimeError(f"No long context LLM configured for user {user_id}") raise RuntimeError(f"No long context LLM configured for user {user_id}")
# Generate summary # Generate summary
await task_logger.log_task_progress(
log_entry,
f"Generating summary for video: {video_data.get('title', 'YouTube Video')}",
{"stage": "summary_generation"}
)
summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm summary_chain = SUMMARY_PROMPT_TEMPLATE | user_llm
summary_result = await summary_chain.ainvoke( summary_result = await summary_chain.ainvoke(
{"document": combined_document_string} {"document": combined_document_string}
@ -670,6 +935,12 @@ async def add_youtube_video_document(
summary_embedding = config.embedding_model_instance.embed(summary_content) summary_embedding = config.embedding_model_instance.embed(summary_content)
# Process chunks # Process chunks
await task_logger.log_task_progress(
log_entry,
f"Processing content chunks for video: {video_data.get('title', 'YouTube Video')}",
{"stage": "chunk_processing"}
)
chunks = [ chunks = [
Chunk( Chunk(
content=chunk.text, content=chunk.text,
@ -679,6 +950,11 @@ async def add_youtube_video_document(
] ]
# Create document # Create document
await task_logger.log_task_progress(
log_entry,
f"Creating YouTube video document in database: {video_data.get('title', 'YouTube Video')}",
{"stage": "document_creation", "chunks_count": len(chunks)}
)
document = Document( document = Document(
title=video_data.get("title", "YouTube Video"), title=video_data.get("title", "YouTube Video"),
@ -701,11 +977,38 @@ async def add_youtube_video_document(
await session.commit() await session.commit()
await session.refresh(document) await session.refresh(document)
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully processed YouTube video: {video_data.get('title', 'YouTube Video')}",
{
"document_id": document.id,
"video_id": video_id,
"title": document.title,
"content_hash": content_hash,
"chunks_count": len(chunks),
"summary_length": len(summary_content),
"has_transcript": "No captions available" not in transcript_text
}
)
return document return document
except SQLAlchemyError as db_error: except SQLAlchemyError as db_error:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error while processing YouTube video: {url}",
str(db_error),
{"error_type": "SQLAlchemyError", "video_id": video_id if 'video_id' in locals() else None}
)
raise db_error raise db_error
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to process YouTube video: {url}",
str(e),
{"error_type": type(e).__name__, "video_id": video_id if 'video_id' in locals() else None}
)
logging.error(f"Failed to process YouTube video: {str(e)}") logging.error(f"Failed to process YouTube video: {str(e)}")
raise raise
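
Every task in this file follows the same contract: log_task_start returns a log entry, log_task_progress annotates it with a stage, and log_task_success / log_task_failure close it out. The service itself is not part of these hunks; the sketch below is an assumed, minimal implementation backed by the logs table and the LogLevel/LogStatus enums added in this commit. The Log ORM model name and exact update semantics are assumptions, not the project's actual code.

```python
# Sketch only: the real service lives in app/services/task_logging_service.py
# and is not shown in this diff. The Log model and its columns are inferred
# from the logs table migration; method signatures mirror the call sites above.
from typing import Any, Optional

from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Log, LogLevel, LogStatus  # Log is an assumed model name


class TaskLoggingService:
    def __init__(self, session: AsyncSession, search_space_id: int):
        self.session = session
        self.search_space_id = search_space_id

    async def log_task_start(
        self,
        task_name: str,
        source: str,
        message: str,
        metadata: Optional[dict[str, Any]] = None,
    ) -> Log:
        # Persist an IN_PROGRESS row immediately so running tasks are visible.
        entry = Log(
            level=LogLevel.INFO,
            status=LogStatus.IN_PROGRESS,
            message=message,
            source=source,
            log_metadata={"task_name": task_name, **(metadata or {})},
            search_space_id=self.search_space_id,
        )
        self.session.add(entry)
        await self.session.commit()
        await self.session.refresh(entry)
        return entry

    async def log_task_progress(
        self, entry: Log, message: str, metadata: Optional[dict[str, Any]] = None
    ) -> Log:
        # Reuse the same row: update the message and merge stage metadata.
        entry.message = message
        entry.log_metadata = {**(entry.log_metadata or {}), **(metadata or {})}
        await self.session.commit()
        return entry

    async def log_task_success(
        self, entry: Log, message: str, metadata: Optional[dict[str, Any]] = None
    ) -> Log:
        entry.status = LogStatus.SUCCESS
        entry.message = message
        entry.log_metadata = {**(entry.log_metadata or {}), **(metadata or {})}
        await self.session.commit()
        return entry

    async def log_task_failure(
        self,
        entry: Log,
        message: str,
        error: str,
        metadata: Optional[dict[str, Any]] = None,
    ) -> Log:
        entry.level = LogLevel.ERROR
        entry.status = LogStatus.FAILED
        entry.message = message
        entry.log_metadata = {
            **(entry.log_metadata or {}),
            "error": error,
            **(metadata or {}),
        }
        await self.session.commit()
        return entry
```
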


@ -7,6 +7,7 @@ from app.db import Document, DocumentType, Chunk, SearchSourceConnector, SearchS
from app.config import config from app.config import config
from app.prompts import SUMMARY_PROMPT_TEMPLATE from app.prompts import SUMMARY_PROMPT_TEMPLATE
from app.services.llm_service import get_user_long_context_llm from app.services.llm_service import get_user_long_context_llm
from app.services.task_logging_service import TaskLoggingService
from app.connectors.slack_history import SlackHistory from app.connectors.slack_history import SlackHistory
from app.connectors.notion_history import NotionHistoryConnector from app.connectors.notion_history import NotionHistoryConnector
from app.connectors.github_connector import GitHubConnector from app.connectors.github_connector import GitHubConnector
@ -42,8 +43,24 @@ async def index_slack_messages(
Returns: Returns:
Tuple containing (number of documents indexed, error message or None) Tuple containing (number of documents indexed, error message or None)
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="slack_messages_indexing",
source="connector_indexing_task",
message=f"Starting Slack messages indexing for connector {connector_id}",
metadata={"connector_id": connector_id, "user_id": str(user_id), "start_date": start_date, "end_date": end_date}
)
try: try:
# Get the connector # Get the connector
await task_logger.log_task_progress(
log_entry,
f"Retrieving Slack connector {connector_id} from database",
{"stage": "connector_retrieval"}
)
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector)
.filter( .filter(
@ -54,17 +71,41 @@ async def index_slack_messages(
connector = result.scalars().first() connector = result.scalars().first()
if not connector: if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found or is not a Slack connector",
"Connector not found",
{"error_type": "ConnectorNotFound"}
)
return 0, f"Connector with ID {connector_id} not found or is not a Slack connector" return 0, f"Connector with ID {connector_id} not found or is not a Slack connector"
# Get the Slack token from the connector config # Get the Slack token from the connector config
slack_token = connector.config.get("SLACK_BOT_TOKEN") slack_token = connector.config.get("SLACK_BOT_TOKEN")
if not slack_token: if not slack_token:
await task_logger.log_task_failure(
log_entry,
f"Slack token not found in connector config for connector {connector_id}",
"Missing Slack token",
{"error_type": "MissingToken"}
)
return 0, "Slack token not found in connector config" return 0, "Slack token not found in connector config"
# Initialize Slack client # Initialize Slack client
await task_logger.log_task_progress(
log_entry,
f"Initializing Slack client for connector {connector_id}",
{"stage": "client_initialization"}
)
slack_client = SlackHistory(token=slack_token) slack_client = SlackHistory(token=slack_token)
# Calculate date range # Calculate date range
await task_logger.log_task_progress(
log_entry,
f"Calculating date range for Slack indexing",
{"stage": "date_calculation", "provided_start_date": start_date, "provided_end_date": end_date}
)
if start_date is None or end_date is None: if start_date is None or end_date is None:
# Fall back to calculating dates based on last_indexed_at # Fall back to calculating dates based on last_indexed_at
calculated_end_date = datetime.now() calculated_end_date = datetime.now()
@ -95,13 +136,30 @@ async def index_slack_messages(
logger.info(f"Indexing Slack messages from {start_date_str} to {end_date_str}") logger.info(f"Indexing Slack messages from {start_date_str} to {end_date_str}")
await task_logger.log_task_progress(
log_entry,
f"Fetching Slack channels from {start_date_str} to {end_date_str}",
{"stage": "fetch_channels", "start_date": start_date_str, "end_date": end_date_str}
)
# Get all channels # Get all channels
try: try:
channels = slack_client.get_all_channels() channels = slack_client.get_all_channels()
except Exception as e: except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to get Slack channels for connector {connector_id}",
str(e),
{"error_type": "ChannelFetchError"}
)
return 0, f"Failed to get Slack channels: {str(e)}" return 0, f"Failed to get Slack channels: {str(e)}"
if not channels: if not channels:
await task_logger.log_task_success(
log_entry,
f"No Slack channels found for connector {connector_id}",
{"channels_found": 0}
)
return 0, "No Slack channels found" return 0, "No Slack channels found"
# Track the number of documents indexed # Track the number of documents indexed
@ -109,6 +167,12 @@ async def index_slack_messages(
documents_skipped = 0 documents_skipped = 0
skipped_channels = [] skipped_channels = []
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(channels)} Slack channels",
{"stage": "process_channels", "total_channels": len(channels)}
)
# Process each channel # Process each channel
for channel_obj in channels: # Modified loop to iterate over list of channel objects for channel_obj in channels: # Modified loop to iterate over list of channel objects
channel_id = channel_obj["id"] channel_id = channel_obj["id"]
@ -283,15 +347,40 @@ async def index_slack_messages(
else: else:
result_message = f"Processed {total_processed} channels." result_message = f"Processed {total_processed} channels."
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully completed Slack indexing for connector {connector_id}",
{
"channels_processed": total_processed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
"skipped_channels_count": len(skipped_channels),
"result_message": result_message
}
)
logger.info(f"Slack indexing completed: {documents_indexed} new channels, {documents_skipped} skipped") logger.info(f"Slack indexing completed: {documents_indexed} new channels, {documents_skipped} skipped")
return total_processed, result_message return total_processed, result_message
except SQLAlchemyError as db_error: except SQLAlchemyError as db_error:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during Slack indexing for connector {connector_id}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
logger.error(f"Database error: {str(db_error)}") logger.error(f"Database error: {str(db_error)}")
return 0, f"Database error: {str(db_error)}" return 0, f"Database error: {str(db_error)}"
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to index Slack messages for connector {connector_id}",
str(e),
{"error_type": type(e).__name__}
)
logger.error(f"Failed to index Slack messages: {str(e)}") logger.error(f"Failed to index Slack messages: {str(e)}")
return 0, f"Failed to index Slack messages: {str(e)}" return 0, f"Failed to index Slack messages: {str(e)}"
@ -316,8 +405,24 @@ async def index_notion_pages(
Returns: Returns:
Tuple containing (number of documents indexed, error message or None) Tuple containing (number of documents indexed, error message or None)
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="notion_pages_indexing",
source="connector_indexing_task",
message=f"Starting Notion pages indexing for connector {connector_id}",
metadata={"connector_id": connector_id, "user_id": str(user_id), "start_date": start_date, "end_date": end_date}
)
try: try:
# Get the connector # Get the connector
await task_logger.log_task_progress(
log_entry,
f"Retrieving Notion connector {connector_id} from database",
{"stage": "connector_retrieval"}
)
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector)
.filter( .filter(
@ -328,14 +433,32 @@ async def index_notion_pages(
connector = result.scalars().first() connector = result.scalars().first()
if not connector: if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found or is not a Notion connector",
"Connector not found",
{"error_type": "ConnectorNotFound"}
)
return 0, f"Connector with ID {connector_id} not found or is not a Notion connector" return 0, f"Connector with ID {connector_id} not found or is not a Notion connector"
# Get the Notion token from the connector config # Get the Notion token from the connector config
notion_token = connector.config.get("NOTION_INTEGRATION_TOKEN") notion_token = connector.config.get("NOTION_INTEGRATION_TOKEN")
if not notion_token: if not notion_token:
await task_logger.log_task_failure(
log_entry,
f"Notion integration token not found in connector config for connector {connector_id}",
"Missing Notion token",
{"error_type": "MissingToken"}
)
return 0, "Notion integration token not found in connector config" return 0, "Notion integration token not found in connector config"
# Initialize Notion client # Initialize Notion client
await task_logger.log_task_progress(
log_entry,
f"Initializing Notion client for connector {connector_id}",
{"stage": "client_initialization"}
)
logger.info(f"Initializing Notion client for connector {connector_id}") logger.info(f"Initializing Notion client for connector {connector_id}")
notion_client = NotionHistoryConnector(token=notion_token) notion_client = NotionHistoryConnector(token=notion_token)
@ -364,15 +487,32 @@ async def index_notion_pages(
logger.info(f"Fetching Notion pages from {start_date_iso} to {end_date_iso}") logger.info(f"Fetching Notion pages from {start_date_iso} to {end_date_iso}")
await task_logger.log_task_progress(
log_entry,
f"Fetching Notion pages from {start_date_iso} to {end_date_iso}",
{"stage": "fetch_pages", "start_date": start_date_iso, "end_date": end_date_iso}
)
# Get all pages # Get all pages
try: try:
pages = notion_client.get_all_pages(start_date=start_date_iso, end_date=end_date_iso) pages = notion_client.get_all_pages(start_date=start_date_iso, end_date=end_date_iso)
logger.info(f"Found {len(pages)} Notion pages") logger.info(f"Found {len(pages)} Notion pages")
except Exception as e: except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to get Notion pages for connector {connector_id}",
str(e),
{"error_type": "PageFetchError"}
)
logger.error(f"Error fetching Notion pages: {str(e)}", exc_info=True) logger.error(f"Error fetching Notion pages: {str(e)}", exc_info=True)
return 0, f"Failed to get Notion pages: {str(e)}" return 0, f"Failed to get Notion pages: {str(e)}"
if not pages: if not pages:
await task_logger.log_task_success(
log_entry,
f"No Notion pages found for connector {connector_id}",
{"pages_found": 0}
)
logger.info("No Notion pages found to index") logger.info("No Notion pages found to index")
return 0, "No Notion pages found" return 0, "No Notion pages found"
@ -381,6 +521,12 @@ async def index_notion_pages(
documents_skipped = 0 documents_skipped = 0
skipped_pages = [] skipped_pages = []
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(pages)} Notion pages",
{"stage": "process_pages", "total_pages": len(pages)}
)
# Process each page # Process each page
for page in pages: for page in pages:
try: try:
@ -552,15 +698,40 @@ async def index_notion_pages(
else: else:
result_message = f"Processed {total_processed} pages." result_message = f"Processed {total_processed} pages."
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully completed Notion indexing for connector {connector_id}",
{
"pages_processed": total_processed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
"skipped_pages_count": len(skipped_pages),
"result_message": result_message
}
)
logger.info(f"Notion indexing completed: {documents_indexed} new pages, {documents_skipped} skipped") logger.info(f"Notion indexing completed: {documents_indexed} new pages, {documents_skipped} skipped")
return total_processed, result_message return total_processed, result_message
except SQLAlchemyError as db_error: except SQLAlchemyError as db_error:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during Notion indexing for connector {connector_id}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
logger.error(f"Database error during Notion indexing: {str(db_error)}", exc_info=True) logger.error(f"Database error during Notion indexing: {str(db_error)}", exc_info=True)
return 0, f"Database error: {str(db_error)}" return 0, f"Database error: {str(db_error)}"
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to index Notion pages for connector {connector_id}",
str(e),
{"error_type": type(e).__name__}
)
logger.error(f"Failed to index Notion pages: {str(e)}", exc_info=True) logger.error(f"Failed to index Notion pages: {str(e)}", exc_info=True)
return 0, f"Failed to index Notion pages: {str(e)}" return 0, f"Failed to index Notion pages: {str(e)}"
@ -585,11 +756,27 @@ async def index_github_repos(
Returns: Returns:
Tuple containing (number of documents indexed, error message or None) Tuple containing (number of documents indexed, error message or None)
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="github_repos_indexing",
source="connector_indexing_task",
message=f"Starting GitHub repositories indexing for connector {connector_id}",
metadata={"connector_id": connector_id, "user_id": str(user_id), "start_date": start_date, "end_date": end_date}
)
documents_processed = 0 documents_processed = 0
errors = [] errors = []
try: try:
# 1. Get the GitHub connector from the database # 1. Get the GitHub connector from the database
await task_logger.log_task_progress(
log_entry,
f"Retrieving GitHub connector {connector_id} from database",
{"stage": "connector_retrieval"}
)
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector)
.filter( .filter(
@ -600,6 +787,12 @@ async def index_github_repos(
connector = result.scalars().first() connector = result.scalars().first()
if not connector: if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found or is not a GitHub connector",
"Connector not found",
{"error_type": "ConnectorNotFound"}
)
return 0, f"Connector with ID {connector_id} not found or is not a GitHub connector" return 0, f"Connector with ID {connector_id} not found or is not a GitHub connector"
# 2. Get the GitHub PAT and selected repositories from the connector config # 2. Get the GitHub PAT and selected repositories from the connector config
@ -607,20 +800,50 @@ async def index_github_repos(
repo_full_names_to_index = connector.config.get("repo_full_names") repo_full_names_to_index = connector.config.get("repo_full_names")
if not github_pat: if not github_pat:
await task_logger.log_task_failure(
log_entry,
f"GitHub Personal Access Token (PAT) not found in connector config for connector {connector_id}",
"Missing GitHub PAT",
{"error_type": "MissingToken"}
)
return 0, "GitHub Personal Access Token (PAT) not found in connector config" return 0, "GitHub Personal Access Token (PAT) not found in connector config"
if not repo_full_names_to_index or not isinstance(repo_full_names_to_index, list): if not repo_full_names_to_index or not isinstance(repo_full_names_to_index, list):
return 0, "'repo_full_names' not found or is not a list in connector config" await task_logger.log_task_failure(
log_entry,
f"'repo_full_names' not found or is not a list in connector config for connector {connector_id}",
"Invalid repo configuration",
{"error_type": "InvalidConfiguration"}
)
return 0, "'repo_full_names' not found or is not a list in connector config"
# 3. Initialize GitHub connector client # 3. Initialize GitHub connector client
await task_logger.log_task_progress(
log_entry,
f"Initializing GitHub client for connector {connector_id}",
{"stage": "client_initialization", "repo_count": len(repo_full_names_to_index)}
)
try: try:
github_client = GitHubConnector(token=github_pat) github_client = GitHubConnector(token=github_pat)
except ValueError as e: except ValueError as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to initialize GitHub client for connector {connector_id}",
str(e),
{"error_type": "ClientInitializationError"}
)
return 0, f"Failed to initialize GitHub client: {str(e)}" return 0, f"Failed to initialize GitHub client: {str(e)}"
# 4. Validate selected repositories # 4. Validate selected repositories
# For simplicity, we'll proceed with the list provided. # For simplicity, we'll proceed with the list provided.
# If a repo is inaccessible, get_repository_files will likely fail gracefully later. # If a repo is inaccessible, get_repository_files will likely fail gracefully later.
await task_logger.log_task_progress(
log_entry,
f"Starting indexing for {len(repo_full_names_to_index)} selected repositories",
{"stage": "repo_processing", "repo_count": len(repo_full_names_to_index), "start_date": start_date, "end_date": end_date}
)
logger.info(f"Starting indexing for {len(repo_full_names_to_index)} selected repositories.") logger.info(f"Starting indexing for {len(repo_full_names_to_index)} selected repositories.")
if start_date and end_date: if start_date and end_date:
logger.info(f"Date range requested: {start_date} to {end_date} (Note: GitHub indexing processes all files regardless of dates)") logger.info(f"Date range requested: {start_date} to {end_date} (Note: GitHub indexing processes all files regardless of dates)")
@ -719,13 +942,36 @@ async def index_github_repos(
await session.commit() await session.commit()
logger.info(f"Finished GitHub indexing for connector {connector_id}. Processed {documents_processed} files.") logger.info(f"Finished GitHub indexing for connector {connector_id}. Processed {documents_processed} files.")
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully completed GitHub indexing for connector {connector_id}",
{
"documents_processed": documents_processed,
"errors_count": len(errors),
"repo_count": len(repo_full_names_to_index)
}
)
except SQLAlchemyError as db_err: except SQLAlchemyError as db_err:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during GitHub indexing for connector {connector_id}",
str(db_err),
{"error_type": "SQLAlchemyError"}
)
logger.error(f"Database error during GitHub indexing for connector {connector_id}: {db_err}") logger.error(f"Database error during GitHub indexing for connector {connector_id}: {db_err}")
errors.append(f"Database error: {db_err}") errors.append(f"Database error: {db_err}")
return documents_processed, "; ".join(errors) if errors else str(db_err) return documents_processed, "; ".join(errors) if errors else str(db_err)
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Unexpected error during GitHub indexing for connector {connector_id}",
str(e),
{"error_type": type(e).__name__}
)
logger.error(f"Unexpected error during GitHub indexing for connector {connector_id}: {e}", exc_info=True) logger.error(f"Unexpected error during GitHub indexing for connector {connector_id}: {e}", exc_info=True)
errors.append(f"Unexpected error: {e}") errors.append(f"Unexpected error: {e}")
return documents_processed, "; ".join(errors) if errors else str(e) return documents_processed, "; ".join(errors) if errors else str(e)
@ -754,8 +1000,24 @@ async def index_linear_issues(
Returns: Returns:
Tuple containing (number of documents indexed, error message or None) Tuple containing (number of documents indexed, error message or None)
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="linear_issues_indexing",
source="connector_indexing_task",
message=f"Starting Linear issues indexing for connector {connector_id}",
metadata={"connector_id": connector_id, "user_id": str(user_id), "start_date": start_date, "end_date": end_date}
)
try: try:
# Get the connector # Get the connector
await task_logger.log_task_progress(
log_entry,
f"Retrieving Linear connector {connector_id} from database",
{"stage": "connector_retrieval"}
)
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector)
.filter( .filter(
@ -766,14 +1028,32 @@ async def index_linear_issues(
connector = result.scalars().first() connector = result.scalars().first()
if not connector: if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found or is not a Linear connector",
"Connector not found",
{"error_type": "ConnectorNotFound"}
)
return 0, f"Connector with ID {connector_id} not found or is not a Linear connector" return 0, f"Connector with ID {connector_id} not found or is not a Linear connector"
# Get the Linear token from the connector config # Get the Linear token from the connector config
linear_token = connector.config.get("LINEAR_API_KEY") linear_token = connector.config.get("LINEAR_API_KEY")
if not linear_token: if not linear_token:
await task_logger.log_task_failure(
log_entry,
f"Linear API token not found in connector config for connector {connector_id}",
"Missing Linear token",
{"error_type": "MissingToken"}
)
return 0, "Linear API token not found in connector config" return 0, "Linear API token not found in connector config"
# Initialize Linear client # Initialize Linear client
await task_logger.log_task_progress(
log_entry,
f"Initializing Linear client for connector {connector_id}",
{"stage": "client_initialization"}
)
linear_client = LinearConnector(token=linear_token) linear_client = LinearConnector(token=linear_token)
# Calculate date range # Calculate date range
@ -807,6 +1087,12 @@ async def index_linear_issues(
logger.info(f"Fetching Linear issues from {start_date_str} to {end_date_str}") logger.info(f"Fetching Linear issues from {start_date_str} to {end_date_str}")
await task_logger.log_task_progress(
log_entry,
f"Fetching Linear issues from {start_date_str} to {end_date_str}",
{"stage": "fetch_issues", "start_date": start_date_str, "end_date": end_date_str}
)
# Get issues within date range # Get issues within date range
try: try:
issues, error = linear_client.get_issues_by_date_range( issues, error = linear_client.get_issues_by_date_range(
@ -855,6 +1141,12 @@ async def index_linear_issues(
documents_skipped = 0 documents_skipped = 0
skipped_issues = [] skipped_issues = []
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(issues)} Linear issues",
{"stage": "process_issues", "total_issues": len(issues)}
)
# Process each issue # Process each issue
for issue in issues: for issue in issues:
try: try:
@ -959,16 +1251,39 @@ async def index_linear_issues(
await session.commit() await session.commit()
logger.info(f"Successfully committed all Linear document changes to database") logger.info(f"Successfully committed all Linear document changes to database")
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully completed Linear indexing for connector {connector_id}",
{
"issues_processed": total_processed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
"skipped_issues_count": len(skipped_issues)
}
)
logger.info(f"Linear indexing completed: {documents_indexed} new issues, {documents_skipped} skipped") logger.info(f"Linear indexing completed: {documents_indexed} new issues, {documents_skipped} skipped")
return total_processed, None # Return None as the error message to indicate success return total_processed, None # Return None as the error message to indicate success
except SQLAlchemyError as db_error: except SQLAlchemyError as db_error:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during Linear indexing for connector {connector_id}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
logger.error(f"Database error: {str(db_error)}", exc_info=True) logger.error(f"Database error: {str(db_error)}", exc_info=True)
return 0, f"Database error: {str(db_error)}" return 0, f"Database error: {str(db_error)}"
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to index Linear issues for connector {connector_id}",
str(e),
{"error_type": type(e).__name__}
)
logger.error(f"Failed to index Linear issues: {str(e)}", exc_info=True) logger.error(f"Failed to index Linear issues: {str(e)}", exc_info=True)
return 0, f"Failed to index Linear issues: {str(e)}" return 0, f"Failed to index Linear issues: {str(e)}"
@ -993,8 +1308,24 @@ async def index_discord_messages(
Returns: Returns:
Tuple containing (number of documents indexed, error message or None) Tuple containing (number of documents indexed, error message or None)
""" """
task_logger = TaskLoggingService(session, search_space_id)
# Log task start
log_entry = await task_logger.log_task_start(
task_name="discord_messages_indexing",
source="connector_indexing_task",
message=f"Starting Discord messages indexing for connector {connector_id}",
metadata={"connector_id": connector_id, "user_id": str(user_id), "start_date": start_date, "end_date": end_date}
)
try: try:
# Get the connector # Get the connector
await task_logger.log_task_progress(
log_entry,
f"Retrieving Discord connector {connector_id} from database",
{"stage": "connector_retrieval"}
)
result = await session.execute( result = await session.execute(
select(SearchSourceConnector) select(SearchSourceConnector)
.filter( .filter(
@ -1005,16 +1336,34 @@ async def index_discord_messages(
connector = result.scalars().first() connector = result.scalars().first()
if not connector: if not connector:
await task_logger.log_task_failure(
log_entry,
f"Connector with ID {connector_id} not found or is not a Discord connector",
"Connector not found",
{"error_type": "ConnectorNotFound"}
)
return 0, f"Connector with ID {connector_id} not found or is not a Discord connector" return 0, f"Connector with ID {connector_id} not found or is not a Discord connector"
# Get the Discord token from the connector config # Get the Discord token from the connector config
discord_token = connector.config.get("DISCORD_BOT_TOKEN") discord_token = connector.config.get("DISCORD_BOT_TOKEN")
if not discord_token: if not discord_token:
await task_logger.log_task_failure(
log_entry,
f"Discord token not found in connector config for connector {connector_id}",
"Missing Discord token",
{"error_type": "MissingToken"}
)
return 0, "Discord token not found in connector config" return 0, "Discord token not found in connector config"
logger.info(f"Starting Discord indexing for connector {connector_id}") logger.info(f"Starting Discord indexing for connector {connector_id}")
# Initialize Discord client # Initialize Discord client
await task_logger.log_task_progress(
log_entry,
f"Initializing Discord client for connector {connector_id}",
{"stage": "client_initialization"}
)
discord_client = DiscordConnector(token=discord_token) discord_client = DiscordConnector(token=discord_token)
# Calculate date range # Calculate date range
@ -1054,6 +1403,12 @@ async def index_discord_messages(
skipped_channels = [] skipped_channels = []
try: try:
await task_logger.log_task_progress(
log_entry,
f"Starting Discord bot and fetching guilds for connector {connector_id}",
{"stage": "fetch_guilds"}
)
logger.info("Starting Discord bot to fetch guilds") logger.info("Starting Discord bot to fetch guilds")
discord_client._bot_task = asyncio.create_task(discord_client.start_bot()) discord_client._bot_task = asyncio.create_task(discord_client.start_bot())
await discord_client._wait_until_ready() await discord_client._wait_until_ready()
@ -1062,15 +1417,32 @@ async def index_discord_messages(
guilds = await discord_client.get_guilds() guilds = await discord_client.get_guilds()
logger.info(f"Found {len(guilds)} guilds") logger.info(f"Found {len(guilds)} guilds")
except Exception as e: except Exception as e:
await task_logger.log_task_failure(
log_entry,
f"Failed to get Discord guilds for connector {connector_id}",
str(e),
{"error_type": "GuildFetchError"}
)
logger.error(f"Failed to get Discord guilds: {str(e)}", exc_info=True) logger.error(f"Failed to get Discord guilds: {str(e)}", exc_info=True)
await discord_client.close_bot() await discord_client.close_bot()
return 0, f"Failed to get Discord guilds: {str(e)}" return 0, f"Failed to get Discord guilds: {str(e)}"
if not guilds: if not guilds:
await task_logger.log_task_success(
log_entry,
f"No Discord guilds found for connector {connector_id}",
{"guilds_found": 0}
)
logger.info("No Discord guilds found to index") logger.info("No Discord guilds found to index")
await discord_client.close_bot() await discord_client.close_bot()
return 0, "No Discord guilds found" return 0, "No Discord guilds found"
# Process each guild and channel # Process each guild and channel
await task_logger.log_task_progress(
log_entry,
f"Starting to process {len(guilds)} Discord guilds",
{"stage": "process_guilds", "total_guilds": len(guilds)}
)
for guild in guilds: for guild in guilds:
guild_id = guild["id"] guild_id = guild["id"]
guild_name = guild["name"] guild_name = guild["name"]
@ -1242,14 +1614,40 @@ async def index_discord_messages(
else: else:
result_message = f"Processed {documents_indexed} channels." result_message = f"Processed {documents_indexed} channels."
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully completed Discord indexing for connector {connector_id}",
{
"channels_processed": documents_indexed,
"documents_indexed": documents_indexed,
"documents_skipped": documents_skipped,
"skipped_channels_count": len(skipped_channels),
"guilds_processed": len(guilds),
"result_message": result_message
}
)
logger.info(f"Discord indexing completed: {documents_indexed} new channels, {documents_skipped} skipped") logger.info(f"Discord indexing completed: {documents_indexed} new channels, {documents_skipped} skipped")
return documents_indexed, result_message return documents_indexed, result_message
except SQLAlchemyError as db_error: except SQLAlchemyError as db_error:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during Discord indexing for connector {connector_id}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
logger.error(f"Database error during Discord indexing: {str(db_error)}", exc_info=True) logger.error(f"Database error during Discord indexing: {str(db_error)}", exc_info=True)
return 0, f"Database error: {str(db_error)}" return 0, f"Database error: {str(db_error)}"
except Exception as e: except Exception as e:
await session.rollback() await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Failed to index Discord messages for connector {connector_id}",
str(e),
{"error_type": type(e).__name__}
)
logger.error(f"Failed to index Discord messages: {str(e)}", exc_info=True) logger.error(f"Failed to index Discord messages: {str(e)}", exc_info=True)
return 0, f"Failed to index Discord messages: {str(e)}" return 0, f"Failed to index Discord messages: {str(e)}"


@ -2,8 +2,10 @@
from app.agents.podcaster.graph import graph as podcaster_graph from app.agents.podcaster.graph import graph as podcaster_graph
from app.agents.podcaster.state import State from app.agents.podcaster.state import State
from app.db import Chat, Podcast from app.db import Chat, Podcast
from app.services.task_logging_service import TaskLoggingService
from sqlalchemy import select from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.ext.asyncio import AsyncSession
from sqlalchemy.exc import SQLAlchemyError
async def generate_document_podcast( async def generate_document_podcast(
@ -24,73 +26,177 @@ async def generate_chat_podcast(
podcast_title: str, podcast_title: str,
user_id: int user_id: int
): ):
# Fetch the chat with the specified ID task_logger = TaskLoggingService(session, search_space_id)
query = select(Chat).filter(
Chat.id == chat_id,
Chat.search_space_id == search_space_id
)
result = await session.execute(query) # Log task start
chat = result.scalars().first() log_entry = await task_logger.log_task_start(
task_name="generate_chat_podcast",
if not chat: source="podcast_task",
raise ValueError(f"Chat with id {chat_id} not found in search space {search_space_id}") message=f"Starting podcast generation for chat {chat_id}",
metadata={
# Create chat history structure "chat_id": chat_id,
chat_history_str = "<chat_history>" "search_space_id": search_space_id,
"podcast_title": podcast_title,
for message in chat.messages: "user_id": str(user_id)
if message["role"] == "user":
chat_history_str += f"<user_message>{message['content']}</user_message>"
elif message["role"] == "assistant":
# Last annotation type will always be "ANSWER" here
answer_annotation = message["annotations"][-1]
answer_text = ""
if answer_annotation["type"] == "ANSWER":
answer_text = answer_annotation["content"]
# If content is a list, join it into a single string
if isinstance(answer_text, list):
answer_text = "\n".join(answer_text)
chat_history_str += f"<assistant_message>{answer_text}</assistant_message>"
chat_history_str += "</chat_history>"
# Pass it to the SurfSense Podcaster
config = {
"configurable": {
"podcast_title": "SurfSense",
"user_id": str(user_id),
} }
}
# Initialize state with database session and streaming service
initial_state = State(
source_content=chat_history_str,
db_session=session
) )
# Run the graph directly try:
result = await podcaster_graph.ainvoke(initial_state, config=config) # Fetch the chat with the specified ID
await task_logger.log_task_progress(
# Convert podcast transcript entries to serializable format log_entry,
serializable_transcript = [] f"Fetching chat {chat_id} from database",
for entry in result["podcast_transcript"]: {"stage": "fetch_chat"}
serializable_transcript.append({ )
"speaker_id": entry.speaker_id,
"dialog": entry.dialog query = select(Chat).filter(
}) Chat.id == chat_id,
Chat.search_space_id == search_space_id
# Create a new podcast entry )
podcast = Podcast(
title=f"{podcast_title}", result = await session.execute(query)
podcast_transcript=serializable_transcript, chat = result.scalars().first()
file_location=result["final_podcast_file_path"],
search_space_id=search_space_id if not chat:
) await task_logger.log_task_failure(
log_entry,
# Add to session and commit f"Chat with id {chat_id} not found in search space {search_space_id}",
session.add(podcast) "Chat not found",
await session.commit() {"error_type": "ChatNotFound"}
await session.refresh(podcast) )
raise ValueError(f"Chat with id {chat_id} not found in search space {search_space_id}")
return podcast
# Create chat history structure
await task_logger.log_task_progress(
log_entry,
f"Processing chat history for chat {chat_id}",
{"stage": "process_chat_history", "message_count": len(chat.messages)}
)
chat_history_str = "<chat_history>"
processed_messages = 0
for message in chat.messages:
if message["role"] == "user":
chat_history_str += f"<user_message>{message['content']}</user_message>"
processed_messages += 1
elif message["role"] == "assistant":
# Last annotation type will always be "ANSWER" here
answer_annotation = message["annotations"][-1]
answer_text = ""
if answer_annotation["type"] == "ANSWER":
answer_text = answer_annotation["content"]
# If content is a list, join it into a single string
if isinstance(answer_text, list):
answer_text = "\n".join(answer_text)
chat_history_str += f"<assistant_message>{answer_text}</assistant_message>"
processed_messages += 1
chat_history_str += "</chat_history>"
# Pass it to the SurfSense Podcaster
await task_logger.log_task_progress(
log_entry,
f"Initializing podcast generation for chat {chat_id}",
{"stage": "initialize_podcast_generation", "processed_messages": processed_messages, "content_length": len(chat_history_str)}
)
config = {
"configurable": {
"podcast_title": "SurfSense",
"user_id": str(user_id),
}
}
# Initialize state with database session and streaming service
initial_state = State(
source_content=chat_history_str,
db_session=session
)
# Run the graph directly
await task_logger.log_task_progress(
log_entry,
f"Running podcast generation graph for chat {chat_id}",
{"stage": "run_podcast_graph"}
)
result = await podcaster_graph.ainvoke(initial_state, config=config)
# Convert podcast transcript entries to serializable format
await task_logger.log_task_progress(
log_entry,
f"Processing podcast transcript for chat {chat_id}",
{"stage": "process_transcript", "transcript_entries": len(result["podcast_transcript"])}
)
serializable_transcript = []
for entry in result["podcast_transcript"]:
serializable_transcript.append({
"speaker_id": entry.speaker_id,
"dialog": entry.dialog
})
# Create a new podcast entry
await task_logger.log_task_progress(
log_entry,
f"Creating podcast database entry for chat {chat_id}",
{"stage": "create_podcast_entry", "file_location": result.get("final_podcast_file_path")}
)
podcast = Podcast(
title=f"{podcast_title}",
podcast_transcript=serializable_transcript,
file_location=result["final_podcast_file_path"],
search_space_id=search_space_id
)
# Add to session and commit
session.add(podcast)
await session.commit()
await session.refresh(podcast)
# Log success
await task_logger.log_task_success(
log_entry,
f"Successfully generated podcast for chat {chat_id}",
{
"podcast_id": podcast.id,
"podcast_title": podcast_title,
"transcript_entries": len(serializable_transcript),
"file_location": result.get("final_podcast_file_path"),
"processed_messages": processed_messages,
"content_length": len(chat_history_str)
}
)
return podcast
except ValueError as ve:
# ValueError is already logged above for chat not found
if "not found" not in str(ve):
await task_logger.log_task_failure(
log_entry,
f"Value error during podcast generation for chat {chat_id}",
str(ve),
{"error_type": "ValueError"}
)
raise ve
except SQLAlchemyError as db_error:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Database error during podcast generation for chat {chat_id}",
str(db_error),
{"error_type": "SQLAlchemyError"}
)
raise db_error
except Exception as e:
await session.rollback()
await task_logger.log_task_failure(
log_entry,
f"Unexpected error during podcast generation for chat {chat_id}",
str(e),
{"error_type": type(e).__name__}
)
raise RuntimeError(f"Failed to generate podcast for chat {chat_id}: {str(e)}")
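
How generate_chat_podcast is scheduled is not shown in this diff; presumably the podcasts route hands it to a FastAPI background task with its own database session, so the task logger's commits stay out of the request transaction. A rough sketch under that assumption follows — the module path, the async_session_maker name, and this wrapper are guesses, not the project's actual wiring; only generate_chat_podcast's parameters come from the hunks above.

```python
# Rough sketch under stated assumptions; names marked "assumed" are not from
# this diff.
from fastapi import BackgroundTasks

from app.db import async_session_maker  # assumed session factory name
from app.tasks.podcast_tasks import generate_chat_podcast  # assumed module path


def schedule_chat_podcast(
    background_tasks: BackgroundTasks,
    chat_id: int,
    search_space_id: int,
    podcast_title: str,
    user_id: int,
) -> None:
    async def _run() -> None:
        # A dedicated session keeps the task's commits and rollbacks (including
        # the task logger's writes) separate from the request's transaction.
        async with async_session_maker() as session:
            await generate_chat_podcast(
                session=session,
                chat_id=chat_id,
                search_space_id=search_space_id,
                podcast_title=podcast_title,
                user_id=user_id,
            )

    background_tasks.add_task(_run)
```
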


@ -83,9 +83,8 @@ async def stream_connector_search_results(
config=config, config=config,
stream_mode="custom", stream_mode="custom",
): ):
# If the chunk contains a 'yeild_value' key, print its value if isinstance(chunk, dict):
# Note: there's a typo in 'yeild_value' in the code, but we need to match it if "yield_value" in chunk:
if isinstance(chunk, dict) and 'yeild_value' in chunk: yield chunk["yield_value"]
yield chunk['yeild_value']
yield streaming_service.format_completion()
yield streaming_service.format_completion()


@ -329,7 +329,7 @@ export default function ChatsPageClient({ searchSpaceId }: ChatsPageClientProps)
// Helper to finish the podcast generation process // Helper to finish the podcast generation process
const finishPodcastGeneration = () => { const finishPodcastGeneration = () => {
toast.success("All podcasts are being generated! Check the podcasts tab to see them when ready."); toast.success("All podcasts are being generated! Check the logs tab to see their status.");
setPodcastDialogOpen(false); setPodcastDialogOpen(false);
setSelectedChats([]); setSelectedChats([]);
setSelectionMode(false); setSelectionMode(false);


@ -134,7 +134,7 @@ export default function ConnectorsPage() {
const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined; const endDateStr = endDate ? format(endDate, "yyyy-MM-dd") : undefined;
await indexConnector(selectedConnectorForIndexing, searchSpaceId, startDateStr, endDateStr); await indexConnector(selectedConnectorForIndexing, searchSpaceId, startDateStr, endDateStr);
toast.success("Connector content indexed successfully"); toast.success("Connector content indexing started");
} catch (error) { } catch (error) {
console.error("Error indexing connector content:", error); console.error("Error indexing connector content:", error);
toast.error( toast.error(
@ -155,7 +155,7 @@ export default function ConnectorsPage() {
setIndexingConnectorId(connectorId); setIndexingConnectorId(connectorId);
try { try {
await indexConnector(connectorId, searchSpaceId); await indexConnector(connectorId, searchSpaceId);
toast.success("Connector content indexed successfully"); toast.success("Connector content indexing started");
} catch (error) { } catch (error) {
console.error("Error indexing connector content:", error); console.error("Error indexing connector content:", error);
toast.error( toast.error(


@ -170,9 +170,9 @@ export default function FileUploader() {
formData.append('search_space_id', search_space_id) formData.append('search_space_id', search_space_id)
try { try {
toast("File Upload", { // toast("File Upload", {
description: "Files Uploading Initiated", // description: "Files Uploading Initiated",
}) // })
const response = await fetch(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL!}/api/v1/documents/fileupload`, { const response = await fetch(`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL!}/api/v1/documents/fileupload`, {
method: "POST", method: "POST",
@ -188,8 +188,8 @@ export default function FileUploader() {
await response.json() await response.json()
toast("Upload Successful", { toast("Upload Task Initiated", {
description: "Files Uploaded Successfully", description: "Files Uploading Initiated",
}) })
router.push(`/dashboard/${search_space_id}/documents`); router.push(`/dashboard/${search_space_id}/documents`);


@ -43,10 +43,10 @@ export default function DashboardLayout({
title: "Upload Documents", title: "Upload Documents",
url: `/dashboard/${search_space_id}/documents/upload`, url: `/dashboard/${search_space_id}/documents/upload`,
}, },
{ // { TODO: FIX THIS AND ADD IT BACK
title: "Add Webpages", // title: "Add Webpages",
url: `/dashboard/${search_space_id}/documents/webpage`, // url: `/dashboard/${search_space_id}/documents/webpage`,
}, // },
{ {
title: "Add Youtube Videos", title: "Add Youtube Videos",
url: `/dashboard/${search_space_id}/documents/youtube`, url: `/dashboard/${search_space_id}/documents/youtube`,
@ -78,6 +78,13 @@ export default function DashboardLayout({
icon: "Podcast", icon: "Podcast",
items: [ items: [
], ],
},
{
title: "Logs",
url: `/dashboard/${search_space_id}/logs`,
icon: "FileText",
items: [
],
} }
] ]
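
The new Logs entry points the sidebar at /dashboard/{search_space_id}/logs. The page component and its backend route are not part of the visible hunks, but the page presumably lists rows from the logs table added in this commit. Below is a hedged sketch of the kind of query such a page could rely on; the Log model name and this helper are illustrative assumptions.

```python
# Illustrative only - the diff does not show the backend route that serves the
# new Logs page; the Log model is an assumed mapping over the logs table.
from sqlalchemy import select
from sqlalchemy.ext.asyncio import AsyncSession

from app.db import Log  # assumed ORM model


async def list_recent_logs(
    session: AsyncSession, search_space_id: int, limit: int = 100
) -> list:
    # Newest entries first, scoped to the search space like the table's FK.
    result = await session.execute(
        select(Log)
        .where(Log.search_space_id == search_space_id)
        .order_by(Log.created_at.desc())
        .limit(limit)
    )
    return list(result.scalars().all())
```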

File diff suppressed because it is too large


@ -981,19 +981,16 @@ const ChatPage = () => {
const renderTerminalContent = (message: any) => { const renderTerminalContent = (message: any) => {
if (!message.annotations) return null; if (!message.annotations) return null;
// Get all TERMINAL_INFO annotations // Get all TERMINAL_INFO annotations content
const terminalInfoAnnotations = (message.annotations as any[]).filter( const terminalInfoAnnotations = (message.annotations as any[]).map(item => {
(a) => a.type === "TERMINAL_INFO", if(item.type === "TERMINAL_INFO") {
); return item.content.map((a: any) => a.text)
// Get the latest TERMINAL_INFO annotation }
const latestTerminalInfo = }).flat().filter(Boolean)
terminalInfoAnnotations.length > 0
? terminalInfoAnnotations[terminalInfoAnnotations.length - 1]
: null;
// Render the content of the latest TERMINAL_INFO annotation // Render the content of the latest TERMINAL_INFO annotation
return latestTerminalInfo?.content.map((item: any, idx: number) => ( return terminalInfoAnnotations.map((item: any, idx: number) => (
<div key={idx} className="py-0.5 flex items-start text-gray-300"> <div key={idx} className="py-0.5 flex items-start text-gray-300">
<span className="text-gray-500 text-xs mr-2 w-10 flex-shrink-0"> <span className="text-gray-500 text-xs mr-2 w-10 flex-shrink-0">
[{String(idx).padStart(2, "0")}: [{String(idx).padStart(2, "0")}:
@ -1008,7 +1005,7 @@ const ChatPage = () => {
${item.type === "warning" ? "text-yellow-300" : ""} ${item.type === "warning" ? "text-yellow-300" : ""}
`} `}
> >
{item.text} {item}
</span> </span>
</div> </div>
)); ));


@@ -1,40 +1,48 @@
-'use client'
-import React from 'react'
-import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "@/components/ui/card"
-import { Button } from "@/components/ui/button"
-import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert"
-import { motion, AnimatePresence } from "framer-motion"
-import { IconCheck, IconCopy, IconKey } from "@tabler/icons-react"
-import { Tooltip, TooltipContent, TooltipProvider, TooltipTrigger } from "@/components/ui/tooltip"
-import { useApiKey } from "@/hooks/use-api-key"
+"use client";
+import React from "react";
+import { useRouter } from "next/navigation";
+import { ArrowLeft } from "lucide-react";
+import {
+  Card,
+  CardContent,
+  CardDescription,
+  CardHeader,
+  CardTitle,
+} from "@/components/ui/card";
+import { Button } from "@/components/ui/button";
+import { Alert, AlertDescription, AlertTitle } from "@/components/ui/alert";
+import { motion, AnimatePresence } from "framer-motion";
+import { IconCheck, IconCopy, IconKey } from "@tabler/icons-react";
+import {
+  Tooltip,
+  TooltipContent,
+  TooltipProvider,
+  TooltipTrigger,
+} from "@/components/ui/tooltip";
+import { useApiKey } from "@/hooks/use-api-key";
 const fadeIn = {
   hidden: { opacity: 0 },
-  visible: { opacity: 1, transition: { duration: 0.4 } }
-}
+  visible: { opacity: 1, transition: { duration: 0.4 } },
+};
 const staggerContainer = {
   hidden: { opacity: 0 },
   visible: {
     opacity: 1,
     transition: {
-      staggerChildren: 0.1
-    }
-  }
-}
+      staggerChildren: 0.1,
+    },
+  },
+};
 const ApiKeyClient = () => {
-  const {
-    apiKey,
-    isLoading,
-    copied,
-    copyToClipboard
-  } = useApiKey()
+  const { apiKey, isLoading, copied, copyToClipboard } = useApiKey();
+  const router = useRouter();
   return (
     <div className="flex justify-center w-full min-h-screen py-10 px-4">
       <motion.div
         className="w-full max-w-3xl"
         initial="hidden"
         animate="visible"
@@ -52,7 +60,8 @@ const ApiKeyClient = () => {
             <IconKey className="h-4 w-4" />
             <AlertTitle>Important</AlertTitle>
             <AlertDescription>
-              Your API key grants full access to your account. Never share it publicly or with unauthorized users.
+              Your API key grants full access to your account. Never share it
+              publicly or with unauthorized users.
             </AlertDescription>
           </Alert>
         </motion.div>
@@ -68,15 +77,15 @@ const ApiKeyClient = () => {
           <CardContent>
             <AnimatePresence mode="wait">
               {isLoading ? (
                 <motion.div
                   key="loading"
                   initial={{ opacity: 0 }}
                   animate={{ opacity: 1 }}
                   exit={{ opacity: 0 }}
                   className="h-10 w-full bg-muted animate-pulse rounded-md"
                 />
               ) : apiKey ? (
                 <motion.div
                   key="api-key"
                   initial={{ opacity: 0, y: 10 }}
                   animate={{ opacity: 1, y: 0 }}
@@ -96,9 +105,9 @@ const ApiKeyClient = () => {
                   <TooltipProvider>
                     <Tooltip>
                       <TooltipTrigger asChild>
                         <Button
                           variant="outline"
                           size="icon"
                           onClick={copyToClipboard}
                           className="flex-shrink-0"
                         >
@@ -107,7 +116,11 @@ const ApiKeyClient = () => {
                             animate={copied ? { scale: [1, 1.2, 1] } : {}}
                             transition={{ duration: 0.2 }}
                           >
-                            {copied ? <IconCheck className="h-4 w-4" /> : <IconCopy className="h-4 w-4" />}
+                            {copied ? (
+                              <IconCheck className="h-4 w-4" />
+                            ) : (
+                              <IconCopy className="h-4 w-4" />
+                            )}
                           </motion.div>
                         </Button>
                       </TooltipTrigger>
@@ -118,7 +131,7 @@ const ApiKeyClient = () => {
                   </TooltipProvider>
                 </motion.div>
               ) : (
                 <motion.div
                   key="no-key"
                   initial={{ opacity: 0 }}
                   animate={{ opacity: 1 }}
@@ -133,34 +146,39 @@ const ApiKeyClient = () => {
           </Card>
         </motion.div>
         <motion.div
           className="mt-8"
           variants={fadeIn}
           initial={{ opacity: 0, y: 20 }}
           animate={{ opacity: 1, y: 0 }}
           transition={{ delay: 0.3 }}
         >
-          <h2 className="text-xl font-semibold mb-4 text-center">How to use your API key</h2>
+          <h2 className="text-xl font-semibold mb-4 text-center">
+            How to use your API key
+          </h2>
           <Card>
             <CardContent className="pt-6">
               <motion.div
                 className="space-y-4"
                 initial="hidden"
                 animate="visible"
                 variants={staggerContainer}
               >
                 <motion.div variants={fadeIn}>
-                  <h3 className="font-medium mb-2 text-center">Authentication</h3>
+                  <h3 className="font-medium mb-2 text-center">
+                    Authentication
+                  </h3>
                   <p className="text-sm text-muted-foreground text-center">
-                    Include your API key in the Authorization header of your requests:
+                    Include your API key in the Authorization header of your
+                    requests:
                   </p>
                   <motion.pre
                     className="bg-muted p-3 rounded-md mt-2 overflow-x-auto"
                     whileHover={{ scale: 1.01 }}
                     transition={{ type: "spring", stiffness: 400, damping: 10 }}
                   >
                     <code className="text-xs">
-                      Authorization: Bearer {apiKey || 'YOUR_API_KEY'}
+                      Authorization: Bearer {apiKey || "YOUR_API_KEY"}
                     </code>
                   </motion.pre>
                 </motion.div>
@@ -169,8 +187,18 @@ const ApiKeyClient = () => {
           </Card>
         </motion.div>
       </motion.div>
+      <div>
+        <button
+          onClick={() => router.push("/dashboard")}
+          className="flex items-center justify-center h-10 w-10 rounded-lg bg-primary/10 hover:bg-primary/30 transition-colors"
+          aria-label="Back to Dashboard"
+          type="button"
+        >
+          <ArrowLeft className="h-5 w-5 text-primary" />
+        </button>
+      </div>
     </div>
-  )
-}
-export default ApiKeyClient
+  );
+};
+export default ApiKeyClient;


@@ -1,14 +1,15 @@
 "use client";
-import React from 'react'
+import React, { useEffect, useState } from 'react'
 import Link from 'next/link'
 import { motion } from 'framer-motion'
 import { Button } from '@/components/ui/button'
-import { Plus, Search, Trash2, AlertCircle, Loader2, LogOut } from 'lucide-react'
+import { Plus, Search, Trash2, AlertCircle, Loader2 } from 'lucide-react'
 import { Tilt } from '@/components/ui/tilt'
 import { Spotlight } from '@/components/ui/spotlight'
 import { Logo } from '@/components/Logo';
 import { ThemeTogglerComponent } from '@/components/theme/theme-toggle';
+import { UserDropdown } from '@/components/UserDropdown';
 import { toast } from 'sonner';
 import {
   AlertDialog,
@@ -28,8 +29,17 @@ import {
 } from "@/components/ui/alert";
 import { Card, CardContent, CardDescription, CardFooter, CardHeader, CardTitle } from "@/components/ui/card";
 import { useSearchSpaces } from '@/hooks/use-search-spaces';
+import { apiClient } from '@/lib/api';
 import { useRouter } from 'next/navigation';
+interface User {
+  id: string;
+  email: string;
+  is_active: boolean;
+  is_superuser: boolean;
+  is_verified: boolean;
+}
 /**
  * Formats a date string into a readable format
  * @param dateString - The date string to format
@@ -147,17 +157,47 @@ const DashboardPage = () => {
   const router = useRouter();
   const { searchSpaces, loading, error, refreshSearchSpaces } = useSearchSpaces();
+  // User state management
+  const [user, setUser] = useState<User | null>(null);
+  const [isLoadingUser, setIsLoadingUser] = useState(true);
+  const [userError, setUserError] = useState<string | null>(null);
+  // Fetch user details
+  useEffect(() => {
+    const fetchUser = async () => {
+      try {
+        if (typeof window === 'undefined') return;
+        try {
+          const userData = await apiClient.get<User>('users/me');
+          setUser(userData);
+          setUserError(null);
+        } catch (error) {
+          console.error('Error fetching user:', error);
+          setUserError(error instanceof Error ? error.message : 'Unknown error occurred');
+        } finally {
+          setIsLoadingUser(false);
+        }
+      } catch (error) {
+        console.error('Error in fetchUser:', error);
+        setIsLoadingUser(false);
+      }
+    };
+    fetchUser();
+  }, []);
+  // Create user object for UserDropdown
+  const customUser = {
+    name: user?.email ? user.email.split('@')[0] : 'User',
+    email: user?.email || (isLoadingUser ? 'Loading...' : userError ? 'Error loading user' : 'Unknown User'),
+    avatar: '/icon-128.png', // Default avatar
+  };
   if (loading) return <LoadingScreen />;
   if (error) return <ErrorScreen message={error} />;
-  const handleLogout = () => {
-    if (typeof window !== 'undefined') {
-      localStorage.removeItem('surfsense_bearer_token');
-      router.push('/');
-    }
-  };
   const handleDeleteSearchSpace = async (id: number) => {
     // Send DELETE request to the API
     try {
@@ -201,18 +241,10 @@ const DashboardPage = () => {
               </p>
             </div>
           </div>
           <div className="flex items-center space-x-3">
-            <Button
-              variant="ghost"
-              size="icon"
-              onClick={handleLogout}
-              className="h-9 w-9 rounded-full"
-              aria-label="Logout"
-            >
-              <LogOut className="h-5 w-5" />
-            </Button>
+            <UserDropdown user={customUser} />
             <ThemeTogglerComponent />
           </div>
         </div>
         <div className="flex flex-col space-y-6 mt-6">


@@ -152,3 +152,7 @@
     --sidebar-ring: 217.2 91.2% 59.8%;
   }
 }
+button {
+  cursor: pointer;
+}


@@ -1,13 +1,16 @@
 "use client";
 import React from 'react';
+import { useRouter } from 'next/navigation'; // Add this import
 import { Tabs, TabsContent, TabsList, TabsTrigger } from '@/components/ui/tabs';
 import { Separator } from '@/components/ui/separator';
-import { Bot, Settings, Brain } from 'lucide-react';
+import { Bot, Settings, Brain, ArrowLeft } from 'lucide-react'; // Import ArrowLeft icon
 import { ModelConfigManager } from '@/components/settings/model-config-manager';
 import { LLMRoleManager } from '@/components/settings/llm-role-manager';
 export default function SettingsPage() {
+  const router = useRouter(); // Initialize router
   return (
     <div className="min-h-screen bg-background">
       <div className="container max-w-7xl mx-auto p-6 lg:p-8">
@@ -15,6 +18,15 @@ export default function SettingsPage() {
         {/* Header Section */}
         <div className="space-y-4">
           <div className="flex items-center space-x-4">
+            {/* Back Button */}
+            <button
+              onClick={() => router.push('/dashboard')}
+              className="flex items-center justify-center h-10 w-10 rounded-lg bg-primary/10 hover:bg-primary/20 transition-colors"
+              aria-label="Back to Dashboard"
+              type="button"
+            >
+              <ArrowLeft className="h-5 w-5 text-primary" />
+            </button>
             <div className="flex h-12 w-12 items-center justify-center rounded-lg bg-primary/10">
               <Settings className="h-6 w-6 text-primary" />
             </div>
@@ -57,4 +69,4 @@ export default function SettingsPage() {
       </div>
     </div>
   );
 }


@@ -0,0 +1,101 @@
"use client"
import {
BadgeCheck,
ChevronsUpDown,
LogOut,
Settings,
} from "lucide-react"
import {
Avatar,
AvatarFallback,
AvatarImage,
} from "@/components/ui/avatar"
import {
DropdownMenu,
DropdownMenuContent,
DropdownMenuGroup,
DropdownMenuItem,
DropdownMenuLabel,
DropdownMenuSeparator,
DropdownMenuTrigger,
} from "@/components/ui/dropdown-menu"
import { Button } from "@/components/ui/button"
import { useRouter, useParams } from "next/navigation"
export function UserDropdown({
user,
}: {
user: {
name: string
email: string
avatar: string
}
}) {
const router = useRouter()
const handleLogout = () => {
try {
if (typeof window !== 'undefined') {
localStorage.removeItem('surfsense_bearer_token');
router.push('/');
}
} catch (error) {
console.error('Error during logout:', error);
// Optionally, provide user feedback
if (typeof window !== 'undefined') {
alert('Logout failed. Please try again.');
router.push('/');
}
}
};
return (
<DropdownMenu>
<DropdownMenuTrigger asChild>
<Button
variant="ghost"
className="relative h-10 w-10 rounded-full"
>
<Avatar className="h-8 w-8">
<AvatarImage src={user.avatar} alt={user.name} />
<AvatarFallback>{user.name.charAt(0)?.toUpperCase() || '?'}</AvatarFallback>
</Avatar>
</Button>
</DropdownMenuTrigger>
<DropdownMenuContent
className="w-56"
align="end"
forceMount
>
<DropdownMenuLabel className="font-normal">
<div className="flex flex-col space-y-1">
<p className="text-sm font-medium leading-none">{user.name}</p>
<p className="text-xs leading-none text-muted-foreground">
{user.email}
</p>
</div>
</DropdownMenuLabel>
<DropdownMenuSeparator />
<DropdownMenuGroup>
<DropdownMenuItem onClick={() => router.push(`/dashboard/api-key`)}>
<BadgeCheck className="mr-2 h-4 w-4" />
API Key
</DropdownMenuItem>
</DropdownMenuGroup>
<DropdownMenuSeparator />
<DropdownMenuItem onClick={() => router.push(`/settings`)}>
<Settings className="mr-2 h-4 w-4" />
Settings
</DropdownMenuItem>
<DropdownMenuItem onClick={handleLogout}>
<LogOut className="mr-2 h-4 w-4" />
Log out
</DropdownMenuItem>
</DropdownMenuContent>
</DropdownMenu>
)
}
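For reference, a minimal usage sketch of the new UserDropdown component (the HeaderActions wrapper and its email prop are illustrative and not part of this commit; the dashboard page above builds the same user shape from users/me):

import { UserDropdown } from "@/components/UserDropdown";

// Illustrative only: mirrors how the dashboard page derives the user prop.
export function HeaderActions({ email }: { email: string }) {
  return (
    <UserDropdown
      user={{
        name: email.split("@")[0],
        email,
        avatar: "/icon-128.png",
      }}
    />
  );
}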


@@ -3,7 +3,7 @@
 import { useState } from "react";
 import { motion } from "framer-motion";
 import { cn } from "@/lib/utils";
-import { Plus, Search, Trash2 } from "lucide-react";
+import { MoveLeftIcon, Plus, Search, Trash2 } from "lucide-react";
 import { Button } from "@/components/ui/button";
 import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
@@ -33,6 +33,7 @@ import {
   FormLabel,
   FormMessage,
 } from "@/components/ui/form";
+import { useRouter } from "next/navigation";
 // Define the form schema with Zod
 const searchSpaceFormSchema = z.object({
@@ -59,7 +60,8 @@ export function SearchSpaceForm({
   initialData = { name: "", description: "" }
 }: SearchSpaceFormProps) {
   const [showDeleteDialog, setShowDeleteDialog] = useState(false);
+  const router = useRouter();
   // Initialize the form with React Hook Form and Zod validation
   const form = useForm<SearchSpaceFormValues>({
     resolver: zodResolver(searchSpaceFormSchema),
@@ -115,17 +117,32 @@ export function SearchSpaceForm({
       animate="visible"
       variants={containerVariants}
     >
-      <motion.div className="flex flex-col space-y-2" variants={itemVariants}>
-        <h2 className="text-3xl font-bold tracking-tight">
-          {isEditing ? "Edit Search Space" : "Create Search Space"}
-        </h2>
-        <p className="text-muted-foreground">
-          {isEditing
-            ? "Update your search space details"
-            : "Create a new search space to organize your documents, chats, and podcasts."}
-        </p>
+      <motion.div className="flex items-center justify-between" variants={itemVariants}>
+        <div className="flex flex-col space-y-2">
+          <h2 className="text-3xl font-bold tracking-tight">
+            {isEditing ? "Edit Search Space" : "Create Search Space"}
+          </h2>
+          <p className="text-muted-foreground">
+            {isEditing
+              ? "Update your search space details"
+              : "Create a new search space to organize your documents, chats, and podcasts."}
+          </p>
+        </div>
+        <button
+          className="group relative rounded-full p-3 bg-background/80 hover:bg-muted border border-border hover:border-primary/20 shadow-sm hover:shadow-md transition-all duration-200 backdrop-blur-sm"
+          onClick={() => {
+            router.push('/dashboard')
+          }}
+        >
+          <MoveLeftIcon
+            size={18}
+            className="text-muted-foreground group-hover:text-foreground transition-colors duration-200"
+          />
+          <div className="absolute inset-0 rounded-full bg-gradient-to-r from-blue-500/10 to-purple-500/10 opacity-0 group-hover:opacity-100 transition-opacity duration-300" />
+        </button>
       </motion.div>
       <motion.div
         className="w-full"
         variants={itemVariants}
@@ -190,9 +207,9 @@ export function SearchSpaceForm({
         </div>
       </Tilt>
     </motion.div>
     <Separator className="my-4" />
     <Form {...form}>
       <form onSubmit={form.handleSubmit(handleFormSubmit)} className="space-y-6">
         <FormField
@@ -211,7 +228,7 @@ export function SearchSpaceForm({
           </FormItem>
         )}
       />
       <FormField
         control={form.control}
         name="description"
@@ -228,7 +245,7 @@ export function SearchSpaceForm({
           </FormItem>
         )}
       />
       <div className="flex justify-end pt-2">
         <Button
           type="submit"


@@ -31,14 +31,6 @@ interface SearchSpace {
   user_id: string;
 }
-interface User {
-  id: string;
-  email: string;
-  is_active: boolean;
-  is_superuser: boolean;
-  is_verified: boolean;
-}
 interface AppSidebarProviderProps {
   searchSpaceId: string;
   navSecondary: {
@@ -58,20 +50,17 @@ interface AppSidebarProviderProps {
   }[];
 }
 export function AppSidebarProvider({
   searchSpaceId,
   navSecondary,
   navMain
 }: AppSidebarProviderProps) {
   const [recentChats, setRecentChats] = useState<{ name: string; url: string; icon: string; id: number; search_space_id: number; actions: { name: string; icon: string; onClick: () => void }[] }[]>([]);
   const [searchSpace, setSearchSpace] = useState<SearchSpace | null>(null);
-  const [user, setUser] = useState<User | null>(null);
   const [isLoadingChats, setIsLoadingChats] = useState(true);
   const [isLoadingSearchSpace, setIsLoadingSearchSpace] = useState(true);
-  const [isLoadingUser, setIsLoadingUser] = useState(true);
   const [chatError, setChatError] = useState<string | null>(null);
   const [searchSpaceError, setSearchSpaceError] = useState<string | null>(null);
-  const [userError, setUserError] = useState<string | null>(null);
   const [showDeleteDialog, setShowDeleteDialog] = useState(false);
   const [chatToDelete, setChatToDelete] = useState<{ id: number, name: string } | null>(null);
   const [isDeleting, setIsDeleting] = useState(false);
@@ -82,33 +71,6 @@ export function AppSidebarProvider({
     setIsClient(true);
   }, []);
-  // Fetch user details
-  useEffect(() => {
-    const fetchUser = async () => {
-      try {
-        // Only run on client-side
-        if (typeof window === 'undefined') return;
-        try {
-          // Use the API client instead of direct fetch
-          const userData = await apiClient.get<User>('users/me');
-          setUser(userData);
-          setUserError(null);
-        } catch (error) {
-          console.error('Error fetching user:', error);
-          setUserError(error instanceof Error ? error.message : 'Unknown error occurred');
-        } finally {
-          setIsLoadingUser(false);
-        }
-      } catch (error) {
-        console.error('Error in fetchUser:', error);
-        setIsLoadingUser(false);
-      }
-    };
-    fetchUser();
-  }, []);
   // Fetch recent chats
   useEffect(() => {
     const fetchRecentChats = async () => {
@@ -119,9 +81,9 @@ export function AppSidebarProvider({
       try {
         // Use the API client instead of direct fetch - filter by current search space ID
         const chats: Chat[] = await apiClient.get<Chat[]>(`api/v1/chats/?limit=5&skip=0&search_space_id=${searchSpaceId}`);
         // Sort chats by created_at in descending order (newest first)
         const sortedChats = chats.sort((a, b) =>
           new Date(b.created_at).getTime() - new Date(a.created_at).getTime()
         );
         // console.log("sortedChats", sortedChats);
@@ -171,7 +133,7 @@ export function AppSidebarProvider({
     // Set up a refresh interval (every 5 minutes)
     const intervalId = setInterval(fetchRecentChats, 5 * 60 * 1000);
     // Clean up interval on component unmount
     return () => clearInterval(intervalId);
   }, [searchSpaceId]);
@@ -179,16 +141,16 @@ export function AppSidebarProvider({
   // Handle delete chat
   const handleDeleteChat = async () => {
     if (!chatToDelete) return;
     try {
       setIsDeleting(true);
       // Use the API client instead of direct fetch
       await apiClient.delete(`api/v1/chats/${chatToDelete.id}`);
       // Close dialog and refresh chats
       setRecentChats(recentChats.filter(chat => chat.id !== chatToDelete.id));
     } catch (error) {
       console.error('Error deleting chat:', error);
     } finally {
@@ -226,15 +188,15 @@ export function AppSidebarProvider({
   }, [searchSpaceId]);
   // Create a fallback chat if there's an error or no chats
   const fallbackChats = chatError || (!isLoadingChats && recentChats.length === 0)
     ? [{
       name: chatError ? "Error loading chats" : "No recent chats",
       url: "#",
       icon: chatError ? "AlertCircle" : "MessageCircleMore",
       id: 0,
       search_space_id: Number(searchSpaceId),
       actions: []
     }]
     : [];
   // Use fallback chats if there's an error or no chats
@@ -249,22 +211,14 @@ export function AppSidebarProvider({
     };
   }
-  // Create user object for AppSidebar
-  const customUser = {
-    name: isClient && user?.email ? user.email.split('@')[0] : 'User',
-    email: isClient ? (user?.email || (isLoadingUser ? 'Loading...' : userError ? 'Error loading user' : 'Unknown User')) : 'Loading...',
-    avatar: '/icon-128.png', // Default avatar
-  };
   return (
     <>
       <AppSidebar
-        user={customUser}
         navSecondary={updatedNavSecondary}
         navMain={navMain}
         RecentChats={isClient ? displayChats : []}
       />
       {/* Delete Confirmation Dialog - Only render on client */}
       {isClient && (
         <Dialog open={showDeleteDialog} onOpenChange={setShowDeleteDialog}>


@@ -16,13 +16,13 @@ import {
   Trash2,
   Podcast,
   type LucideIcon,
+  FileText,
 } from "lucide-react"
 import { Logo } from "@/components/Logo";
 import { NavMain } from "@/components/sidebar/nav-main"
 import { NavProjects } from "@/components/sidebar/nav-projects"
 import { NavSecondary } from "@/components/sidebar/nav-secondary"
-import { NavUser } from "@/components/sidebar/nav-user"
 import {
   Sidebar,
   SidebarContent,
@@ -47,7 +47,8 @@ export const iconMap: Record<string, LucideIcon> = {
   Info,
   ExternalLink,
   Trash2,
-  Podcast
+  Podcast,
+  FileText
 }
 const defaultData = {
@@ -141,11 +142,6 @@ const defaultData = {
 }
 interface AppSidebarProps extends React.ComponentProps<typeof Sidebar> {
-  user?: {
-    name: string
-    email: string
-    avatar: string
-  }
   navMain?: {
     title: string
     url: string
@@ -176,7 +172,6 @@ interface AppSidebarProps extends React.ComponentProps<typeof Sidebar> {
 }
 export function AppSidebar({
-  user = defaultData.user,
   navMain = defaultData.navMain,
   navSecondary = defaultData.navSecondary,
   RecentChats = defaultData.RecentChats,
@@ -230,9 +225,9 @@ export function AppSidebar({
         {processedRecentChats.length > 0 && <NavProjects chats={processedRecentChats} />}
         <NavSecondary items={processedNavSecondary} className="mt-auto" />
       </SidebarContent>
-      <SidebarFooter>
-        <NavUser user={user} />
-      </SidebarFooter>
+      {/* <SidebarFooter>
+        footer
+      </SidebarFooter> */}
     </Sidebar>
   )
 }


@@ -74,7 +74,7 @@ function DropdownMenuItem({
       data-inset={inset}
       data-variant={variant}
       className={cn(
-        "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive-foreground data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/40 data-[variant=destructive]:focus:text-destructive-foreground data-[variant=destructive]:*:[svg]:!text-destructive-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-default items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4",
+        "focus:bg-accent focus:text-accent-foreground data-[variant=destructive]:text-destructive-foreground data-[variant=destructive]:focus:bg-destructive/10 dark:data-[variant=destructive]:focus:bg-destructive/40 data-[variant=destructive]:focus:text-destructive-foreground data-[variant=destructive]:*:[svg]:!text-destructive-foreground [&_svg:not([class*='text-'])]:text-muted-foreground relative flex cursor-pointer items-center gap-2 rounded-sm px-2 py-1.5 text-sm outline-hidden select-none data-[disabled]:pointer-events-none data-[disabled]:opacity-50 data-[inset]:pl-8 [&_svg]:pointer-events-none [&_svg]:shrink-0 [&_svg:not([class*='size-'])]:size-4 ",
       className
     )}
     {...props}


@@ -1 +1,2 @@
 export * from './useSearchSourceConnectors';
+export * from './use-logs';


@@ -0,0 +1,313 @@
"use client"
import { useState, useEffect, useCallback, useMemo } from 'react';
import { toast } from 'sonner';
export type LogLevel = "DEBUG" | "INFO" | "WARNING" | "ERROR" | "CRITICAL";
export type LogStatus = "IN_PROGRESS" | "SUCCESS" | "FAILED";
export interface Log {
id: number;
level: LogLevel;
status: LogStatus;
message: string;
source?: string;
log_metadata?: Record<string, any>;
created_at: string;
search_space_id: number;
}
export interface LogFilters {
search_space_id?: number;
level?: LogLevel;
status?: LogStatus;
source?: string;
start_date?: string;
end_date?: string;
}
export interface LogSummary {
total_logs: number;
time_window_hours: number;
by_status: Record<string, number>;
by_level: Record<string, number>;
by_source: Record<string, number>;
active_tasks: Array<{
id: number;
task_name: string;
message: string;
started_at: string;
source?: string;
}>;
recent_failures: Array<{
id: number;
task_name: string;
message: string;
failed_at: string;
source?: string;
error_details?: string;
}>;
}
export function useLogs(searchSpaceId?: number, filters: LogFilters = {}) {
const [logs, setLogs] = useState<Log[]>([]);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
// Memoize filters to prevent infinite re-renders
const memoizedFilters = useMemo(() => filters, [JSON.stringify(filters)]);
const buildQueryParams = useCallback((customFilters: LogFilters = {}) => {
const params = new URLSearchParams();
const allFilters = { ...memoizedFilters, ...customFilters };
if (allFilters.search_space_id) {
params.append('search_space_id', allFilters.search_space_id.toString());
}
if (allFilters.level) {
params.append('level', allFilters.level);
}
if (allFilters.status) {
params.append('status', allFilters.status);
}
if (allFilters.source) {
params.append('source', allFilters.source);
}
if (allFilters.start_date) {
params.append('start_date', allFilters.start_date);
}
if (allFilters.end_date) {
params.append('end_date', allFilters.end_date);
}
return params.toString();
}, [memoizedFilters]);
const fetchLogs = useCallback(async (customFilters: LogFilters = {}, options: { skip?: number; limit?: number } = {}) => {
try {
setLoading(true);
const params = new URLSearchParams(buildQueryParams(customFilters));
if (options.skip !== undefined) params.append('skip', options.skip.toString());
if (options.limit !== undefined) params.append('limit', options.limit.toString());
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/logs/?${params}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem('surfsense_bearer_token')}`,
},
method: "GET",
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || "Failed to fetch logs");
}
const data = await response.json();
setLogs(data);
setError(null);
return data;
} catch (err: any) {
setError(err.message || 'Failed to fetch logs');
console.error('Error fetching logs:', err);
throw err;
} finally {
setLoading(false);
}
}, [buildQueryParams]);
// Initial fetch
useEffect(() => {
const initialFilters = searchSpaceId ? { ...memoizedFilters, search_space_id: searchSpaceId } : memoizedFilters;
fetchLogs(initialFilters);
}, [searchSpaceId, fetchLogs, memoizedFilters]);
// Function to refresh the logs list
const refreshLogs = useCallback(async (customFilters: LogFilters = {}) => {
const finalFilters = searchSpaceId ? { ...customFilters, search_space_id: searchSpaceId } : customFilters;
return await fetchLogs(finalFilters);
}, [searchSpaceId, fetchLogs]);
// Function to create a new log
const createLog = useCallback(async (logData: Omit<Log, 'id' | 'created_at'>) => {
try {
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/logs/`,
{
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${localStorage.getItem('surfsense_bearer_token')}`,
},
method: "POST",
body: JSON.stringify(logData),
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || "Failed to create log");
}
const newLog = await response.json();
setLogs(prevLogs => [newLog, ...prevLogs]);
toast.success("Log created successfully");
return newLog;
} catch (err: any) {
toast.error(err.message || 'Failed to create log');
console.error('Error creating log:', err);
throw err;
}
}, []);
// Function to update a log
const updateLog = useCallback(async (logId: number, updateData: Partial<Omit<Log, 'id' | 'created_at' | 'search_space_id'>>) => {
try {
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/logs/${logId}`,
{
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${localStorage.getItem('surfsense_bearer_token')}`,
},
method: "PUT",
body: JSON.stringify(updateData),
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || "Failed to update log");
}
const updatedLog = await response.json();
setLogs(prevLogs =>
prevLogs.map(log => log.id === logId ? updatedLog : log)
);
toast.success("Log updated successfully");
return updatedLog;
} catch (err: any) {
toast.error(err.message || 'Failed to update log');
console.error('Error updating log:', err);
throw err;
}
}, []);
// Function to delete a log
const deleteLog = useCallback(async (logId: number) => {
try {
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/logs/${logId}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem('surfsense_bearer_token')}`,
},
method: "DELETE",
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || "Failed to delete log");
}
setLogs(prevLogs => prevLogs.filter(log => log.id !== logId));
toast.success("Log deleted successfully");
return true;
} catch (err: any) {
toast.error(err.message || 'Failed to delete log');
console.error('Error deleting log:', err);
return false;
}
}, []);
// Function to get a single log
const getLog = useCallback(async (logId: number) => {
try {
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/logs/${logId}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem('surfsense_bearer_token')}`,
},
method: "GET",
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || "Failed to fetch log");
}
return await response.json();
} catch (err: any) {
toast.error(err.message || 'Failed to fetch log');
console.error('Error fetching log:', err);
throw err;
}
}, []);
return {
logs,
loading,
error,
refreshLogs,
createLog,
updateLog,
deleteLog,
getLog,
fetchLogs
};
}
// Separate hook for log summary
export function useLogsSummary(searchSpaceId: number, hours: number = 24) {
const [summary, setSummary] = useState<LogSummary | null>(null);
const [loading, setLoading] = useState(true);
const [error, setError] = useState<string | null>(null);
const fetchSummary = useCallback(async () => {
if (!searchSpaceId) return;
try {
setLoading(true);
const response = await fetch(
`${process.env.NEXT_PUBLIC_FASTAPI_BACKEND_URL}/api/v1/logs/search-space/${searchSpaceId}/summary?hours=${hours}`,
{
headers: {
Authorization: `Bearer ${localStorage.getItem('surfsense_bearer_token')}`,
},
method: "GET",
}
);
if (!response.ok) {
const errorData = await response.json().catch(() => ({}));
throw new Error(errorData.detail || "Failed to fetch logs summary");
}
const data = await response.json();
setSummary(data);
setError(null);
return data;
} catch (err: any) {
setError(err.message || 'Failed to fetch logs summary');
console.error('Error fetching logs summary:', err);
throw err;
} finally {
setLoading(false);
}
}, [searchSpaceId, hours]);
useEffect(() => {
fetchSummary();
}, [fetchSummary]);
const refreshSummary = useCallback(() => {
return fetchSummary();
}, [fetchSummary]);
return { summary, loading, error, refreshSummary };
}
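For reference, a minimal consumption sketch of the new hooks (the LogsPanel component, its markup, and the chosen filters are illustrative, not part of this commit):

"use client";
import { useLogs, useLogsSummary } from "@/hooks/use-logs";

// Illustrative only: lists FAILED logs for a search space and shows 24h totals.
export function LogsPanel({ searchSpaceId }: { searchSpaceId: number }) {
  // Fetch logs filtered by status; refreshLogs() re-queries with the same filters.
  const { logs, loading, error, refreshLogs } = useLogs(searchSpaceId, { status: "FAILED" });
  // Aggregate counts over the last 24 hours.
  const { summary } = useLogsSummary(searchSpaceId, 24);

  if (loading) return <p>Loading logs...</p>;
  if (error) return <p>{error}</p>;

  return (
    <div>
      <p>
        {summary?.total_logs ?? 0} logs in the last {summary?.time_window_hours ?? 24} hours
      </p>
      <button onClick={() => refreshLogs()}>Refresh</button>
      <ul>
        {logs.map((log) => (
          <li key={log.id}>
            [{log.level}/{log.status}] {log.message}
          </li>
        ))}
      </ul>
    </div>
  );
}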