Enhance logging configuration and update application version to 0.0.8; improve file indexing title extraction

This commit is contained in:
Dmitriy Kazimirov 2025-04-06 13:49:05 +00:00
parent ee6a1b21c7
commit da243e7f2f
2 changed files with 48 additions and 3 deletions
src

View file

@ -9,6 +9,43 @@ from bs4 import BeautifulSoup
import PyPDF2
import time
import logging
from logging.config import dictConfig
# Configure logging once, at import time: the application logs at INFO and
# above, while the chatty Elasticsearch client stack (elasticsearch,
# elastic_transport, urllib3) is limited to WARNING and above.
dictConfig({
    'version': 1,
    # dictConfig defaults this to True, which silently disables every logger
    # that was already created by modules imported before this call.
    'disable_existing_loggers': False,
    'formatters': {
        'default': {
            'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
        },
    },
    'handlers': {
        'console': {
            'class': 'logging.StreamHandler',
            'formatter': 'default',
            'stream': 'ext://sys.stdout',
        },
    },
    'root': {
        'level': 'INFO',
        'handlers': ['console'],
    },
    'loggers': {
        # These loggers do not propagate to the root logger, so each one needs
        # the console handler attached directly. Without a handler their
        # warnings and errors would bypass the formatter above and be written
        # by logging's last-resort handler (stderr, bare message only).
        noisy_logger: {
            'level': 'WARNING',
            'handlers': ['console'],
            'propagate': False,
        }
        for noisy_logger in ('elastic_transport', 'urllib3', 'elasticsearch')
    },
})
from logging.config import dictConfig
import multiprocessing
from src.core.index import index_files, get_progress, is_file_indexed, are_files_indexed
from io import StringIO
@ -18,7 +55,7 @@ from markdown_it import MarkdownIt
from functools import lru_cache
from threading import Lock
# Application version
APP_VERSION = "0.0.7 (2025 Apr 6th)"
APP_VERSION = "0.0.8 (2025 Apr 6th)"
# Configuration constants
ITEMS_PER_PAGE = int(os.environ.get("ITEMS_PER_PAGE", 50)) # Default items per page
@ -910,6 +947,6 @@ def get_folder_contents(folder_path):
return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
# Logging configured via dictConfig at module level
logging.info("Starting the API - inside main block")
app.run(debug=True, host='0.0.0.0')

View file

@ -372,9 +372,17 @@ def index_single_file(file_path):
try:
encoded_file_path = file_path.encode('utf-8').decode('utf-8')
# Extract book title from filename if possible
filename = os.path.basename(file_path)
title = filename
if ' - ' in filename: # Common pattern in filenames
title_parts = filename.split(' - ')
if len(title_parts) > 1:
title = ' - '.join(title_parts[:-1]) # Take all but last part
# Update progress
with progress_lock:
indexing_progress['current_file'] = file_path
indexing_progress['current_file'] = f"{title} ({filename})"
# Extract text based on file type
if file_path.endswith(".epub"):