Enhance logging configuration and update application version to 0.0.8; improve file indexing title extraction

2025-04-06 13:49:05 +00:00 · 2025-04-06 13:49:05 +00:00 · da243e7f2f
commit da243e7f2f
parent ee6a1b21c7
2 changed files with 48 additions and 3 deletions
--- a/src/api/app.py
+++ b/src/api/app.py
@ -9,6 +9,43 @@ from bs4 import BeautifulSoup
 import PyPDF2
 import time
 import logging
+from logging.config import dictConfig
+
+# Root logger: INFO and above to stdout. Elasticsearch/urllib3 loggers are capped at WARNING and do not propagate to root.
+dictConfig({
+    'version': 1,
+    'formatters': {
+        'default': {
+            'format': '[%(asctime)s] %(levelname)s in %(module)s: %(message)s',
+        }
+    },
+    'handlers': {
+        'console': {
+            'class': 'logging.StreamHandler',
+            'formatter': 'default',
+            'stream': 'ext://sys.stdout'
+        }
+    },
+    'root': {
+        'level': 'INFO',
+        'handlers': ['console']
+    },
+    'loggers': {
+        'elastic_transport': {
+            'level': 'WARNING',
+            'propagate': False
+        },
+        'urllib3': {
+            'level': 'WARNING',
+            'propagate': False
+        },
+        'elasticsearch': {
+            'level': 'WARNING',
+            'propagate': False
+        }
+    }
+})
+
 import multiprocessing
 from src.core.index import index_files, get_progress, is_file_indexed, are_files_indexed
 from io import StringIO
@ -18,7 +55,7 @@ from markdown_it import MarkdownIt
 from functools import lru_cache
 from threading import Lock
 # Application version
-APP_VERSION = "0.0.7 (2025 Apr 6th)"
+APP_VERSION = "0.0.8 (2025 Apr 6th)"

 # Configuration constants
 ITEMS_PER_PAGE = int(os.environ.get("ITEMS_PER_PAGE", 50))  # Default items per page
@ -910,6 +947,6 @@ def get_folder_contents(folder_path):
        return jsonify({"error": str(e)}), 500

 if __name__ == '__main__':
-    logging.basicConfig(level=logging.DEBUG)
+    # Logging configured via dictConfig at module level
    logging.info("Starting the API - inside main block")
    app.run(debug=True, host='0.0.0.0')
--- a/src/core/index.py
+++ b/src/core/index.py
@ -372,9 +372,17 @@ def index_single_file(file_path):
    try:
        encoded_file_path = file_path.encode('utf-8').decode('utf-8')
        
+        # Extract book title from filename if possible
+        filename = os.path.basename(file_path)
+        title = filename
+        if ' - ' in filename:  # Common pattern in filenames
+            title_parts = filename.split(' - ')
+            if len(title_parts) > 1:
+                title = ' - '.join(title_parts[:-1])  # Take all but last part
+        
        # Update progress
        with progress_lock:
-            indexing_progress['current_file'] = file_path
+            indexing_progress['current_file'] = f"{title} ({filename})"
        
        # Extract text based on file type
        if file_path.endswith(".epub"):