From 792bec4c406949bab2abe39e56ef77515f494de8 Mon Sep 17 00:00:00 2001 From: Dmitriy Kazimirov <dmitriy.kazimirov@viorsan.com> Date: Wed, 2 Apr 2025 05:00:15 +0000 Subject: [PATCH] logging docs + fixed generation of urls on back so we don't have to do it on front. --- plugin/search_books.js | 29 ++++++++++++--- readme.md | 11 +++++- src/api/app.py | 68 +++++++++++++++++++++++++++++++++-- src/api/templates/files.html | 2 +- src/api/templates/search.html | 2 +- 5 files changed, 103 insertions(+), 9 deletions(-) diff --git a/plugin/search_books.js b/plugin/search_books.js index 4ef6c16..cd5c75f 100644 --- a/plugin/search_books.js +++ b/plugin/search_books.js @@ -68,19 +68,40 @@ function search_books(params, userSettings) { // Create properly encoded URL let formattedUrl = ''; + console.log(`Result: ${JSON.stringify(result)}`); // Debugging log + console.log(`Raw URL: ${result.raw_url}`); // Debugging log + if (result.raw_url) { try { // Split URL into parts and encode components separately const url = new URL(result.raw_url); - const pathParts = url.pathname.split('/').map(part => - encodeURIComponent(part).replace(/'/g, "%27") + console.log(`Raw URL: ${result.raw_url}`); // Debugging log + //const pathParts = url.pathname.split('/').map(part => + // encodeURIComponent(part).replace(/'/g, "%27") + //); + const pathParts = url.pathname.split('/').map(part => + encodeURIComponent(decodeURIComponent(part)) // fix double encoding + .replace(/'/g, "%27") ); - const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : ''; - formattedUrl = `${url.origin}${pathParts.join('/')}${search}`; + console.log(`Path parts: ${pathParts}`); // Debugging log + + + // Correct encoding of query params + //const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : ''; + const search = url.searchParams.toString(); // automatic encode of param + console.log(`Search params: ${search}`); // Debugging log + //formattedUrl = `${url.origin}${pathParts.join('/')}${search}`; + formattedUrl = `${url.origin}${pathParts.join('/')}${search ? `?${search}` : ''}`; + console.log(`Formatted URL: ${formattedUrl}`); // Debugging log + } catch (e) { + console.error('Error parsing URL:', e); // Debugging log formattedUrl = result.raw_url; // Fallback to original if URL parsing fails + console.log(`Fallback URL: ${formattedUrl}`); // Debugging log } } + + return `Book: ${result.file_path}\n` + `Snippet: ${result.snippet}\n` + diff --git a/readme.md b/readme.md index 3d07cc8..fccd50f 100644 --- a/readme.md +++ b/readme.md @@ -103,10 +103,19 @@ docker-compose logs -f api docker-compose down && docker-compose up -d --build ``` -Logs +Logs (app) ```bash docker logs booksearch_app -f ``` +Logs (elasticsearch) +```bash + docker logs booksearch_elastic -f +``` +Logs (both) +```bash + docker-compose logs -f +``` + ### Log Rotation Configure Docker log rotation in `/etc/docker/daemon.json`: diff --git a/src/api/app.py b/src/api/app.py index f839b0f..957cab3 100644 --- a/src/api/app.py +++ b/src/api/app.py @@ -104,12 +104,14 @@ def search(): file_path = file_path[len("/books/"):] url = f"{base_url}/{file_path}" - raw_url = f"{base_url}/file/{file_path}?format=html" - + raw_url_old = f"{base_url}/file/{file_path}?format=html" + raw_url = f"{base_url}/file_html/{file_path}" + search_results.append({ "file_path": file_path, "url": url, "raw_url": raw_url, + "raw_url_old": raw_url_old, "snippet": snippet, "score": hit['_score'] }) @@ -193,8 +195,70 @@ def list_files(): return jsonify({"error": str(e)}), 500 return render_template('files.html', error=str(e)) +@app.route('/file_html/<path:file_path>', methods=['GET']) +def get_file_html(file_path): + """Serve the HTML version of the file""" + # Ensure the file path is within the /books directory + books_dir = "/books" + # TODO: remove this logic from regular erv + + # Decode URL-encoded path and normalize + decoded_path = unquote(file_path) + # Remove any leading slashes or duplicate 'books/' segments + decoded_path = decoded_path.lstrip('/') + if decoded_path.startswith('books/'): + decoded_path = decoded_path[6:] + + # Join paths safely + full_path = os.path.normpath(os.path.join(books_dir, decoded_path)) + + # Validate the path is within the books directory + if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): + return jsonify({"error": "Access denied: File path outside of books directory"}), 403 + + try: + # Handle EPUB files + if file_path.lower().endswith('.epub'): + # Convert EPUB to HTML + try: + book = epub.read_epub(full_path) + html_content = [] + for item in book.get_items(): + if item.get_type() == ebooklib.ITEM_DOCUMENT: + content = item.get_content() + if content: + soup = BeautifulSoup(content, 'html.parser') + # Preserve basic formatting tags + for tag in soup.find_all(): + if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']: + tag.unwrap() + html_content.append(str(soup)) + except Exception as e: + logging.error(f"Error processing EPUB {full_path}: {str(e)}") + return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500 + return render_template('text_file.html', + file_path=file_path, + content='<hr>'.join(html_content), + is_html=True) + + # Handle regular text files + with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + + # If it's an API request or the Accept header doesn't include HTML, return plain text + if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''): + return content, 200, {'Content-Type': 'text/plain; charset=utf-8'} + + # Otherwise, render a simple HTML page with the content + return render_template('text_file.html', file_path=file_path, content=content) + except Exception as e: + return jsonify({"error": str(e)}), 404 + + + @app.route('/file/<path:file_path>', methods=['GET']) def get_file(file_path): + """Serve the file with proper headers""" # Ensure the file path is within the /books directory books_dir = "/books" diff --git a/src/api/templates/files.html b/src/api/templates/files.html index e89a227..e759e81 100644 --- a/src/api/templates/files.html +++ b/src/api/templates/files.html @@ -115,7 +115,7 @@ <span class="file-name"> <a href="/file/{{ file.path }}"> {% if file.path.endswith('.epub') %} - <br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a> + <br><a href="/file_html/{{ file.path }}" class="plain-view-link">(View as HTML)</a> {% endif %} {% if file.title != file.name %} <span class="book-title">{{ file.title }}</span> diff --git a/src/api/templates/search.html b/src/api/templates/search.html index 604cfeb..4ed9576 100644 --- a/src/api/templates/search.html +++ b/src/api/templates/search.html @@ -37,7 +37,7 @@ <div class="file-actions"> <a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a> <span class="action-separator">|</span> - <a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a> + <a href="/file_html/{{ result.file_path.replace('/books/', '') }}" class="file-action">View as HTML</a> </div> </div> {% endfor %}