From 792bec4c406949bab2abe39e56ef77515f494de8 Mon Sep 17 00:00:00 2001
From: Dmitriy Kazimirov <dmitriy.kazimirov@viorsan.com>
Date: Wed, 2 Apr 2025 05:00:15 +0000
Subject: [PATCH] logging docs + generate URLs on the backend so the frontend
 doesn't have to.

---
 plugin/search_books.js        | 29 ++++++++++++---
 readme.md                     | 11 +++++-
 src/api/app.py                | 68 +++++++++++++++++++++++++++++++++--
 src/api/templates/files.html  |  2 +-
 src/api/templates/search.html |  2 +-
 5 files changed, 103 insertions(+), 9 deletions(-)

diff --git a/plugin/search_books.js b/plugin/search_books.js
index 4ef6c16..cd5c75f 100644
--- a/plugin/search_books.js
+++ b/plugin/search_books.js
@@ -68,19 +68,40 @@ function search_books(params, userSettings) {
       
       // Create properly encoded URL
       let formattedUrl = '';
+      console.log(`Result: ${JSON.stringify(result)}`); // Debugging log
+      console.log(`Raw URL: ${result.raw_url}`); // Debugging log
+
       if (result.raw_url) {
         try {
           // Split URL into parts and encode components separately
           const url = new URL(result.raw_url);
-          const pathParts = url.pathname.split('/').map(part =>
-            encodeURIComponent(part).replace(/'/g, "%27")
+          console.log(`Raw URL: ${result.raw_url}`); // Debugging log
+          //const pathParts = url.pathname.split('/').map(part =>
+          //  encodeURIComponent(part).replace(/'/g, "%27")
+          //);
+          const pathParts = url.pathname.split('/').map(part => 
+            encodeURIComponent(decodeURIComponent(part)) // fix double encoding
+              .replace(/'/g, "%27")
           );
-          const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : '';
-          formattedUrl = `${url.origin}${pathParts.join('/')}${search}`;
+          console.log(`Path parts: ${pathParts}`); // Debugging log
+
+
+          // Correct encoding of query params
+          //const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : '';
+          const search = url.searchParams.toString(); // automatic encode of param
+          console.log(`Search params: ${search}`); // Debugging log
+          //formattedUrl = `${url.origin}${pathParts.join('/')}${search}`;
+          formattedUrl = `${url.origin}${pathParts.join('/')}${search ? `?${search}` : ''}`;
+          console.log(`Formatted URL: ${formattedUrl}`); // Debugging log
+
         } catch (e) {
+          console.error('Error parsing URL:', e); // Debugging log
           formattedUrl = result.raw_url; // Fallback to original if URL parsing fails
+          console.log(`Fallback URL: ${formattedUrl}`); // Debugging log
         }
       }
+
+      
       
       return `Book: ${result.file_path}\n` +
              `Snippet: ${result.snippet}\n` +
diff --git a/readme.md b/readme.md
index 3d07cc8..fccd50f 100644
--- a/readme.md
+++ b/readme.md
@@ -103,10 +103,19 @@ docker-compose logs -f api
 docker-compose down && docker-compose up -d --build
 ```
 
-Logs 
+Logs (app)
 ```bash
  docker logs booksearch_app  -f
 ```
+Logs (elasticsearch)
+```bash
+ docker logs booksearch_elastic  -f
+```
+Logs (both)
+```bash
+ docker-compose logs -f
+```
+
 
 ### Log Rotation
 Configure Docker log rotation in `/etc/docker/daemon.json`:
diff --git a/src/api/app.py b/src/api/app.py
index f839b0f..957cab3 100644
--- a/src/api/app.py
+++ b/src/api/app.py
@@ -104,12 +104,14 @@ def search():
                 file_path = file_path[len("/books/"):]
 
             url = f"{base_url}/{file_path}"
-            raw_url = f"{base_url}/file/{file_path}?format=html"
-            
+            raw_url_old = f"{base_url}/file/{file_path}?format=html"
+            raw_url = f"{base_url}/file_html/{file_path}"
+
             search_results.append({
                 "file_path": file_path,
                 "url": url,
                 "raw_url": raw_url,
+                "raw_url_old": raw_url_old,
                 "snippet": snippet,
                 "score": hit['_score']
             })
@@ -193,8 +195,70 @@ def list_files():
             return jsonify({"error": str(e)}), 500
         return render_template('files.html', error=str(e))
 
+@app.route('/file_html/<path:file_path>', methods=['GET'])
+def get_file_html(file_path):
+    """Serve the HTML version of a file stored under /books.
+
+    EPUB documents are converted to HTML with only basic formatting tags kept;
+    any other file is read as UTF-8 text. Responds 403 when the resolved path
+    leaves /books, 500 when EPUB conversion fails and 404 on any other error.
+    """
+    books_dir = os.path.abspath("/books")
+    allowed_tags = {'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li'}
+    # TODO: remove this logic from the regular serve route (presumably get_file; confirm)
+    # Decode URL-encoded path; drop leading slashes and a duplicate 'books/' segment
+    decoded_path = unquote(file_path).lstrip('/')
+    if decoded_path.startswith('books/'):
+        decoded_path = decoded_path[len('books/'):]
+    full_path = os.path.abspath(os.path.join(books_dir, decoded_path))
+
+    # Compare on a path-separator boundary: a bare startswith(books_dir) would
+    # also accept sibling directories such as /books_private.
+    if full_path != books_dir and not full_path.startswith(books_dir + os.sep):
+        return jsonify({"error": "Access denied: File path outside of books directory"}), 403
+
+    try:
+        if file_path.lower().endswith('.epub'):
+            # Convert EPUB to HTML
+            try:
+                book = epub.read_epub(full_path)
+                html_content = []
+                for item in book.get_items():
+                    if item.get_type() != ebooklib.ITEM_DOCUMENT:
+                        continue
+                    content = item.get_content()
+                    if not content:
+                        continue
+                    soup = BeautifulSoup(content, 'html.parser')
+                    # Keep basic formatting tags; unwrap the rest so their text survives
+                    for tag in soup.find_all():
+                        if tag.name not in allowed_tags:
+                            tag.unwrap()
+                    html_content.append(str(soup))
+            except Exception as e:
+                logging.error(f"Error processing EPUB {full_path}: {str(e)}")
+                return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500
+            return render_template('text_file.html', file_path=file_path,
+                                   content='<hr>'.join(html_content), is_html=True)
+
+        # Handle regular text files
+        with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
+            content = f.read()
+
+        # Clients whose Accept header lacks text/html (e.g. API calls) get plain text
+        if 'text/html' not in request.headers.get('Accept', ''):
+            return content, 200, {'Content-Type': 'text/plain; charset=utf-8'}
+
+        # Otherwise, render a simple HTML page with the content
+        return render_template('text_file.html', file_path=file_path, content=content)
+    except Exception as e:
+        return jsonify({"error": str(e)}), 404
+
+
+
 @app.route('/file/<path:file_path>', methods=['GET'])
 def get_file(file_path):
+    """Serve the file with proper headers"""
     # Ensure the file path is within the /books directory
     books_dir = "/books"
     
diff --git a/src/api/templates/files.html b/src/api/templates/files.html
index e89a227..e759e81 100644
--- a/src/api/templates/files.html
+++ b/src/api/templates/files.html
@@ -115,7 +115,7 @@
                 <span class="file-name">
                     <a href="/file/{{ file.path }}">
                     {% if file.path.endswith('.epub') %}
-                    <br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a>
+                    <br><a href="/file_html/{{ file.path }}" class="plain-view-link">(View as HTML)</a>
                     {% endif %}
                         {% if file.title != file.name %}
                             <span class="book-title">{{ file.title }}</span>
diff --git a/src/api/templates/search.html b/src/api/templates/search.html
index 604cfeb..4ed9576 100644
--- a/src/api/templates/search.html
+++ b/src/api/templates/search.html
@@ -37,7 +37,7 @@
                 <div class="file-actions">
                     <a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a>
                     <span class="action-separator">|</span>
-                    <a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a>
+                    <a href="/file_html/{{ result.file_path.replace('/books/', '') }}" class="file-action">View as HTML</a>
                 </div>
             </div>
             {% endfor %}