diff --git a/.env b/.env index a860be4..0c0e8b8 100644 --- a/.env +++ b/.env @@ -1,60 +1,60 @@ -# Application Configuration -# ======================== - -# Base URL for the application (required, string) -# Format: http://hostname:port or https://hostname:port -BASE_URL=http://localhost:8000 - -# CPU Limit for container (optional, float) -# Number of CPU cores to allocate (e.g., 0.5, 1, 2) -# Default: 2 (will be used if not specified) -CPU_LIMIT=2 - -# Snippet character limit (optional, integer) -# Maximum length for text snippets in characters -# Default: 100 -SNIPPET_CHAR_LIMIT=100 - -# Debug mode (optional, boolean) -# Enable debug output when set to True -# Default: False -DEBUG=False - -# Application port (optional, integer) -# Port the application listens on -# Default: 5000 -PORT=5000 - - -# Elasticsearch Configuration -# ========================== - -# Elasticsearch host (required, string) -# Hostname or IP of Elasticsearch service -ELASTICSEARCH_HOST=elasticsearch - -# Elasticsearch username (sensitive, required, string) -# Admin username for Elasticsearch -ELASTICSEARCH_USERNAME=admin - -# Elasticsearch password (sensitive, required, string) -# Admin password for Elasticsearch -ELASTICSEARCH_PASSWORD=password - - -# File Storage Configuration -# ========================= - -# SMB share path (optional, string) -# Local path where books are mounted -# Default: ./smb_share -SMB_SHARE_PATH=./smb_share - -# Admin Credentials -# ================ - -# Admin username for API management (required, string) -ADMIN_USER=admin - -# Admin password for API management (required, string) +# Application Configuration +# ======================== + +# Base URL for the application (required, string) +# Format: http://hostname:port or https://hostname:port +BASE_URL=http://localhost:8000 + +# CPU Limit for container (optional, float) +# Number of CPU cores to allocate (e.g., 0.5, 1, 2) +# Default: 2 (will be used if not specified) +CPU_LIMIT=2 + +# Snippet character limit (optional, integer) +# Maximum length for text snippets in characters +# Default: 100 +SNIPPET_CHAR_LIMIT=100 + +# Debug mode (optional, boolean) +# Enable debug output when set to True +# Default: False +DEBUG=False + +# Application port (optional, integer) +# Port the application listens on +# Default: 5000 +PORT=5000 + + +# Elasticsearch Configuration +# ========================== + +# Elasticsearch host (required, string) +# Hostname or IP of Elasticsearch service +ELASTICSEARCH_HOST=elasticsearch + +# Elasticsearch username (sensitive, required, string) +# Admin username for Elasticsearch +ELASTICSEARCH_USERNAME=admin + +# Elasticsearch password (sensitive, required, string) +# Admin password for Elasticsearch +ELASTICSEARCH_PASSWORD=password + + +# File Storage Configuration +# ========================= + +# SMB share path (optional, string) +# Local path where books are mounted +# Default: ./smb_share +SMB_SHARE_PATH=./smb_share + +# Admin Credentials +# ================ + +# Admin username for API management (required, string) +ADMIN_USER=admin + +# Admin password for API management (required, string) ADMIN_PASSWORD=securepassword123 \ No newline at end of file diff --git a/.github/workflows/test-epub-viewer.yml b/.github/workflows/test-epub-viewer.yml index 52a723a..2fc6597 100644 --- a/.github/workflows/test-epub-viewer.yml +++ b/.github/workflows/test-epub-viewer.yml @@ -1,39 +1,39 @@ -name: Test EPUB Viewer - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -jobs: - test: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v3 - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install pytest - pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - - name: Run tests - run: | - cd api - python -m pytest test_epub_viewer.py -v - - - name: Run integration tests with Playwright - run: | - pip install playwright pytest-playwright - playwright install - cd api - python -m pytest test_epub_viewer_integration.py -v +name: Test EPUB Viewer + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest + pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Run tests + run: | + cd api + python -m pytest test_epub_viewer.py -v + + - name: Run integration tests with Playwright + run: | + pip install playwright pytest-playwright + playwright install + cd api + python -m pytest test_epub_viewer_integration.py -v if: false # Disabled until we create the integration tests with Playwright \ No newline at end of file diff --git a/.gitignore b/.gitignore index bf64a5c..999ef3a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,30 +1,30 @@ -# Ignore all EPUB files in smb_share -smb_share/*.epub - -# Ignore sample text file -smb_share/sample.txt - -# Python cache -__pycache__/ -*.py[cod] -*$py.class - -# Virtual environment -venv/ - -# IDE specific files -.vscode/ -.idea/ - -# Logs and databases -*.log -*.sqlite - -# OS generated files -.DS_Store -.DS_Store? -._* -.Spotlight-V100 -.Trashes -ehthumbs.db +# Ignore all EPUB files in smb_share +smb_share/*.epub + +# Ignore sample text file +smb_share/sample.txt + +# Python cache +__pycache__/ +*.py[cod] +*$py.class + +# Virtual environment +venv/ + +# IDE specific files +.vscode/ +.idea/ + +# Logs and databases +*.log +*.sqlite + +# OS generated files +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db Thumbs.db \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 06b0849..c670ef8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,29 +1,29 @@ -FROM python:3.9-alpine - -WORKDIR /app - -# Install dependencies -RUN pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 pytz - -# Create books directory with proper permissions -RUN mkdir -p /books && chmod 777 /books - -# Copy the API code and static files -COPY src/api/app.py . -COPY src/api/static /app/static -COPY src/api/templates /app/templates - -# Expose the API port -EXPOSE 5000 - -# Copy the indexing script -COPY src/core/index.py . - -# Copy the test file -COPY tests/unit/test_app.py . - -# Add a dummy file to invalidate cache -ADD dummy.txt . - -# Command to run the API +FROM python:3.9-alpine + +WORKDIR /app + +# Install dependencies +RUN pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 pytz + +# Create books directory with proper permissions +RUN mkdir -p /books && chmod 777 /books + +# Copy the API code and static files +COPY src/api/app.py . +COPY src/api/static /app/static +COPY src/api/templates /app/templates + +# Expose the API port +EXPOSE 5000 + +# Copy the indexing script +COPY src/core/index.py . + +# Copy the test file +COPY tests/unit/test_app.py . + +# Add a dummy file to invalidate cache +ADD dummy.txt . + +# Command to run the API CMD ["python", "app.py"] \ No newline at end of file diff --git a/ai.md b/ai.md index 702db09..9f302d4 100644 --- a/ai.md +++ b/ai.md @@ -1,33 +1,33 @@ -# Final Deployment Status - -## Configuration Summary: -1. **Container Auto-Restart**: - - Both services configured with `restart: unless-stopped` - - Containers will automatically restart on failures - -2. **Resource Limits**: - - CPU: `${CPU_LIMIT}` cores - - Memory: 2GB limit - -3. **Dependencies**: - - pytz installed in container (version 2025.2) - - All required Python packages verified - - Dockerfile updated to include pytz for future builds - -4. **Known Issues**: - - Docker Compose v1.25.0 limitations: - - Doesn't respect container_name directives - - Shows harmless deploy key warnings - - Solution: Upgrade to Docker Compose v2.x - -## Verification: -- All services running -- CORS headers properly configured -- pytz module successfully imported (version 2025.2) -- API endpoints functional - -## System Status: OPERATIONAL -- API: Running on port 8000 -- Elasticsearch: Running on port 9200 -- Auto-restart configured +# Final Deployment Status + +## Configuration Summary: +1. **Container Auto-Restart**: + - Both services configured with `restart: unless-stopped` + - Containers will automatically restart on failures + +2. **Resource Limits**: + - CPU: `${CPU_LIMIT}` cores + - Memory: 2GB limit + +3. **Dependencies**: + - pytz installed in container (version 2025.2) + - All required Python packages verified + - Dockerfile updated to include pytz for future builds + +4. **Known Issues**: + - Docker Compose v1.25.0 limitations: + - Doesn't respect container_name directives + - Shows harmless deploy key warnings + - Solution: Upgrade to Docker Compose v2.x + +## Verification: +- All services running +- CORS headers properly configured +- pytz module successfully imported (version 2025.2) +- API endpoints functional + +## System Status: OPERATIONAL +- API: Running on port 8000 +- Elasticsearch: Running on port 9200 +- Auto-restart configured - All features functional \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index a8e4ee6..ffc004f 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,39 +1,39 @@ -version: '3.7' -services: - booksearch_app: - build: . - container_name: booksearch_app - ports: - - "8000:5000" - environment: - - ELASTICSEARCH_HOST=booksearch_elastic - - BASE_URL=${BASE_URL} - - CPU_LIMIT=${CPU_LIMIT} - - SNIPPET_CHAR_LIMIT=${SNIPPET_CHAR_LIMIT} - volumes: - - ./smb_share:/books - depends_on: - - booksearch_elastic - restart: unless-stopped - deploy: - resources: - limits: - cpus: ${CPU_LIMIT} - memory: 2G - - booksearch_elastic: - container_name: booksearch_elastic - image: bitnami/elasticsearch:latest - ports: - - "9200:9200" - - "9300:9300" - environment: - - discovery.type=single-node - - ELASTICSEARCH_USERNAME=admin - - ELASTICSEARCH_PASSWORD=password - restart: unless-stopped - healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:9200"] - interval: 30s - timeout: 10s +version: '3.7' +services: + booksearch_app: + build: . + container_name: booksearch_app + ports: + - "8000:5000" + environment: + - ELASTICSEARCH_HOST=booksearch_elastic + - BASE_URL=${BASE_URL} + - CPU_LIMIT=${CPU_LIMIT} + - SNIPPET_CHAR_LIMIT=${SNIPPET_CHAR_LIMIT} + volumes: + - ./smb_share:/books + depends_on: + - booksearch_elastic + restart: unless-stopped + deploy: + resources: + limits: + cpus: ${CPU_LIMIT} + memory: 2G + + booksearch_elastic: + container_name: booksearch_elastic + image: bitnami/elasticsearch:latest + ports: + - "9200:9200" + - "9300:9300" + environment: + - discovery.type=single-node + - ELASTICSEARCH_USERNAME=admin + - ELASTICSEARCH_PASSWORD=password + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9200"] + interval: 30s + timeout: 10s retries: 5 \ No newline at end of file diff --git a/plugin/search_books.js b/plugin/search_books.js index 85ed68e..180bde3 100644 --- a/plugin/search_books.js +++ b/plugin/search_books.js @@ -1,109 +1,109 @@ -function search_books(params, userSettings) { - const query = params.query; - const apiUrl = (userSettings.apiUrl || 'http://localhost:8000').replace(/\/$/, ''); - const useProxy = userSettings.useProxy || false; - const proxyUrl = userSettings.proxyUrl || 'https://cors-anywhere.herokuapp.com/'; - - // Debugging headers - WARNING: Only for development/testing - const debugHeaders = userSettings.debugHeaders || {}; - - if (!query) { - throw new Error('Search query is required'); - } - - // Prepare the target URL - const targetUrl = `${apiUrl}/search?query=${encodeURIComponent(query)}`; - const requestUrl = useProxy ? `${proxyUrl}${targetUrl}` : targetUrl; - - // Add timeout handling - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 10000); - - // Prepare headers - const headers = { - 'Accept': 'application/json', - ...(useProxy ? { 'X-Requested-With': 'XMLHttpRequest' } : {}), - ...debugHeaders // Add debug headers if provided - }; - - return fetch(requestUrl, { - method: 'GET', - headers: headers, - signal: controller.signal - }) - .then(async response => { - clearTimeout(timeoutId); - - if (!response.ok) { - const errorBody = await response.text().catch(() => ''); - throw new Error(`API request failed with status ${response.status}. Response: ${errorBody}`); - } - - const contentType = response.headers.get('content-type'); - if (!contentType || !contentType.includes('application/json')) { - throw new Error(`Invalid content type: ${contentType}`); - } - - return response.json(); - }) - .then(results => { - if (!Array.isArray(results)) { - throw new Error(`Invalid response format. Expected array, got ${typeof results}`); - } - - if (results.length === 0) { - return 'No books found matching your search'; - } - - // Format results with book paths and snippets - return results.map(result => { - if (!result.file_path || !result.snippet) { - throw new Error('Invalid result format - missing required fields'); - } - - // Create properly encoded URL - let formattedUrl = ''; - if (result.raw_url) { - try { - // Split URL into parts and encode components separately - const url = new URL(result.raw_url); - const pathParts = url.pathname.split('/').map(part => - encodeURIComponent(part).replace(/'/g, "%27") - ); - const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : ''; - formattedUrl = `${url.origin}${pathParts.join('/')}${search}`; - } catch (e) { - formattedUrl = result.raw_url; // Fallback to original if URL parsing fails - } - } - - return `Book: ${result.file_path}\n` + - `Snippet: ${result.snippet}\n` + - (formattedUrl ? `URL: ${formattedUrl}\n` : ''); - }).join('\n\n'); - }) - .catch(error => { - clearTimeout(timeoutId); - let errorMessage = `Error searching books: ${error.message}`; - - if (error.name === 'AbortError') { - errorMessage += '\n\nDiagnostics: Request timed out. Check if:'; - errorMessage += `\n- The API is running at ${apiUrl}`; - errorMessage += '\n- The server is accessible from your network'; - if (!useProxy) { - errorMessage += '\n- Try enabling proxy in plugin settings'; - } - } else if (error.message.includes('Failed to fetch') || error.message.includes('CORS')) { - errorMessage += '\n\nDiagnostics: Network request failed. Check if:'; - errorMessage += `\n- The API URL (${apiUrl}) is correct`; - errorMessage += '\n- CORS is properly configured on the server'; - errorMessage += '\n- The server is running and accessible'; - if (!useProxy) { - errorMessage += '\n- Try enabling proxy in plugin settings to bypass CORS'; - } - errorMessage += '\n- For debugging, you can add CORS headers in plugin settings'; - } - - return errorMessage; - }); +function search_books(params, userSettings) { + const query = params.query; + const apiUrl = (userSettings.apiUrl || 'http://localhost:8000').replace(/\/$/, ''); + const useProxy = userSettings.useProxy || false; + const proxyUrl = userSettings.proxyUrl || 'https://cors-anywhere.herokuapp.com/'; + + // Debugging headers - WARNING: Only for development/testing + const debugHeaders = userSettings.debugHeaders || {}; + + if (!query) { + throw new Error('Search query is required'); + } + + // Prepare the target URL + const targetUrl = `${apiUrl}/search?query=${encodeURIComponent(query)}`; + const requestUrl = useProxy ? `${proxyUrl}${targetUrl}` : targetUrl; + + // Add timeout handling + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 10000); + + // Prepare headers + const headers = { + 'Accept': 'application/json', + ...(useProxy ? { 'X-Requested-With': 'XMLHttpRequest' } : {}), + ...debugHeaders // Add debug headers if provided + }; + + return fetch(requestUrl, { + method: 'GET', + headers: headers, + signal: controller.signal + }) + .then(async response => { + clearTimeout(timeoutId); + + if (!response.ok) { + const errorBody = await response.text().catch(() => ''); + throw new Error(`API request failed with status ${response.status}. Response: ${errorBody}`); + } + + const contentType = response.headers.get('content-type'); + if (!contentType || !contentType.includes('application/json')) { + throw new Error(`Invalid content type: ${contentType}`); + } + + return response.json(); + }) + .then(results => { + if (!Array.isArray(results)) { + throw new Error(`Invalid response format. Expected array, got ${typeof results}`); + } + + if (results.length === 0) { + return 'No books found matching your search'; + } + + // Format results with book paths and snippets + return results.map(result => { + if (!result.file_path || !result.snippet) { + throw new Error('Invalid result format - missing required fields'); + } + + // Create properly encoded URL + let formattedUrl = ''; + if (result.raw_url) { + try { + // Split URL into parts and encode components separately + const url = new URL(result.raw_url); + const pathParts = url.pathname.split('/').map(part => + encodeURIComponent(part).replace(/'/g, "%27") + ); + const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : ''; + formattedUrl = `${url.origin}${pathParts.join('/')}${search}`; + } catch (e) { + formattedUrl = result.raw_url; // Fallback to original if URL parsing fails + } + } + + return `Book: ${result.file_path}\n` + + `Snippet: ${result.snippet}\n` + + (formattedUrl ? `URL: ${formattedUrl}\n` : ''); + }).join('\n\n'); + }) + .catch(error => { + clearTimeout(timeoutId); + let errorMessage = `Error searching books: ${error.message}`; + + if (error.name === 'AbortError') { + errorMessage += '\n\nDiagnostics: Request timed out. Check if:'; + errorMessage += `\n- The API is running at ${apiUrl}`; + errorMessage += '\n- The server is accessible from your network'; + if (!useProxy) { + errorMessage += '\n- Try enabling proxy in plugin settings'; + } + } else if (error.message.includes('Failed to fetch') || error.message.includes('CORS')) { + errorMessage += '\n\nDiagnostics: Network request failed. Check if:'; + errorMessage += `\n- The API URL (${apiUrl}) is correct`; + errorMessage += '\n- CORS is properly configured on the server'; + errorMessage += '\n- The server is running and accessible'; + if (!useProxy) { + errorMessage += '\n- Try enabling proxy in plugin settings to bypass CORS'; + } + errorMessage += '\n- For debugging, you can add CORS headers in plugin settings'; + } + + return errorMessage; + }); } \ No newline at end of file diff --git a/plugin/search_books.spec.json b/plugin/search_books.spec.json index b0b6ffe..0c5ad6d 100644 --- a/plugin/search_books.spec.json +++ b/plugin/search_books.spec.json @@ -1,16 +1,16 @@ -{ - "name": "search_books", - "description": "Search for books by content using the book search API.", - "parameters": { - "type": "object", - "properties": { - "query": { - "type": "string", - "description": "The search query to find matching book content" - } - }, - "required": [ - "query" - ] - } +{ + "name": "search_books", + "description": "Search for books by content using the book search API.", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "The search query to find matching book content" + } + }, + "required": [ + "query" + ] + } } \ No newline at end of file diff --git a/plugin/userSettings.json b/plugin/userSettings.json index 96b2754..eee8787 100644 --- a/plugin/userSettings.json +++ b/plugin/userSettings.json @@ -1,45 +1,45 @@ -[ - { - "name": "cors_allow_origin", - "label": "CORS Allowed Origin", - "required": false, - "default": "*", - "description": "Value for Access-Control-Allow-Origin header, typically '*' for public APIs" - }, - { - "name": "cors_allow_methods", - "label": "CORS Allowed Methods", - "required": false, - "default": "GET, POST, PUT", - "description": "Comma-separated HTTP methods for Access-Control-Allow-Methods header" - }, - { - "name": "cors_allow_headers", - "label": "CORS Allowed Headers", - "required": false, - "default": "Content-Type", - "description": "Comma-separated headers for Access-Control-Allow-Headers" - }, - { - "name": "proxyUrl", - "label": "Proxy Server URL", - "required": false, - "default": "", - "description": "URL of the proxy server to use for external requests" - }, - { - "name": "bookSearchAPIKey", - "label": "Search Engine API Key", - "type": "password", - "default":"", - "required":false, - "description": "API Key to use for while making requests (not yet used)" - }, - { - "name": "apiUrl", - "label": "API Base URL", - "required": false, - "default": "http://localhost:8000", - "description": "Base URL for the API endpoints" - } +[ + { + "name": "cors_allow_origin", + "label": "CORS Allowed Origin", + "required": false, + "default": "*", + "description": "Value for Access-Control-Allow-Origin header, typically '*' for public APIs" + }, + { + "name": "cors_allow_methods", + "label": "CORS Allowed Methods", + "required": false, + "default": "GET, POST, PUT", + "description": "Comma-separated HTTP methods for Access-Control-Allow-Methods header" + }, + { + "name": "cors_allow_headers", + "label": "CORS Allowed Headers", + "required": false, + "default": "Content-Type", + "description": "Comma-separated headers for Access-Control-Allow-Headers" + }, + { + "name": "proxyUrl", + "label": "Proxy Server URL", + "required": false, + "default": "", + "description": "URL of the proxy server to use for external requests" + }, + { + "name": "bookSearchAPIKey", + "label": "Search Engine API Key", + "type": "password", + "default":"", + "required":false, + "description": "API Key to use for while making requests (not yet used)" + }, + { + "name": "apiUrl", + "label": "API Base URL", + "required": false, + "default": "http://localhost:8000", + "description": "Base URL for the API endpoints" + } ] \ No newline at end of file diff --git a/readme.md b/readme.md index 4a46b1c..9e68480 100644 --- a/readme.md +++ b/readme.md @@ -1,241 +1,241 @@ - -# What it IS? - -## TypeMind Plugin: EPUB/PDF/TXT Search Integration - -A plugin for [TypeMind](https://docs.typingmind.com/plugins/build-a-typingmind-plugin) that mimics the **WebSearch** feature but focuses on retrieving books/documents. Users can query, e.g., *"Find me books about Hecate"*, and the plugin returns **clickable links** to relevant files (EPUB, PDF, TXT). - -### Features -- **File Formats**: Supports EPUB, PDF, and TXT (assumed compatibility). -- **Requirement**: Users must provide their own files for indexing. - -### Technical Context -- **Language**: Python. -- **Skill Level**: - - My Python knowledge is **extremely rusty** (last project: a not too simple game bot years ago). - - Self-assessment: **Python novice**. -- **Tools Used**: - - **Sonnet 3.7** and **DeepSeek-V3-0324** (for AI/ML integration). - - **RooCode** - -### Purpose -1. **Experiment**: Test RooCode’s capabilities and identify practical applications. -2. **Non-Production**: **⚠️ Do NOT deploy this in production** (even if "fixed" by someone). - ---- - -### Key Notes -- Humor/self-deprecation preserved (e.g., "extremely rusty," "novice"). -- Technical terms standardized (Sonnet 3.7, DeepSeek-V3-0324). -- Critical warnings emphasized (**bold + emoji** for production risk). - - - -# Application Deployment Guide (Ubuntu LTS) - -## Prerequisites - -### System Requirements -- Ubuntu 22.04 LTS (64-bit) -- Minimum 2 CPU cores, 4GB RAM -- 20GB free disk space -- Open ports: 8000 (app), 9200 (Elasticsearch) - -### Required Software -```bash -# Update package lists -sudo apt update - -# Install Docker and Docker Compose -sudo apt install -y docker.io docker-compose -sudo systemctl enable --now docker - -# Add current user to docker group (logout required) -sudo usermod -aG docker $USER -``` - -## Environment Configuration - -1. Clone the repository: -```bash -git clone https://github.com/intari/roocodetests_1.git -cd roocodetests_1 -``` - -2. Configure environment variables: -```bash -# Copy example .env file -cp .env.example .env - -# Edit configuration (nano/vim) -nano .env -``` -Key variables to configure: -- `BASE_URL`: Public URL of your application -- `ELASTICSEARCH_PASSWORD`: Secure password for Elasticsearch -- `CPU_LIMIT`: CPU cores to allocate (default: 2) - -## Application Deployment - -1. Start all services: -```bash -docker-compose up -d -``` - -2. Verify services are running: -```bash -docker-compose ps -``` - -3. Check application logs: -```bash -docker-compose logs -f api -``` - -4. Access the application: -- Web interface: http://your-server-ip:8000 -- Elasticsearch: http://your-server-ip:9200 - -## Maintenance - -## restart & rebuild -```bash -docker-compose down && docker-compose up -d --build -``` - -Logs -```bash - docker logs booksearch_app -f -``` - -### Log Rotation -Configure Docker log rotation in `/etc/docker/daemon.json`: -```json -{ - "log-driver": "json-file", - "log-opts": { - "max-size": "10m", - "max-file": "3" - } -} -``` -Then restart Docker: -```bash -sudo systemctl restart docker -``` - -### Backups -1. Create backup script (`/usr/local/bin/backup-app.sh`): -```bash -#!/bin/bash -BACKUP_DIR=/var/backups/app -mkdir -p $BACKUP_DIR -docker-compose exec -T elasticsearch curl -X POST "localhost:9200/_snapshot/backup_repo/_all" -H "Content-Type: application/json" -docker-compose exec -T elasticsearch curl -X GET "localhost:9200/_snapshot/backup_repo/snapshot_$(date +%Y-%m-%d)?pretty" -``` - -2. Make executable and schedule daily cron job: -```bash -sudo chmod +x /usr/local/bin/backup-app.sh -sudo crontab -e -# Add: 0 3 * * * /usr/local/bin/backup-app.sh -``` - -### Updates -1. Pull latest changes: -```bash -git pull origin main -``` - -2. Rebuild containers: -```bash -docker-compose up -d --build -``` - -## Troubleshooting - -### Common Issues - -**Application not starting:** -```bash -# Check container status -docker ps -a - -# View logs -docker-compose logs api -``` - -**Elasticsearch health issues:** -```bash -# Check cluster health -curl -X GET "localhost:9200/_cluster/health?pretty" - -# Check node stats -curl -X GET "localhost:9200/_nodes/stats?pretty" -``` - -**Port conflicts:** -```bash -# Check used ports -sudo netstat -tulnp - -# Change ports in docker-compose.yml if needed -``` - -### Debugging -1. Access running container shell: -```bash -docker-compose exec api bash -``` - -2. Check resource usage: -```bash -docker stats -``` - -## Check Request via JSON : -curl -H "Accept: application/json" -X GET https://booksearch.yourdomain.com/search?query=android - -# Simple search -curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search?query=android" - -# Search with format parameter -curl "https://booksearch.yourdomain.com/search?query=android&format=json" - -# Error case -curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search" - - -## API Endpoints - -### Search API -``` -GET /search?query={query}[&format=json] -``` - -### Reset Elasticsearch Index -``` -POST /reset_index -Headers: -- Authorization: Basic base64(username:password) -``` - -Example: -```bash -curl -X POST -u admin:securepassword123 https://booksearch.yourdomain.com/reset_index -``` - -## References -- [Ubuntu Docker Installation](https://docs.docker.com/engine/install/ubuntu/) -- [Docker Compose Reference](https://docs.docker.com/compose/reference/) -- [Elasticsearch Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html) - - -## Plugin-alt -method:get -https://booksearch.yourdomain.com/search?query={prompt}&format=json -alt version for plugin -request headers -{ -"Accept": "application/json" + +# What it IS? + +## TypeMind Plugin: EPUB/PDF/TXT Search Integration + +A plugin for [TypeMind](https://docs.typingmind.com/plugins/build-a-typingmind-plugin) that mimics the **WebSearch** feature but focuses on retrieving books/documents. Users can query, e.g., *"Find me books about Hecate"*, and the plugin returns **clickable links** to relevant files (EPUB, PDF, TXT). + +### Features +- **File Formats**: Supports EPUB, PDF, and TXT (assumed compatibility). +- **Requirement**: Users must provide their own files for indexing. + +### Technical Context +- **Language**: Python. +- **Skill Level**: + - My Python knowledge is **extremely rusty** (last project: a not too simple game bot years ago). + - Self-assessment: **Python novice**. +- **Tools Used**: + - **Sonnet 3.7** and **DeepSeek-V3-0324** (for AI/ML integration). + - **RooCode** + +### Purpose +1. **Experiment**: Test RooCode’s capabilities and identify practical applications. +2. **Non-Production**: **⚠️ Do NOT deploy this in production** (even if "fixed" by someone). + +--- + +### Key Notes +- Humor/self-deprecation preserved (e.g., "extremely rusty," "novice"). +- Technical terms standardized (Sonnet 3.7, DeepSeek-V3-0324). +- Critical warnings emphasized (**bold + emoji** for production risk). + + + +# Application Deployment Guide (Ubuntu LTS) + +## Prerequisites + +### System Requirements +- Ubuntu 22.04 LTS (64-bit) +- Minimum 2 CPU cores, 4GB RAM +- 20GB free disk space +- Open ports: 8000 (app), 9200 (Elasticsearch) + +### Required Software +```bash +# Update package lists +sudo apt update + +# Install Docker and Docker Compose +sudo apt install -y docker.io docker-compose +sudo systemctl enable --now docker + +# Add current user to docker group (logout required) +sudo usermod -aG docker $USER +``` + +## Environment Configuration + +1. Clone the repository: +```bash +git clone https://github.com/intari/roocodetests_1.git +cd roocodetests_1 +``` + +2. Configure environment variables: +```bash +# Copy example .env file +cp .env.example .env + +# Edit configuration (nano/vim) +nano .env +``` +Key variables to configure: +- `BASE_URL`: Public URL of your application +- `ELASTICSEARCH_PASSWORD`: Secure password for Elasticsearch +- `CPU_LIMIT`: CPU cores to allocate (default: 2) + +## Application Deployment + +1. Start all services: +```bash +docker-compose up -d +``` + +2. Verify services are running: +```bash +docker-compose ps +``` + +3. Check application logs: +```bash +docker-compose logs -f api +``` + +4. Access the application: +- Web interface: http://your-server-ip:8000 +- Elasticsearch: http://your-server-ip:9200 + +## Maintenance + +## restart & rebuild +```bash +docker-compose down && docker-compose up -d --build +``` + +Logs +```bash + docker logs booksearch_app -f +``` + +### Log Rotation +Configure Docker log rotation in `/etc/docker/daemon.json`: +```json +{ + "log-driver": "json-file", + "log-opts": { + "max-size": "10m", + "max-file": "3" + } +} +``` +Then restart Docker: +```bash +sudo systemctl restart docker +``` + +### Backups +1. Create backup script (`/usr/local/bin/backup-app.sh`): +```bash +#!/bin/bash +BACKUP_DIR=/var/backups/app +mkdir -p $BACKUP_DIR +docker-compose exec -T elasticsearch curl -X POST "localhost:9200/_snapshot/backup_repo/_all" -H "Content-Type: application/json" +docker-compose exec -T elasticsearch curl -X GET "localhost:9200/_snapshot/backup_repo/snapshot_$(date +%Y-%m-%d)?pretty" +``` + +2. Make executable and schedule daily cron job: +```bash +sudo chmod +x /usr/local/bin/backup-app.sh +sudo crontab -e +# Add: 0 3 * * * /usr/local/bin/backup-app.sh +``` + +### Updates +1. Pull latest changes: +```bash +git pull origin main +``` + +2. Rebuild containers: +```bash +docker-compose up -d --build +``` + +## Troubleshooting + +### Common Issues + +**Application not starting:** +```bash +# Check container status +docker ps -a + +# View logs +docker-compose logs api +``` + +**Elasticsearch health issues:** +```bash +# Check cluster health +curl -X GET "localhost:9200/_cluster/health?pretty" + +# Check node stats +curl -X GET "localhost:9200/_nodes/stats?pretty" +``` + +**Port conflicts:** +```bash +# Check used ports +sudo netstat -tulnp + +# Change ports in docker-compose.yml if needed +``` + +### Debugging +1. Access running container shell: +```bash +docker-compose exec api bash +``` + +2. Check resource usage: +```bash +docker stats +``` + +## Check Request via JSON : +curl -H "Accept: application/json" -X GET https://booksearch.yourdomain.com/search?query=android + +# Simple search +curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search?query=android" + +# Search with format parameter +curl "https://booksearch.yourdomain.com/search?query=android&format=json" + +# Error case +curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search" + + +## API Endpoints + +### Search API +``` +GET /search?query={query}[&format=json] +``` + +### Reset Elasticsearch Index +``` +POST /reset_index +Headers: +- Authorization: Basic base64(username:password) +``` + +Example: +```bash +curl -X POST -u admin:securepassword123 https://booksearch.yourdomain.com/reset_index +``` + +## References +- [Ubuntu Docker Installation](https://docs.docker.com/engine/install/ubuntu/) +- [Docker Compose Reference](https://docs.docker.com/compose/reference/) +- [Elasticsearch Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html) + + +## Plugin-alt +method:get +https://booksearch.yourdomain.com/search?query={prompt}&format=json +alt version for plugin +request headers +{ +"Accept": "application/json" } \ No newline at end of file diff --git a/roocodetest.code-workspace b/roocodetest.code-workspace index 876a149..9017da0 100644 --- a/roocodetest.code-workspace +++ b/roocodetest.code-workspace @@ -1,8 +1,8 @@ -{ - "folders": [ - { - "path": "." - } - ], - "settings": {} +{ + "folders": [ + { + "path": "." + } + ], + "settings": {} } \ No newline at end of file diff --git a/scripts/run_tests.bat b/scripts/run_tests.bat index 17ac1c6..cacacf2 100644 --- a/scripts/run_tests.bat +++ b/scripts/run_tests.bat @@ -1,37 +1,37 @@ -@echo off -echo Setting up test environment... - -echo Checking Python version... -python --version -if errorlevel 1 ( - echo Python not found. Please install Python 3.10+ first. - pause - exit /b 1 -) - -echo Installing Python dependencies... -python -m pip install --upgrade pip --user -if errorlevel 1 ( - echo Failed to upgrade pip - pause - exit /b 1 -) - -pip install -r requirements.txt --user -if errorlevel 1 ( - echo Failed to install dependencies - pause - exit /b 1 -) - -echo Running EPUB viewer tests... -cd api -python -m pytest test_epub_viewer.py -v -if errorlevel 1 ( - echo Some tests failed - pause - exit /b 1 -) - -echo All tests completed successfully! +@echo off +echo Setting up test environment... + +echo Checking Python version... +python --version +if errorlevel 1 ( + echo Python not found. Please install Python 3.10+ first. + pause + exit /b 1 +) + +echo Installing Python dependencies... +python -m pip install --upgrade pip --user +if errorlevel 1 ( + echo Failed to upgrade pip + pause + exit /b 1 +) + +pip install -r requirements.txt --user +if errorlevel 1 ( + echo Failed to install dependencies + pause + exit /b 1 +) + +echo Running EPUB viewer tests... +cd api +python -m pytest test_epub_viewer.py -v +if errorlevel 1 ( + echo Some tests failed + pause + exit /b 1 +) + +echo All tests completed successfully! pause \ No newline at end of file diff --git a/scripts/run_tests.sh b/scripts/run_tests.sh index 6f6bba6..0aed9f0 100644 --- a/scripts/run_tests.sh +++ b/scripts/run_tests.sh @@ -1,29 +1,29 @@ -#!/bin/bash - -echo "Setting up test environment..." - -echo "Checking Python version..." -python3 --version || { - echo "Python 3 not found. Please install Python 3.10+ first." - exit 1 -} - -echo "Installing Python dependencies..." -python3 -m pip install --upgrade pip --user || { - echo "Failed to upgrade pip" - exit 1 -} - -pip3 install -r requirements.txt --user || { - echo "Failed to install dependencies" - exit 1 -} - -echo "Running EPUB viewer tests..." -cd api -python3 -m pytest test_epub_viewer.py -v || { - echo "Some tests failed" - exit 1 -} - +#!/bin/bash + +echo "Setting up test environment..." + +echo "Checking Python version..." +python3 --version || { + echo "Python 3 not found. Please install Python 3.10+ first." + exit 1 +} + +echo "Installing Python dependencies..." +python3 -m pip install --upgrade pip --user || { + echo "Failed to upgrade pip" + exit 1 +} + +pip3 install -r requirements.txt --user || { + echo "Failed to install dependencies" + exit 1 +} + +echo "Running EPUB viewer tests..." +cd api +python3 -m pytest test_epub_viewer.py -v || { + echo "Some tests failed" + exit 1 +} + echo "All tests completed successfully!" \ No newline at end of file diff --git a/src/api/app.py b/src/api/app.py index eac3325..ea4713d 100644 --- a/src/api/app.py +++ b/src/api/app.py @@ -1,406 +1,406 @@ -from flask import Flask, request, jsonify, render_template, send_from_directory -from urllib.parse import unquote -from elasticsearch import Elasticsearch -import os -import ebooklib -from ebooklib import epub -from bs4 import BeautifulSoup -import PyPDF2 -import time -import logging -import multiprocessing -import sys -from pathlib import Path -sys.path.append(str(Path(__file__).parent.parent)) -from index import index_files, get_progress -from io import StringIO -import sys - -app = Flask(__name__, static_folder='static') - -@app.after_request -def add_cors_headers(response): - response.headers['Access-Control-Allow-Origin'] = '*' - response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT' - response.headers['Access-Control-Allow-Headers'] = 'Content-Type' - return response - -# Elasticsearch Configuration -ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost") -ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200)) -INDEX_NAME = "book_index" - -# Wait for Elasticsearch to be available -es = None -while True: - try: - es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}]) - if es.ping(): - print("Connected to Elasticsearch") - break - else: - print("Elasticsearch not available, retrying...") - except Exception as e: - print(f"Error connecting to Elasticsearch: {e}") - time.sleep(5) - -def extract_text_from_epub(epub_path): - try: - book = epub.read_epub(epub_path) - text = '' - for item in book.get_items(): - if item.media_type == 'application/xhtml+xml': - content = item.get_content() - if content: - soup = BeautifulSoup(content, 'html.parser') - text += soup.get_text() - return text - except Exception as e: - logging.error(f"Error processing EPUB {epub_path}: {str(e)}") - return f"Error extracting text: {str(e)}" - -def extract_text_from_pdf(pdf_path): - text = '' - with open(pdf_path, 'rb') as pdf_file: - pdf_reader = PyPDF2.PdfReader(pdf_file) - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num] - text += page.extract_text() - return text - -@app.route('/', methods=['GET']) -def home(): - return render_template('search.html') - -@app.route('/search', methods=['GET']) -def search(): - query = request.args.get('query') - if not query: - if request.headers.get('Accept') == 'application/json': - return jsonify({"error": "Query parameter is required"}), 400 - return render_template('search.html', query='') - - try: - results = es.search(index=INDEX_NAME, query={'match': {'content': query}}) - hits = results['hits']['hits'] - - search_results = [] - for hit in hits: - file_path = hit['_source']['file_path'] - content = hit['_source']['content'] - - # Highlight snippet (simple version) - snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100)) - index = content.lower().find(query.lower()) - if index != -1: - start = max(0, index - snippet_char_limit) - end = min(len(content), index + snippet_char_limit + len(query)) - snippet = content[start:end] - else: - snippet = "No snippet found" - - # Get base URL from environment - base_url = os.environ.get("BASE_URL", "http://localhost:8000") - # Construct URLs - # Remove "/books/" from path start if it's here - if file_path.startswith("/books/"): - file_path = file_path[len("/books/"):] - - url = f"{base_url}/{file_path}" - raw_url = f"{base_url}/file/{file_path}?format=html" - - search_results.append({ - "file_path": file_path, - "url": url, - "raw_url": raw_url, - "snippet": snippet, - "score": hit['_score'] - }) - - # If it's an API request or format=json is specified - if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': - response = jsonify({ - "query": query, - "results": search_results, - "total": len(search_results), - "took": results['took'] - }) - response.headers['Content-Type'] = 'application/json' - return response - - # Otherwise, render the HTML template - return render_template('search.html', results=search_results, query=query) - - except Exception as e: - if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': - response = jsonify({ - "error": str(e), - "query": query - }) - response.headers['Content-Type'] = 'application/json' - return response, 500 - return render_template('search.html', error=str(e), query=query) - -@app.route('/files', methods=['GET']) -def list_files(): - books_dir = "/books" - files = [] - - try: - # Check if indexing is in progress - indexing_in_progress = get_progress() is not None - - for filename in os.listdir(books_dir): - file_path = os.path.join(books_dir, filename) - if os.path.isfile(file_path): - file_size = os.path.getsize(file_path) - # Extract book title from filename if possible - title = filename - if ' - ' in filename: # Common pattern in filenames - title_parts = filename.split(' - ') - if len(title_parts) > 1: - title = ' - '.join(title_parts[:-1]) # Take all but last part - - files.append({ - 'name': filename, - 'title': title, - 'path': filename, - 'size': file_size, - 'size_mb': round(file_size / (1024 * 1024), 2) - }) - - # Calculate totals - total_files = len(files) - total_size = sum(f['size'] for f in files) - total_size_mb = round(total_size / (1024 * 1024), 2) - - # If it's an API request, return JSON - if request.headers.get('Accept') == 'application/json': - return jsonify({ - 'files': files, - 'total_files': total_files, - 'total_size': total_size, - 'total_size_mb': total_size_mb, - 'indexing_in_progress': indexing_in_progress - }) - - # Otherwise, render the HTML template - return render_template('files.html', - files=files, - total_files=total_files, - total_size=total_size, - total_size_mb=total_size_mb, - indexing_in_progress=indexing_in_progress) - except Exception as e: - if request.headers.get('Accept') == 'application/json': - return jsonify({"error": str(e)}), 500 - return render_template('files.html', error=str(e)) - -@app.route('/file/<path:file_path>', methods=['GET']) -def get_file(file_path): - # Ensure the file path is within the /books directory - books_dir = "/books" - - # Decode URL-encoded path and normalize - decoded_path = unquote(file_path) - # Remove any leading slashes or duplicate 'books/' segments - decoded_path = decoded_path.lstrip('/') - if decoded_path.startswith('books/'): - decoded_path = decoded_path[6:] - - # Join paths safely - full_path = os.path.normpath(os.path.join(books_dir, decoded_path)) - - # Validate the path is within the books directory - if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): - return jsonify({"error": "Access denied: File path outside of books directory"}), 403 - - try: - # Handle EPUB files - if file_path.lower().endswith('.epub'): - if request.args.get('format') == 'html': - # Convert EPUB to HTML - try: - book = epub.read_epub(full_path) - html_content = [] - for item in book.get_items(): - if item.get_type() == ebooklib.ITEM_DOCUMENT: - content = item.get_content() - if content: - soup = BeautifulSoup(content, 'html.parser') - # Preserve basic formatting tags - for tag in soup.find_all(): - if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']: - tag.unwrap() - html_content.append(str(soup)) - except Exception as e: - logging.error(f"Error processing EPUB {full_path}: {str(e)}") - return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500 - return render_template('text_file.html', - file_path=file_path, - content='<hr>'.join(html_content), - is_html=True) - else: - # Render the viewer template - return render_template('epub_viewer.html', file_path=file_path) - - # Handle regular text files - with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: - content = f.read() - - # If it's an API request or the Accept header doesn't include HTML, return plain text - if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''): - return content, 200, {'Content-Type': 'text/plain; charset=utf-8'} - - # Otherwise, render a simple HTML page with the content - return render_template('text_file.html', file_path=file_path, content=content) - except Exception as e: - return jsonify({"error": str(e)}), 404 - -@app.route('/epub/<path:file_path>', methods=['GET']) -def get_epub_file(file_path): - """Serve the raw EPUB file with proper headers""" - books_dir = "/books" - full_path = os.path.join(books_dir, file_path) - - # Validate the path is within the books directory - if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): - return jsonify({"error": "Access denied: File path outside of books directory"}), 403 - - try: - # Serve the raw EPUB file with proper headers - response = send_from_directory( - books_dir, - file_path, - as_attachment=True, - mimetype='application/epub+zip' - ) - response.headers['Access-Control-Allow-Origin'] = '*' - response.headers['Access-Control-Allow-Methods'] = 'GET' - response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"' - return response - except Exception as e: - return jsonify({"error": str(e)}), 404 - -@app.route('/index_books', methods=['GET']) -def index_books(): - logging.info("Indexing books endpoint called") - - # Get CPU configuration - cpu_limit = os.environ.get("CPU_LIMIT") - available_cpus = multiprocessing.cpu_count() - used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1) - - # Capture stdout to a string - old_stdout = sys.stdout - sys.stdout = captured_output = StringIO() - - try: - # Start indexing in a separate thread - from threading import Thread - index_thread = Thread(target=index_files, args=("/books",)) - index_thread.start() - - # If it's an API request, return immediately - if request.headers.get('Accept') == 'application/json': - return jsonify({"message": "Indexing started in background"}) - - # Otherwise, render the progress page with CPU info - return render_template('indexing.html', - available_cpus=available_cpus, - used_cpus=used_cpus) - - except Exception as e: - logging.error(f"Indexing failed: {e}") - sys.stdout = old_stdout - - if request.headers.get('Accept') == 'application/json': - return jsonify({"error": str(e)}), 500 - - # Create a simple HTML response for errors - return render_template('indexing_error.html', error=str(e)) - finally: - sys.stdout = old_stdout - -@app.route('/indexing_progress', methods=['GET']) -def get_indexing_progress(): - progress = get_progress() - if progress is None: - return jsonify({"status": "not_running"}) - - # Format time for display - from datetime import datetime - import pytz - - # Get browser timezone from Accept-Language header or use UTC as fallback - browser_tz = request.headers.get('X-Timezone', 'UTC') - try: - tz = pytz.timezone(browser_tz) - except pytz.UnknownTimeZoneError: - tz = pytz.UTC - - elapsed_min = int(progress['elapsed_time'] // 60) - elapsed_sec = int(progress['elapsed_time'] % 60) - - if progress['estimated_remaining'] > 0: - remaining_min = int(progress['estimated_remaining'] // 60) - remaining_sec = int(progress['estimated_remaining'] % 60) - completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)') - else: - remaining_min = 0 - remaining_sec = 0 - completion_time = "N/A" - - return jsonify({ - "status": "running", - "total_files": progress['total_files'], - "processed_files": progress['processed_files'], - "percentage": round(progress['percentage'], 1), - "current_file": progress['current_file'], - "elapsed_time": f"{elapsed_min}m {elapsed_sec}s", - "estimated_remaining": f"{remaining_min}m {remaining_sec}s", - "estimated_completion": completion_time, - "errors": progress['errors'] - }) - -@app.route('/abort_indexing', methods=['POST']) -def abort_indexing(): - # In a real implementation, we would set a flag to stop the indexing - # For now, we'll just return a message - return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"}) - -@app.route('/reset_index', methods=['POST']) -def reset_index(): - """Reset the Elasticsearch index by deleting and recreating it""" - try: - # Check for basic auth - auth = request.authorization - if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"): - return jsonify({"error": "Authentication required"}), 401 - - # Delete existing index if it exists - if es.indices.exists(index=INDEX_NAME): - es.indices.delete(index=INDEX_NAME) - - # Create new index with mapping - es.indices.create(index=INDEX_NAME, body={ - "settings": { - "number_of_shards": 1, - "number_of_replicas": 0 - }, - "mappings": { - "properties": { - "file_path": {"type": "keyword"}, - "content": {"type": "text"} - } - } - }) - - return jsonify({"status": "success", "message": "Index reset successfully"}) - except Exception as e: - return jsonify({"error": str(e)}), 500 - -if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) - logging.info("Starting the API - inside main block") +from flask import Flask, request, jsonify, render_template, send_from_directory +from urllib.parse import unquote +from elasticsearch import Elasticsearch +import os +import ebooklib +from ebooklib import epub +from bs4 import BeautifulSoup +import PyPDF2 +import time +import logging +import multiprocessing +import sys +from pathlib import Path +sys.path.append(str(Path(__file__).parent.parent)) +from index import index_files, get_progress +from io import StringIO +import sys + +app = Flask(__name__, static_folder='static') + +@app.after_request +def add_cors_headers(response): + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT' + response.headers['Access-Control-Allow-Headers'] = 'Content-Type' + return response + +# Elasticsearch Configuration +ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost") +ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200)) +INDEX_NAME = "book_index" + +# Wait for Elasticsearch to be available +es = None +while True: + try: + es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}]) + if es.ping(): + print("Connected to Elasticsearch") + break + else: + print("Elasticsearch not available, retrying...") + except Exception as e: + print(f"Error connecting to Elasticsearch: {e}") + time.sleep(5) + +def extract_text_from_epub(epub_path): + try: + book = epub.read_epub(epub_path) + text = '' + for item in book.get_items(): + if item.media_type == 'application/xhtml+xml': + content = item.get_content() + if content: + soup = BeautifulSoup(content, 'html.parser') + text += soup.get_text() + return text + except Exception as e: + logging.error(f"Error processing EPUB {epub_path}: {str(e)}") + return f"Error extracting text: {str(e)}" + +def extract_text_from_pdf(pdf_path): + text = '' + with open(pdf_path, 'rb') as pdf_file: + pdf_reader = PyPDF2.PdfReader(pdf_file) + for page_num in range(len(pdf_reader.pages)): + page = pdf_reader.pages[page_num] + text += page.extract_text() + return text + +@app.route('/', methods=['GET']) +def home(): + return render_template('search.html') + +@app.route('/search', methods=['GET']) +def search(): + query = request.args.get('query') + if not query: + if request.headers.get('Accept') == 'application/json': + return jsonify({"error": "Query parameter is required"}), 400 + return render_template('search.html', query='') + + try: + results = es.search(index=INDEX_NAME, query={'match': {'content': query}}) + hits = results['hits']['hits'] + + search_results = [] + for hit in hits: + file_path = hit['_source']['file_path'] + content = hit['_source']['content'] + + # Highlight snippet (simple version) + snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100)) + index = content.lower().find(query.lower()) + if index != -1: + start = max(0, index - snippet_char_limit) + end = min(len(content), index + snippet_char_limit + len(query)) + snippet = content[start:end] + else: + snippet = "No snippet found" + + # Get base URL from environment + base_url = os.environ.get("BASE_URL", "http://localhost:8000") + # Construct URLs + # Remove "/books/" from path start if it's here + if file_path.startswith("/books/"): + file_path = file_path[len("/books/"):] + + url = f"{base_url}/{file_path}" + raw_url = f"{base_url}/file/{file_path}?format=html" + + search_results.append({ + "file_path": file_path, + "url": url, + "raw_url": raw_url, + "snippet": snippet, + "score": hit['_score'] + }) + + # If it's an API request or format=json is specified + if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': + response = jsonify({ + "query": query, + "results": search_results, + "total": len(search_results), + "took": results['took'] + }) + response.headers['Content-Type'] = 'application/json' + return response + + # Otherwise, render the HTML template + return render_template('search.html', results=search_results, query=query) + + except Exception as e: + if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': + response = jsonify({ + "error": str(e), + "query": query + }) + response.headers['Content-Type'] = 'application/json' + return response, 500 + return render_template('search.html', error=str(e), query=query) + +@app.route('/files', methods=['GET']) +def list_files(): + books_dir = "/books" + files = [] + + try: + # Check if indexing is in progress + indexing_in_progress = get_progress() is not None + + for filename in os.listdir(books_dir): + file_path = os.path.join(books_dir, filename) + if os.path.isfile(file_path): + file_size = os.path.getsize(file_path) + # Extract book title from filename if possible + title = filename + if ' - ' in filename: # Common pattern in filenames + title_parts = filename.split(' - ') + if len(title_parts) > 1: + title = ' - '.join(title_parts[:-1]) # Take all but last part + + files.append({ + 'name': filename, + 'title': title, + 'path': filename, + 'size': file_size, + 'size_mb': round(file_size / (1024 * 1024), 2) + }) + + # Calculate totals + total_files = len(files) + total_size = sum(f['size'] for f in files) + total_size_mb = round(total_size / (1024 * 1024), 2) + + # If it's an API request, return JSON + if request.headers.get('Accept') == 'application/json': + return jsonify({ + 'files': files, + 'total_files': total_files, + 'total_size': total_size, + 'total_size_mb': total_size_mb, + 'indexing_in_progress': indexing_in_progress + }) + + # Otherwise, render the HTML template + return render_template('files.html', + files=files, + total_files=total_files, + total_size=total_size, + total_size_mb=total_size_mb, + indexing_in_progress=indexing_in_progress) + except Exception as e: + if request.headers.get('Accept') == 'application/json': + return jsonify({"error": str(e)}), 500 + return render_template('files.html', error=str(e)) + +@app.route('/file/<path:file_path>', methods=['GET']) +def get_file(file_path): + # Ensure the file path is within the /books directory + books_dir = "/books" + + # Decode URL-encoded path and normalize + decoded_path = unquote(file_path) + # Remove any leading slashes or duplicate 'books/' segments + decoded_path = decoded_path.lstrip('/') + if decoded_path.startswith('books/'): + decoded_path = decoded_path[6:] + + # Join paths safely + full_path = os.path.normpath(os.path.join(books_dir, decoded_path)) + + # Validate the path is within the books directory + if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): + return jsonify({"error": "Access denied: File path outside of books directory"}), 403 + + try: + # Handle EPUB files + if file_path.lower().endswith('.epub'): + if request.args.get('format') == 'html': + # Convert EPUB to HTML + try: + book = epub.read_epub(full_path) + html_content = [] + for item in book.get_items(): + if item.get_type() == ebooklib.ITEM_DOCUMENT: + content = item.get_content() + if content: + soup = BeautifulSoup(content, 'html.parser') + # Preserve basic formatting tags + for tag in soup.find_all(): + if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']: + tag.unwrap() + html_content.append(str(soup)) + except Exception as e: + logging.error(f"Error processing EPUB {full_path}: {str(e)}") + return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500 + return render_template('text_file.html', + file_path=file_path, + content='<hr>'.join(html_content), + is_html=True) + else: + # Render the viewer template + return render_template('epub_viewer.html', file_path=file_path) + + # Handle regular text files + with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + + # If it's an API request or the Accept header doesn't include HTML, return plain text + if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''): + return content, 200, {'Content-Type': 'text/plain; charset=utf-8'} + + # Otherwise, render a simple HTML page with the content + return render_template('text_file.html', file_path=file_path, content=content) + except Exception as e: + return jsonify({"error": str(e)}), 404 + +@app.route('/epub/<path:file_path>', methods=['GET']) +def get_epub_file(file_path): + """Serve the raw EPUB file with proper headers""" + books_dir = "/books" + full_path = os.path.join(books_dir, file_path) + + # Validate the path is within the books directory + if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): + return jsonify({"error": "Access denied: File path outside of books directory"}), 403 + + try: + # Serve the raw EPUB file with proper headers + response = send_from_directory( + books_dir, + file_path, + as_attachment=True, + mimetype='application/epub+zip' + ) + response.headers['Access-Control-Allow-Origin'] = '*' + response.headers['Access-Control-Allow-Methods'] = 'GET' + response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"' + return response + except Exception as e: + return jsonify({"error": str(e)}), 404 + +@app.route('/index_books', methods=['GET']) +def index_books(): + logging.info("Indexing books endpoint called") + + # Get CPU configuration + cpu_limit = os.environ.get("CPU_LIMIT") + available_cpus = multiprocessing.cpu_count() + used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1) + + # Capture stdout to a string + old_stdout = sys.stdout + sys.stdout = captured_output = StringIO() + + try: + # Start indexing in a separate thread + from threading import Thread + index_thread = Thread(target=index_files, args=("/books",)) + index_thread.start() + + # If it's an API request, return immediately + if request.headers.get('Accept') == 'application/json': + return jsonify({"message": "Indexing started in background"}) + + # Otherwise, render the progress page with CPU info + return render_template('indexing.html', + available_cpus=available_cpus, + used_cpus=used_cpus) + + except Exception as e: + logging.error(f"Indexing failed: {e}") + sys.stdout = old_stdout + + if request.headers.get('Accept') == 'application/json': + return jsonify({"error": str(e)}), 500 + + # Create a simple HTML response for errors + return render_template('indexing_error.html', error=str(e)) + finally: + sys.stdout = old_stdout + +@app.route('/indexing_progress', methods=['GET']) +def get_indexing_progress(): + progress = get_progress() + if progress is None: + return jsonify({"status": "not_running"}) + + # Format time for display + from datetime import datetime + import pytz + + # Get browser timezone from Accept-Language header or use UTC as fallback + browser_tz = request.headers.get('X-Timezone', 'UTC') + try: + tz = pytz.timezone(browser_tz) + except pytz.UnknownTimeZoneError: + tz = pytz.UTC + + elapsed_min = int(progress['elapsed_time'] // 60) + elapsed_sec = int(progress['elapsed_time'] % 60) + + if progress['estimated_remaining'] > 0: + remaining_min = int(progress['estimated_remaining'] // 60) + remaining_sec = int(progress['estimated_remaining'] % 60) + completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)') + else: + remaining_min = 0 + remaining_sec = 0 + completion_time = "N/A" + + return jsonify({ + "status": "running", + "total_files": progress['total_files'], + "processed_files": progress['processed_files'], + "percentage": round(progress['percentage'], 1), + "current_file": progress['current_file'], + "elapsed_time": f"{elapsed_min}m {elapsed_sec}s", + "estimated_remaining": f"{remaining_min}m {remaining_sec}s", + "estimated_completion": completion_time, + "errors": progress['errors'] + }) + +@app.route('/abort_indexing', methods=['POST']) +def abort_indexing(): + # In a real implementation, we would set a flag to stop the indexing + # For now, we'll just return a message + return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"}) + +@app.route('/reset_index', methods=['POST']) +def reset_index(): + """Reset the Elasticsearch index by deleting and recreating it""" + try: + # Check for basic auth + auth = request.authorization + if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"): + return jsonify({"error": "Authentication required"}), 401 + + # Delete existing index if it exists + if es.indices.exists(index=INDEX_NAME): + es.indices.delete(index=INDEX_NAME) + + # Create new index with mapping + es.indices.create(index=INDEX_NAME, body={ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "properties": { + "file_path": {"type": "keyword"}, + "content": {"type": "text"} + } + } + }) + + return jsonify({"status": "success", "message": "Index reset successfully"}) + except Exception as e: + return jsonify({"error": str(e)}), 500 + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + logging.info("Starting the API - inside main block") app.run(debug=True, host='0.0.0.0') \ No newline at end of file diff --git a/src/api/static/css/epub_viewer.css b/src/api/static/css/epub_viewer.css index 3341c8d..1d6a7ba 100644 --- a/src/api/static/css/epub_viewer.css +++ b/src/api/static/css/epub_viewer.css @@ -1,66 +1,66 @@ -#viewerContainer { - position: absolute; - top: 50px; - left: 0; - right: 0; - bottom: 0; - overflow: auto; -} - -#viewer { - width: 100%; - height: 90vh; - margin: 0 auto; -} - -.controls { - text-align: center; - padding: 10px; -} - -#prev, #next { - padding: 10px 20px; - margin: 10px; - background: #007bff; - color: white; - border: none; - border-radius: 4px; - cursor: pointer; -} - -.error { - color: red; - padding: 20px; - text-align: center; -} - -header { - text-align: center; - padding: 20px 0; -} - -.nav { - background-color: #f8f9fa; - padding: 10px; -} - -.nav ul { - list-style: none; - display: flex; - justify-content: center; - gap: 20px; - padding: 0; - margin: 0; -} - -.nav a { - text-decoration: none; - color: #007bff; -} - -footer { - text-align: center; - padding: 20px; - margin-top: 20px; - background-color: #f8f9fa; +#viewerContainer { + position: absolute; + top: 50px; + left: 0; + right: 0; + bottom: 0; + overflow: auto; +} + +#viewer { + width: 100%; + height: 90vh; + margin: 0 auto; +} + +.controls { + text-align: center; + padding: 10px; +} + +#prev, #next { + padding: 10px 20px; + margin: 10px; + background: #007bff; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; +} + +.error { + color: red; + padding: 20px; + text-align: center; +} + +header { + text-align: center; + padding: 20px 0; +} + +.nav { + background-color: #f8f9fa; + padding: 10px; +} + +.nav ul { + list-style: none; + display: flex; + justify-content: center; + gap: 20px; + padding: 0; + margin: 0; +} + +.nav a { + text-decoration: none; + color: #007bff; +} + +footer { + text-align: center; + padding: 20px; + margin-top: 20px; + background-color: #f8f9fa; } \ No newline at end of file diff --git a/src/api/static/css/style.css b/src/api/static/css/style.css index 1607718..df31208 100644 --- a/src/api/static/css/style.css +++ b/src/api/static/css/style.css @@ -1,236 +1,236 @@ -body { - font-family: 'Arial', sans-serif; - line-height: 1.6; - margin: 0; - padding: 0; - background-color: #f4f4f4; - color: #333; -} - -.container { - width: 80%; - margin: auto; - overflow: hidden; - padding: 20px; -} - -header { - background: #35424a; - color: white; - padding: 20px; - text-align: center; - border-bottom: 4px solid #1abc9c; -} - -header h1 { - margin: 0; -} - -.search-container { - margin: 30px 0; - text-align: center; -} - -.search-box { - width: 70%; - padding: 12px; - border: 1px solid #ddd; - border-radius: 4px; - font-size: 16px; -} - -.search-button { - padding: 12px 24px; - background: #1abc9c; - color: white; - border: none; - border-radius: 4px; - cursor: pointer; - font-size: 16px; -} - -.search-button:hover { - background: #16a085; -} - -.results { - margin-top: 30px; -} - -.result-item { - background: white; - padding: 15px; - margin-bottom: 15px; - border-radius: 5px; - box-shadow: 0 2px 5px rgba(0,0,0,0.1); -} - -.result-item h3 { - margin-top: 0; - color: #1abc9c; -} - -.result-item p { - margin-bottom: 10px; -} - -.result-item a { - color: #3498db; - text-decoration: none; -} - -.result-item a:hover { - text-decoration: underline; -} - -.file-list { - list-style: none; - padding: 0; -} - -.file-list li { - background: white; - padding: 15px; - margin-bottom: 10px; - border-radius: 5px; - box-shadow: 0 2px 5px rgba(0,0,0,0.1); -} - -.file-list a { - color: #3498db; - text-decoration: none; - font-weight: bold; -} - -.file-list a:hover { - text-decoration: underline; -} - -.nav { - background: #35424a; - color: white; - padding: 10px 0; -} - -.nav ul { - padding: 0; - list-style: none; - text-align: center; -} - -.nav li { - display: inline; - margin: 0 15px; -} - -.nav a { - color: white; - text-decoration: none; -} - -.nav a:hover { - color: #1abc9c; -} - -footer { - background: #35424a; - color: white; - text-align: center; - padding: 20px; - margin-top: 40px; -} - -/* Indexing page styles */ -.progress-container { - margin: 20px 0; - padding: 20px; - background: #f5f5f5; - border-radius: 5px; -} - -.progress-bar { - height: 20px; - background: #e0e0e0; - border-radius: 10px; - margin: 10px 0; - overflow: hidden; -} - -.progress-fill { - height: 100%; - background: #4CAF50; - width: 0%; - transition: width 0.3s; -} - -.progress-stats { - display: flex; - justify-content: space-between; - margin-bottom: 10px; -} - -.progress-details { - margin-top: 20px; -} - -.current-file { - font-weight: bold; - margin: 10px 0; - word-break: break-all; -} - -.time-stats { - display: grid; - grid-template-columns: repeat(2, 1fr); - gap: 10px; - margin-top: 15px; -} - -.time-stat { - background: #e9e9e9; - padding: 10px; - border-radius: 5px; -} - -.abort-button { - background: #f44336; - color: white; - border: none; - padding: 10px 20px; - border-radius: 5px; - cursor: pointer; - margin-top: 20px; -} - -.abort-button:hover { - background: #d32f2f; -} - -.error-list { - margin-top: 10px; -} - -.error-item { - padding: 10px; - margin-bottom: 5px; - background: #ffebee; - border-left: 3px solid #f44336; -} - -.file-actions { - margin-top: 10px; -} - -.file-action { - color: #3498db; - text-decoration: none; -} - -.file-action:hover { - text-decoration: underline; -} - -.action-separator { - margin: 0 5px; - color: #999; +body { + font-family: 'Arial', sans-serif; + line-height: 1.6; + margin: 0; + padding: 0; + background-color: #f4f4f4; + color: #333; +} + +.container { + width: 80%; + margin: auto; + overflow: hidden; + padding: 20px; +} + +header { + background: #35424a; + color: white; + padding: 20px; + text-align: center; + border-bottom: 4px solid #1abc9c; +} + +header h1 { + margin: 0; +} + +.search-container { + margin: 30px 0; + text-align: center; +} + +.search-box { + width: 70%; + padding: 12px; + border: 1px solid #ddd; + border-radius: 4px; + font-size: 16px; +} + +.search-button { + padding: 12px 24px; + background: #1abc9c; + color: white; + border: none; + border-radius: 4px; + cursor: pointer; + font-size: 16px; +} + +.search-button:hover { + background: #16a085; +} + +.results { + margin-top: 30px; +} + +.result-item { + background: white; + padding: 15px; + margin-bottom: 15px; + border-radius: 5px; + box-shadow: 0 2px 5px rgba(0,0,0,0.1); +} + +.result-item h3 { + margin-top: 0; + color: #1abc9c; +} + +.result-item p { + margin-bottom: 10px; +} + +.result-item a { + color: #3498db; + text-decoration: none; +} + +.result-item a:hover { + text-decoration: underline; +} + +.file-list { + list-style: none; + padding: 0; +} + +.file-list li { + background: white; + padding: 15px; + margin-bottom: 10px; + border-radius: 5px; + box-shadow: 0 2px 5px rgba(0,0,0,0.1); +} + +.file-list a { + color: #3498db; + text-decoration: none; + font-weight: bold; +} + +.file-list a:hover { + text-decoration: underline; +} + +.nav { + background: #35424a; + color: white; + padding: 10px 0; +} + +.nav ul { + padding: 0; + list-style: none; + text-align: center; +} + +.nav li { + display: inline; + margin: 0 15px; +} + +.nav a { + color: white; + text-decoration: none; +} + +.nav a:hover { + color: #1abc9c; +} + +footer { + background: #35424a; + color: white; + text-align: center; + padding: 20px; + margin-top: 40px; +} + +/* Indexing page styles */ +.progress-container { + margin: 20px 0; + padding: 20px; + background: #f5f5f5; + border-radius: 5px; +} + +.progress-bar { + height: 20px; + background: #e0e0e0; + border-radius: 10px; + margin: 10px 0; + overflow: hidden; +} + +.progress-fill { + height: 100%; + background: #4CAF50; + width: 0%; + transition: width 0.3s; +} + +.progress-stats { + display: flex; + justify-content: space-between; + margin-bottom: 10px; +} + +.progress-details { + margin-top: 20px; +} + +.current-file { + font-weight: bold; + margin: 10px 0; + word-break: break-all; +} + +.time-stats { + display: grid; + grid-template-columns: repeat(2, 1fr); + gap: 10px; + margin-top: 15px; +} + +.time-stat { + background: #e9e9e9; + padding: 10px; + border-radius: 5px; +} + +.abort-button { + background: #f44336; + color: white; + border: none; + padding: 10px 20px; + border-radius: 5px; + cursor: pointer; + margin-top: 20px; +} + +.abort-button:hover { + background: #d32f2f; +} + +.error-list { + margin-top: 10px; +} + +.error-item { + padding: 10px; + margin-bottom: 5px; + background: #ffebee; + border-left: 3px solid #f44336; +} + +.file-actions { + margin-top: 10px; +} + +.file-action { + color: #3498db; + text-decoration: none; +} + +.file-action:hover { + text-decoration: underline; +} + +.action-separator { + margin: 0 5px; + color: #999; } \ No newline at end of file diff --git a/src/api/templates/epub_viewer.html b/src/api/templates/epub_viewer.html index e826e2b..ed49f9e 100644 --- a/src/api/templates/epub_viewer.html +++ b/src/api/templates/epub_viewer.html @@ -1,262 +1,262 @@ -<!DOCTYPE html> -<html> -<head> - <title>{{ file_path }}</title> - <link rel="stylesheet" href="/static/css/style.css"> - <link rel="stylesheet" href="/static/css/epub_viewer.css"> - <script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script> - <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script> -</head> -<body> - <header> - <h1>EPUB: {{ file_path }}</h1> - </header> - - <nav class="nav"> - <ul> - <li><a href="/">Home</a></li> - <li><a href="/files">File List</a></li> - <li><a href="/index_books">Re-Index Books</a></li> - </ul> - </nav> - - <div class="container"> - <div id="viewer"></div> - <div class="controls"> - <button id="prev">Previous</button> - <button id="next">Next</button> - </div> - </div> - - <script> - // Debug logging function - function debug(message, obj) { - console.log("EPUB DEBUG: " + message, obj || ''); - // Add to page for visibility - const debugDiv = document.getElementById('debug-output') || - (function() { - const div = document.createElement('div'); - div.id = 'debug-output'; - div.style.position = 'fixed'; - div.style.bottom = '10px'; - div.style.right = '10px'; - div.style.backgroundColor = 'rgba(0,0,0,0.7)'; - div.style.color = 'white'; - div.style.padding = '10px'; - div.style.maxHeight = '200px'; - div.style.overflow = 'auto'; - div.style.zIndex = '9999'; - document.body.appendChild(div); - return div; - })(); - - const logEntry = document.createElement('div'); - logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : ''); - debugDiv.appendChild(logEntry); - } - - // Global variables - var book = null; - var rendition = null; - - function handlePrev() { - debug("Previous button clicked"); - if (!rendition) { - debug("ERROR: rendition not available for prev"); - return; - } - try { - rendition.prev().then(() => { - debug("Navigation to previous page successful"); - }).catch(err => { - debug("Navigation to previous page failed", err.message); - }); - } catch (err) { - debug("Error in prev navigation", err.message); - } - } - - function handleNext() { - debug("Next button clicked"); - if (!rendition) { - debug("ERROR: rendition not available for next"); - return; - } - try { - rendition.next().then(() => { - debug("Navigation to next page successful"); - }).catch(err => { - debug("Navigation to next page failed", err.message); - }); - } catch (err) { - debug("Error in next navigation", err.message); - } - } - - function handleKeydown(e) { - if (!rendition) { - debug("ERROR: rendition not available for keydown"); - return; - } - if (e.keyCode === 37) { - debug("Left arrow key pressed"); - rendition.prev(); - } - if (e.keyCode === 39) { - debug("Right arrow key pressed"); - rendition.next(); - } - } - - function initializeEPUB() { - debug("Initializing EPUB viewer"); - - try { - // Use dedicated endpoint for EPUB files - const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}"); - debug("Loading EPUB from URL", fileUrl); - // TEST_EPUB_URL: /epub/{{ file_path }} - - // Create book object - window.book = book = ePub(fileUrl); - debug("Book object created successfully"); - console.log("Book object details:", book); - - if (!book) { - throw new Error("Failed to initialize EPUB reader"); - } - - // Set up error handler - book.on('error', function(err) { - debug("EPUB error event", err); - document.getElementById("viewer").innerHTML = - '<div class="error">Error loading EPUB: ' + err.message + '</div>'; - }); - - // Set up ready handler - book.on('ready', function() { - debug("Book ready event fired"); - }); - - // Create rendition - debug("Creating rendition"); - window.rendition = rendition = book.renderTo("viewer", { - width: "100%", - height: "100%", - spread: "none", - manager: "continuous", - style: ` - body { - margin: 0; - padding: 20px; - background-color: white; - color: black; - font-size: 1.2em; - line-height: 1.5; - } - img { - max-width: 100%; - } - ` - }); - - // Hide iframe initially to prevent flash of unstyled content - const viewer = document.getElementById("viewer"); - if (viewer) { - viewer.style.visibility = "hidden"; - } - - debug("Displaying rendition"); - rendition.display() - .then(() => { - debug("Rendition displayed successfully"); - - // Set up resize handler - const resizeHandler = function() { - try { - if (rendition) { - rendition.resize(); - } - } catch (err) { - console.error("Resize error:", err); - } - }; - window.addEventListener('resize', resizeHandler); - - // Show content and initialize navigation - setTimeout(() => { - try { - if (rendition) { - rendition.resize(); - const viewer = document.getElementById('viewer'); - if (viewer) { - viewer.style.visibility = 'visible'; - } - // Initialize navigation - rendition.start(); - } - } catch (err) { - debug("Content display error", err.message); - } - }, 100); - - return rendition; - }) - .catch(err => { - debug("Rendition error", err); - document.getElementById("viewer").innerHTML = - '<div class="error">Error displaying EPUB: ' + err.message + '</div>'; - }); - - // Set up event listeners - debug("Setting up event listeners"); - try { - document.getElementById("prev").addEventListener("click", handlePrev); - document.getElementById("next").addEventListener("click", handleNext); - document.addEventListener("keydown", handleKeydown); - - // Add loading indicator - const loadingIndicator = document.createElement('div'); - loadingIndicator.id = 'loading-indicator'; - loadingIndicator.style.position = 'fixed'; - loadingIndicator.style.top = '50%'; - loadingIndicator.style.left = '50%'; - loadingIndicator.style.transform = 'translate(-50%, -50%)'; - loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)'; - loadingIndicator.style.color = 'white'; - loadingIndicator.style.padding = '20px'; - loadingIndicator.style.borderRadius = '5px'; - loadingIndicator.style.zIndex = '1000'; - loadingIndicator.textContent = 'Loading EPUB...'; - document.body.appendChild(loadingIndicator); - - // Remove indicator when loaded - book.on('ready', function() { - const indicator = document.getElementById('loading-indicator'); - if (indicator) { - indicator.remove(); - } - }); - - } catch (err) { - debug("Error setting up event listeners", err.message); - console.error("Event listener setup error:", err); - } - - } catch (err) { - debug("EPUB initialization error", err); - document.getElementById("viewer").innerHTML = - '<div class="error">Failed to load EPUB: ' + err.message + '</div>'; - } - } - - // Initialize when DOM is loaded - debug("Setting up DOMContentLoaded listener"); - document.addEventListener('DOMContentLoaded', initializeEPUB); - </script> - - <footer> - <p>© 2025 Book Search Engine</p> - </footer> -</body> +<!DOCTYPE html> +<html> +<head> + <title>{{ file_path }}</title> + <link rel="stylesheet" href="/static/css/style.css"> + <link rel="stylesheet" href="/static/css/epub_viewer.css"> + <script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script> + <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script> +</head> +<body> + <header> + <h1>EPUB: {{ file_path }}</h1> + </header> + + <nav class="nav"> + <ul> + <li><a href="/">Home</a></li> + <li><a href="/files">File List</a></li> + <li><a href="/index_books">Re-Index Books</a></li> + </ul> + </nav> + + <div class="container"> + <div id="viewer"></div> + <div class="controls"> + <button id="prev">Previous</button> + <button id="next">Next</button> + </div> + </div> + + <script> + // Debug logging function + function debug(message, obj) { + console.log("EPUB DEBUG: " + message, obj || ''); + // Add to page for visibility + const debugDiv = document.getElementById('debug-output') || + (function() { + const div = document.createElement('div'); + div.id = 'debug-output'; + div.style.position = 'fixed'; + div.style.bottom = '10px'; + div.style.right = '10px'; + div.style.backgroundColor = 'rgba(0,0,0,0.7)'; + div.style.color = 'white'; + div.style.padding = '10px'; + div.style.maxHeight = '200px'; + div.style.overflow = 'auto'; + div.style.zIndex = '9999'; + document.body.appendChild(div); + return div; + })(); + + const logEntry = document.createElement('div'); + logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : ''); + debugDiv.appendChild(logEntry); + } + + // Global variables + var book = null; + var rendition = null; + + function handlePrev() { + debug("Previous button clicked"); + if (!rendition) { + debug("ERROR: rendition not available for prev"); + return; + } + try { + rendition.prev().then(() => { + debug("Navigation to previous page successful"); + }).catch(err => { + debug("Navigation to previous page failed", err.message); + }); + } catch (err) { + debug("Error in prev navigation", err.message); + } + } + + function handleNext() { + debug("Next button clicked"); + if (!rendition) { + debug("ERROR: rendition not available for next"); + return; + } + try { + rendition.next().then(() => { + debug("Navigation to next page successful"); + }).catch(err => { + debug("Navigation to next page failed", err.message); + }); + } catch (err) { + debug("Error in next navigation", err.message); + } + } + + function handleKeydown(e) { + if (!rendition) { + debug("ERROR: rendition not available for keydown"); + return; + } + if (e.keyCode === 37) { + debug("Left arrow key pressed"); + rendition.prev(); + } + if (e.keyCode === 39) { + debug("Right arrow key pressed"); + rendition.next(); + } + } + + function initializeEPUB() { + debug("Initializing EPUB viewer"); + + try { + // Use dedicated endpoint for EPUB files + const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}"); + debug("Loading EPUB from URL", fileUrl); + // TEST_EPUB_URL: /epub/{{ file_path }} + + // Create book object + window.book = book = ePub(fileUrl); + debug("Book object created successfully"); + console.log("Book object details:", book); + + if (!book) { + throw new Error("Failed to initialize EPUB reader"); + } + + // Set up error handler + book.on('error', function(err) { + debug("EPUB error event", err); + document.getElementById("viewer").innerHTML = + '<div class="error">Error loading EPUB: ' + err.message + '</div>'; + }); + + // Set up ready handler + book.on('ready', function() { + debug("Book ready event fired"); + }); + + // Create rendition + debug("Creating rendition"); + window.rendition = rendition = book.renderTo("viewer", { + width: "100%", + height: "100%", + spread: "none", + manager: "continuous", + style: ` + body { + margin: 0; + padding: 20px; + background-color: white; + color: black; + font-size: 1.2em; + line-height: 1.5; + } + img { + max-width: 100%; + } + ` + }); + + // Hide iframe initially to prevent flash of unstyled content + const viewer = document.getElementById("viewer"); + if (viewer) { + viewer.style.visibility = "hidden"; + } + + debug("Displaying rendition"); + rendition.display() + .then(() => { + debug("Rendition displayed successfully"); + + // Set up resize handler + const resizeHandler = function() { + try { + if (rendition) { + rendition.resize(); + } + } catch (err) { + console.error("Resize error:", err); + } + }; + window.addEventListener('resize', resizeHandler); + + // Show content and initialize navigation + setTimeout(() => { + try { + if (rendition) { + rendition.resize(); + const viewer = document.getElementById('viewer'); + if (viewer) { + viewer.style.visibility = 'visible'; + } + // Initialize navigation + rendition.start(); + } + } catch (err) { + debug("Content display error", err.message); + } + }, 100); + + return rendition; + }) + .catch(err => { + debug("Rendition error", err); + document.getElementById("viewer").innerHTML = + '<div class="error">Error displaying EPUB: ' + err.message + '</div>'; + }); + + // Set up event listeners + debug("Setting up event listeners"); + try { + document.getElementById("prev").addEventListener("click", handlePrev); + document.getElementById("next").addEventListener("click", handleNext); + document.addEventListener("keydown", handleKeydown); + + // Add loading indicator + const loadingIndicator = document.createElement('div'); + loadingIndicator.id = 'loading-indicator'; + loadingIndicator.style.position = 'fixed'; + loadingIndicator.style.top = '50%'; + loadingIndicator.style.left = '50%'; + loadingIndicator.style.transform = 'translate(-50%, -50%)'; + loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)'; + loadingIndicator.style.color = 'white'; + loadingIndicator.style.padding = '20px'; + loadingIndicator.style.borderRadius = '5px'; + loadingIndicator.style.zIndex = '1000'; + loadingIndicator.textContent = 'Loading EPUB...'; + document.body.appendChild(loadingIndicator); + + // Remove indicator when loaded + book.on('ready', function() { + const indicator = document.getElementById('loading-indicator'); + if (indicator) { + indicator.remove(); + } + }); + + } catch (err) { + debug("Error setting up event listeners", err.message); + console.error("Event listener setup error:", err); + } + + } catch (err) { + debug("EPUB initialization error", err); + document.getElementById("viewer").innerHTML = + '<div class="error">Failed to load EPUB: ' + err.message + '</div>'; + } + } + + // Initialize when DOM is loaded + debug("Setting up DOMContentLoaded listener"); + document.addEventListener('DOMContentLoaded', initializeEPUB); + </script> + + <footer> + <p>© 2025 Book Search Engine</p> + </footer> +</body> </html> \ No newline at end of file diff --git a/src/api/templates/files.html b/src/api/templates/files.html index 99c4b31..e89a227 100644 --- a/src/api/templates/files.html +++ b/src/api/templates/files.html @@ -1,141 +1,141 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Book Files</title> - <link rel="stylesheet" href="/static/css/style.css"> - <style> - .file-list { - list-style: none; - padding: 0; - } - .file-item { - display: flex; - justify-content: space-between; - padding: 8px 0; - border-bottom: 1px solid #eee; - } - .file-name { - flex: 1; - word-break: break-all; - } - .file-size { - color: #666; - min-width: 80px; - text-align: right; - } - .book-title { - font-weight: bold; - color: #333; - } - .file-name-muted { - color: #999; - font-size: 0.9em; - margin-left: 8px; - } - .summary { - background: #f5f5f5; - padding: 15px; - border-radius: 5px; - margin-bottom: 20px; - } - .summary-item { - display: flex; - justify-content: space-between; - margin-bottom: 5px; - } - .summary-label { - font-weight: bold; - } - .indexing-status { - background: #fff8e1; - padding: 15px; - border-radius: 5px; - margin-bottom: 20px; - border-left: 4px solid #ffc107; - } - .indexing-link { - color: #2196f3; - text-decoration: none; - } - .indexing-link:hover { - text-decoration: underline; - } - .plain-view-link { - font-size: 0.8em; - color: #666; - text-decoration: none; - margin-left: 8px; - } - .plain-view-link:hover { - text-decoration: underline; - color: #2196f3; - } - </style> -</head> -<body> - <header> - <h1>Book Files</h1> - </header> - - <nav class="nav"> - <ul> - <li><a href="/">Home</a></li> - <li><a href="/files">File List</a></li> - <li><a href="/index_books">Re-Index Books</a></li> - </ul> - </nav> - - <div class="container"> - <div class="summary"> - <div class="summary-item"> - <span class="summary-label">Total Files:</span> - <span>{{ total_files }}</span> - </div> - <div class="summary-item"> - <span class="summary-label">Total Size:</span> - <span>{{ total_size_mb }} MB</span> - </div> - </div> - - {% if indexing_in_progress %} - <div class="indexing-status"> - Indexing is currently in progress. - <a href="/index_books" class="indexing-link">View re-indexing progress</a> - </div> - {% endif %} - - <h2>Available Files</h2> - - {% if files %} - <ul class="file-list"> - {% for file in files %} - <li class="file-item"> - <span class="file-name"> - <a href="/file/{{ file.path }}"> - {% if file.path.endswith('.epub') %} - <br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a> - {% endif %} - {% if file.title != file.name %} - <span class="book-title">{{ file.title }}</span> - <span class="file-name-muted">{{ file.name }}</span> - {% else %} - {{ file.name }} - {% endif %} - </a> - </span> - <span class="file-size">{{ file.size_mb }} MB</span> - </li> - {% endfor %} - </ul> - {% else %} - <p>No files available. Please add files to the books directory.</p> - {% endif %} - </div> - - <footer> - <p>© 2025 Intari</p> - </footer> -</body> +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Book Files</title> + <link rel="stylesheet" href="/static/css/style.css"> + <style> + .file-list { + list-style: none; + padding: 0; + } + .file-item { + display: flex; + justify-content: space-between; + padding: 8px 0; + border-bottom: 1px solid #eee; + } + .file-name { + flex: 1; + word-break: break-all; + } + .file-size { + color: #666; + min-width: 80px; + text-align: right; + } + .book-title { + font-weight: bold; + color: #333; + } + .file-name-muted { + color: #999; + font-size: 0.9em; + margin-left: 8px; + } + .summary { + background: #f5f5f5; + padding: 15px; + border-radius: 5px; + margin-bottom: 20px; + } + .summary-item { + display: flex; + justify-content: space-between; + margin-bottom: 5px; + } + .summary-label { + font-weight: bold; + } + .indexing-status { + background: #fff8e1; + padding: 15px; + border-radius: 5px; + margin-bottom: 20px; + border-left: 4px solid #ffc107; + } + .indexing-link { + color: #2196f3; + text-decoration: none; + } + .indexing-link:hover { + text-decoration: underline; + } + .plain-view-link { + font-size: 0.8em; + color: #666; + text-decoration: none; + margin-left: 8px; + } + .plain-view-link:hover { + text-decoration: underline; + color: #2196f3; + } + </style> +</head> +<body> + <header> + <h1>Book Files</h1> + </header> + + <nav class="nav"> + <ul> + <li><a href="/">Home</a></li> + <li><a href="/files">File List</a></li> + <li><a href="/index_books">Re-Index Books</a></li> + </ul> + </nav> + + <div class="container"> + <div class="summary"> + <div class="summary-item"> + <span class="summary-label">Total Files:</span> + <span>{{ total_files }}</span> + </div> + <div class="summary-item"> + <span class="summary-label">Total Size:</span> + <span>{{ total_size_mb }} MB</span> + </div> + </div> + + {% if indexing_in_progress %} + <div class="indexing-status"> + Indexing is currently in progress. + <a href="/index_books" class="indexing-link">View re-indexing progress</a> + </div> + {% endif %} + + <h2>Available Files</h2> + + {% if files %} + <ul class="file-list"> + {% for file in files %} + <li class="file-item"> + <span class="file-name"> + <a href="/file/{{ file.path }}"> + {% if file.path.endswith('.epub') %} + <br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a> + {% endif %} + {% if file.title != file.name %} + <span class="book-title">{{ file.title }}</span> + <span class="file-name-muted">{{ file.name }}</span> + {% else %} + {{ file.name }} + {% endif %} + </a> + </span> + <span class="file-size">{{ file.size_mb }} MB</span> + </li> + {% endfor %} + </ul> + {% else %} + <p>No files available. Please add files to the books directory.</p> + {% endif %} + </div> + + <footer> + <p>© 2025 Intari</p> + </footer> +</body> </html> \ No newline at end of file diff --git a/src/api/templates/indexing.html b/src/api/templates/indexing.html index 0189159..564166d 100644 --- a/src/api/templates/indexing.html +++ b/src/api/templates/indexing.html @@ -1,163 +1,163 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Indexing Books</title> - <link rel="stylesheet" href="/static/css/style.css"> - <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> -</head> -<body> - <header> - <h1>Indexing Books</h1> - </header> - - <nav class="nav"> - <ul> - <li><a href="/">Home</a></li> - <li><a href="/files">File List</a></li> - <li><a href="/index_books">Re-Index Books</a></li> - </ul> - </nav> - - <div class="container"> - <div class="progress-container"> - <h2>Indexing Progress</h2> - <div class="progress-stats"> - <span id="processed-files">0</span> of <span id="total-files">0</span> files processed - <span id="percentage">0%</span> - </div> - <div class="progress-bar"> - <div class="progress-fill" id="progress-fill"></div> - </div> - - <div class="current-file" id="current-file"> - Current file: Starting indexing... - </div> - - <div class="time-stats"> - <div class="time-stat"> - <div>CPU cores:</div> - <div>{{ used_cpus }} of {{ available_cpus }}</div> - </div> - <div class="time-stat"> - <div>Time elapsed:</div> - <div id="elapsed-time">0m 0s</div> - </div> - <div class="time-stat"> - <div>Estimated remaining:</div> - <div id="estimated-remaining">Calculating...</div> - </div> - <div class="time-stat"> - <div>Estimated completion:</div> - <div id="estimated-completion">Calculating...</div> - </div> - <div class="time-stat"> - <div>Files per minute:</div> - <div id="files-per-minute">0</div> - </div> - </div> - - <button class="abort-button" id="abort-button">Abort Indexing</button> - </div> - - <div class="progress-details"> - <h3>Recent Errors</h3> - <div id="error-list" class="error-list"> - No errors yet - </div> - </div> - </div> - - <footer> - <p>© 2025 Intari</p> - </footer> - - <script> - const progressFill = document.getElementById('progress-fill'); - const processedFiles = document.getElementById('processed-files'); - const totalFiles = document.getElementById('total-files'); - const percentage = document.getElementById('percentage'); - const currentFile = document.getElementById('current-file'); - const elapsedTime = document.getElementById('elapsed-time'); - const estimatedRemaining = document.getElementById('estimated-remaining'); - const estimatedCompletion = document.getElementById('estimated-completion'); - const filesPerMinute = document.getElementById('files-per-minute'); - const errorList = document.getElementById('error-list'); - const abortButton = document.getElementById('abort-button'); - - let updateInterval; - let speedChart; - - // Update progress every second - function updateProgress() { - // Get browser timezone - const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; - - fetch('/indexing_progress', { - headers: { - 'X-Timezone': timezone - } - }) - .then(response => response.json()) - .then(data => { - if (data.status === 'not_running') { - // Indexing completed - clearInterval(updateInterval); - window.location.href = '/files'; - return; - } - - // Update progress bar - progressFill.style.width = `${data.percentage}%`; - processedFiles.textContent = data.processed_files; - totalFiles.textContent = data.total_files; - percentage.textContent = `${data.percentage.toFixed(1)}%`; - - // Update current file - currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`; - - // Update time stats - elapsedTime.textContent = data.elapsed_time; - estimatedRemaining.textContent = data.estimated_remaining; - estimatedCompletion.textContent = data.estimated_completion; - - // Calculate files per minute - if (data.elapsed_time) { - const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number); - const totalSeconds = min * 60 + sec; - if (totalSeconds > 0) { - const fpm = (data.processed_files / totalSeconds * 60).toFixed(1); - filesPerMinute.textContent = fpm; - } - } - - // Update errors - if (data.errors && data.errors.length > 0) { - errorList.innerHTML = data.errors.map(err => - `<div class="error-item">${err}</div>` - ).join(''); - } - }) - .catch(error => { - console.error('Error fetching progress:', error); - }); - } - - // Start updating progress - updateInterval = setInterval(updateProgress, 1000); - updateProgress(); - - // Handle abort button - abortButton.addEventListener('click', () => { - if (confirm('Are you sure you want to abort indexing?')) { - fetch('/abort_indexing', { method: 'POST' }) - .then(response => response.json()) - .then(data => { - alert(data.message); - }); - } - }); - </script> -</body> +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Indexing Books</title> + <link rel="stylesheet" href="/static/css/style.css"> + <script src="https://cdn.jsdelivr.net/npm/chart.js"></script> +</head> +<body> + <header> + <h1>Indexing Books</h1> + </header> + + <nav class="nav"> + <ul> + <li><a href="/">Home</a></li> + <li><a href="/files">File List</a></li> + <li><a href="/index_books">Re-Index Books</a></li> + </ul> + </nav> + + <div class="container"> + <div class="progress-container"> + <h2>Indexing Progress</h2> + <div class="progress-stats"> + <span id="processed-files">0</span> of <span id="total-files">0</span> files processed + <span id="percentage">0%</span> + </div> + <div class="progress-bar"> + <div class="progress-fill" id="progress-fill"></div> + </div> + + <div class="current-file" id="current-file"> + Current file: Starting indexing... + </div> + + <div class="time-stats"> + <div class="time-stat"> + <div>CPU cores:</div> + <div>{{ used_cpus }} of {{ available_cpus }}</div> + </div> + <div class="time-stat"> + <div>Time elapsed:</div> + <div id="elapsed-time">0m 0s</div> + </div> + <div class="time-stat"> + <div>Estimated remaining:</div> + <div id="estimated-remaining">Calculating...</div> + </div> + <div class="time-stat"> + <div>Estimated completion:</div> + <div id="estimated-completion">Calculating...</div> + </div> + <div class="time-stat"> + <div>Files per minute:</div> + <div id="files-per-minute">0</div> + </div> + </div> + + <button class="abort-button" id="abort-button">Abort Indexing</button> + </div> + + <div class="progress-details"> + <h3>Recent Errors</h3> + <div id="error-list" class="error-list"> + No errors yet + </div> + </div> + </div> + + <footer> + <p>© 2025 Intari</p> + </footer> + + <script> + const progressFill = document.getElementById('progress-fill'); + const processedFiles = document.getElementById('processed-files'); + const totalFiles = document.getElementById('total-files'); + const percentage = document.getElementById('percentage'); + const currentFile = document.getElementById('current-file'); + const elapsedTime = document.getElementById('elapsed-time'); + const estimatedRemaining = document.getElementById('estimated-remaining'); + const estimatedCompletion = document.getElementById('estimated-completion'); + const filesPerMinute = document.getElementById('files-per-minute'); + const errorList = document.getElementById('error-list'); + const abortButton = document.getElementById('abort-button'); + + let updateInterval; + let speedChart; + + // Update progress every second + function updateProgress() { + // Get browser timezone + const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; + + fetch('/indexing_progress', { + headers: { + 'X-Timezone': timezone + } + }) + .then(response => response.json()) + .then(data => { + if (data.status === 'not_running') { + // Indexing completed + clearInterval(updateInterval); + window.location.href = '/files'; + return; + } + + // Update progress bar + progressFill.style.width = `${data.percentage}%`; + processedFiles.textContent = data.processed_files; + totalFiles.textContent = data.total_files; + percentage.textContent = `${data.percentage.toFixed(1)}%`; + + // Update current file + currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`; + + // Update time stats + elapsedTime.textContent = data.elapsed_time; + estimatedRemaining.textContent = data.estimated_remaining; + estimatedCompletion.textContent = data.estimated_completion; + + // Calculate files per minute + if (data.elapsed_time) { + const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number); + const totalSeconds = min * 60 + sec; + if (totalSeconds > 0) { + const fpm = (data.processed_files / totalSeconds * 60).toFixed(1); + filesPerMinute.textContent = fpm; + } + } + + // Update errors + if (data.errors && data.errors.length > 0) { + errorList.innerHTML = data.errors.map(err => + `<div class="error-item">${err}</div>` + ).join(''); + } + }) + .catch(error => { + console.error('Error fetching progress:', error); + }); + } + + // Start updating progress + updateInterval = setInterval(updateProgress, 1000); + updateProgress(); + + // Handle abort button + abortButton.addEventListener('click', () => { + if (confirm('Are you sure you want to abort indexing?')) { + fetch('/abort_indexing', { method: 'POST' }) + .then(response => response.json()) + .then(data => { + alert(data.message); + }); + } + }); + </script> +</body> </html> \ No newline at end of file diff --git a/src/api/templates/search.html b/src/api/templates/search.html index 2861a62..604cfeb 100644 --- a/src/api/templates/search.html +++ b/src/api/templates/search.html @@ -1,56 +1,56 @@ -<!DOCTYPE html> -<html lang="en"> -<head> - <meta charset="UTF-8"> - <meta name="viewport" content="width=device-width, initial-scale=1.0"> - <title>Book Search</title> - <link rel="stylesheet" href="/static/css/style.css"> -</head> -<body> - <header> - <h1>Book Search Engine</h1> - </header> - - <nav class="nav"> - <ul> - <li><a href="/">Home</a></li> - <li><a href="/files">File List</a></li> - <li><a href="/index_books">Re-Index Books</a></li> - </ul> - </nav> - - <div class="container"> - <div class="search-container"> - <form action="/search" method="GET"> - <input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}"> - <button type="submit" class="search-button">Search</button> - </form> - </div> - - {% if results %} - <div class="results"> - <h2>Search Results</h2> - {% for result in results %} - <div class="result-item"> - <h3>{{ result.file_path.split('/')[-1] }}</h3> - <p>{{ result.snippet }}</p> - <div class="file-actions"> - <a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a> - <span class="action-separator">|</span> - <a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a> - </div> - </div> - {% endfor %} - </div> - {% elif query %} - <div class="results"> - <p>No results found for "{{ query }}"</p> - </div> - {% endif %} - </div> - - <footer> - <p>© 2025 Intari</p> - </footer> -</body> +<!DOCTYPE html> +<html lang="en"> +<head> + <meta charset="UTF-8"> + <meta name="viewport" content="width=device-width, initial-scale=1.0"> + <title>Book Search</title> + <link rel="stylesheet" href="/static/css/style.css"> +</head> +<body> + <header> + <h1>Book Search Engine</h1> + </header> + + <nav class="nav"> + <ul> + <li><a href="/">Home</a></li> + <li><a href="/files">File List</a></li> + <li><a href="/index_books">Re-Index Books</a></li> + </ul> + </nav> + + <div class="container"> + <div class="search-container"> + <form action="/search" method="GET"> + <input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}"> + <button type="submit" class="search-button">Search</button> + </form> + </div> + + {% if results %} + <div class="results"> + <h2>Search Results</h2> + {% for result in results %} + <div class="result-item"> + <h3>{{ result.file_path.split('/')[-1] }}</h3> + <p>{{ result.snippet }}</p> + <div class="file-actions"> + <a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a> + <span class="action-separator">|</span> + <a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a> + </div> + </div> + {% endfor %} + </div> + {% elif query %} + <div class="results"> + <p>No results found for "{{ query }}"</p> + </div> + {% endif %} + </div> + + <footer> + <p>© 2025 Intari</p> + </footer> +</body> </html> \ No newline at end of file diff --git a/src/api/templates/text_file.html b/src/api/templates/text_file.html index cb05941..9c4a300 100644 --- a/src/api/templates/text_file.html +++ b/src/api/templates/text_file.html @@ -1,64 +1,64 @@ -и<!DOCTYPE html> -<html> -<head> - <title>{{ file_path }}</title> - <link rel="stylesheet" href="/static/css/style.css"> - <style> - pre { - background-color: white; - padding: 20px; - border-radius: 5px; - white-space: pre-wrap; - word-wrap: break-word; - } - .html-content { - background-color: white; - padding: 20px; - border-radius: 5px; - } - .html-content hr { - margin: 30px 0; - border: 0; - border-top: 1px solid #eee; - } - .html-content h1, - .html-content h2, - .html-content h3, - .html-content h4, - .html-content h5, - .html-content h6 { - margin: 1em 0 0.5em 0; - line-height: 1.2; - } - .html-content p { - margin: 0 0 1em 0; - line-height: 1.5; - } - </style> -</head> -<body> - <header> - <h1>File: {{ file_path }}</h1> - </header> - - <nav class="nav"> - <ul> - <li><a href="/">Home</a></li> - <li><a href="/files">File List</a></li> - <li><a href="/index_books">Re-Index Books</a></li> - </ul> - </nav> - - <div class="container"> - {% if is_html %} - <div class="html-content">{{ content|safe }}</div> - {% else %} - <pre>{{ content }}</pre> - {% endif %} - </div> - - <footer> - <p>© 2025 Book Search Engine</p> - </footer> -</body> +и<!DOCTYPE html> +<html> +<head> + <title>{{ file_path }}</title> + <link rel="stylesheet" href="/static/css/style.css"> + <style> + pre { + background-color: white; + padding: 20px; + border-radius: 5px; + white-space: pre-wrap; + word-wrap: break-word; + } + .html-content { + background-color: white; + padding: 20px; + border-radius: 5px; + } + .html-content hr { + margin: 30px 0; + border: 0; + border-top: 1px solid #eee; + } + .html-content h1, + .html-content h2, + .html-content h3, + .html-content h4, + .html-content h5, + .html-content h6 { + margin: 1em 0 0.5em 0; + line-height: 1.2; + } + .html-content p { + margin: 0 0 1em 0; + line-height: 1.5; + } + </style> +</head> +<body> + <header> + <h1>File: {{ file_path }}</h1> + </header> + + <nav class="nav"> + <ul> + <li><a href="/">Home</a></li> + <li><a href="/files">File List</a></li> + <li><a href="/index_books">Re-Index Books</a></li> + </ul> + </nav> + + <div class="container"> + {% if is_html %} + <div class="html-content">{{ content|safe }}</div> + {% else %} + <pre>{{ content }}</pre> + {% endif %} + </div> + + <footer> + <p>© 2025 Book Search Engine</p> + </footer> +</body> </html> \ No newline at end of file diff --git a/src/core/index.py b/src/core/index.py index e935ef3..3b41a21 100644 --- a/src/core/index.py +++ b/src/core/index.py @@ -1,142 +1,142 @@ -from elasticsearch import Elasticsearch -import os -import ebooklib -from ebooklib import epub -from bs4 import BeautifulSoup -import PyPDF2 -import time -from threading import Lock - -# Elasticsearch Configuration -ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost") -ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200)) -es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}]) -INDEX_NAME = "book_index" - -# Global variables for progress tracking -indexing_progress = { - 'total_files': 0, - 'processed_files': 0, - 'start_time': None, - 'is_running': False, - 'current_file': '', - 'errors': [] -} -progress_lock = Lock() - -def create_index(): - if not es.indices.exists(index=INDEX_NAME): - es.indices.create(index=INDEX_NAME) - -def extract_text_from_epub(epub_path): - book = epub.read_epub(epub_path) - text = '' - for item in book.get_items(): - if item.media_type == 'application/xhtml+xml': - soup = BeautifulSoup(item.get_content(), 'html.parser') - text += soup.get_text() - return text - -def extract_text_from_pdf(pdf_path): - text = '' - with open(pdf_path, 'rb') as pdf_file: - pdf_reader = PyPDF2.PdfReader(pdf_file) - for page_num in range(len(pdf_reader.pages)): - page = pdf_reader.pages[page_num] - text += page.extract_text() - return text - -def get_progress(): - with progress_lock: - if not indexing_progress['is_running']: - return None - - progress = indexing_progress.copy() - if progress['total_files'] > 0: - progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100 - else: - progress['percentage'] = 0 - - elapsed = time.time() - progress['start_time'] - progress['elapsed_time'] = elapsed - if progress['processed_files'] > 0: - time_per_file = elapsed / progress['processed_files'] - remaining_files = progress['total_files'] - progress['processed_files'] - progress['estimated_remaining'] = time_per_file * remaining_files - progress['estimated_completion'] = time.time() + progress['estimated_remaining'] - else: - progress['estimated_remaining'] = 0 - progress['estimated_completion'] = 0 - - return progress - -def index_files(directory): - global indexing_progress - - with progress_lock: - indexing_progress = { - 'total_files': 0, - 'processed_files': 0, - 'start_time': time.time(), - 'is_running': True, - 'current_file': '', - 'errors': [] - } - - try: - create_index() - - # First count all files - total_files = 0 - for root, _, files in os.walk(directory): - for file in files: - if file.endswith(('.epub', '.pdf', '.txt')): - total_files += 1 - - with progress_lock: - indexing_progress['total_files'] = total_files - - # Now process files - for root, _, files in os.walk(directory): - for file in files: - file_path = os.path.join(root, file) - - with progress_lock: - indexing_progress['current_file'] = file_path - - try: - encoded_file_path = file_path.encode('utf-8').decode('utf-8') - if file_path.endswith(".epub"): - text = extract_text_from_epub(file_path) - elif file_path.endswith(".pdf"): - text = extract_text_from_pdf(file_path) - elif file_path.endswith(".txt"): - with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f: - text = f.read() - else: - print(f"Skipping unsupported file type: {file_path}") - continue - - doc = { - 'file_path': file_path, - 'content': text - } - es.index(index=INDEX_NAME, document=doc) - print(f"Indexed: {file_path}") - - with progress_lock: - indexing_progress['processed_files'] += 1 - - except Exception as e: - error_msg = f"Error indexing {file_path}: {type(e)}, {e}" - print(error_msg) - with progress_lock: - indexing_progress['errors'].append(error_msg) - - finally: - with progress_lock: - indexing_progress['is_running'] = False - -if __name__ == '__main__': - BOOKS_DIR = "/books" # This should match the volume mount in docker-compose.yml +from elasticsearch import Elasticsearch +import os +import ebooklib +from ebooklib import epub +from bs4 import BeautifulSoup +import PyPDF2 +import time +from threading import Lock + +# Elasticsearch Configuration +ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost") +ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200)) +es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}]) +INDEX_NAME = "book_index" + +# Global variables for progress tracking +indexing_progress = { + 'total_files': 0, + 'processed_files': 0, + 'start_time': None, + 'is_running': False, + 'current_file': '', + 'errors': [] +} +progress_lock = Lock() + +def create_index(): + if not es.indices.exists(index=INDEX_NAME): + es.indices.create(index=INDEX_NAME) + +def extract_text_from_epub(epub_path): + book = epub.read_epub(epub_path) + text = '' + for item in book.get_items(): + if item.media_type == 'application/xhtml+xml': + soup = BeautifulSoup(item.get_content(), 'html.parser') + text += soup.get_text() + return text + +def extract_text_from_pdf(pdf_path): + text = '' + with open(pdf_path, 'rb') as pdf_file: + pdf_reader = PyPDF2.PdfReader(pdf_file) + for page_num in range(len(pdf_reader.pages)): + page = pdf_reader.pages[page_num] + text += page.extract_text() + return text + +def get_progress(): + with progress_lock: + if not indexing_progress['is_running']: + return None + + progress = indexing_progress.copy() + if progress['total_files'] > 0: + progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100 + else: + progress['percentage'] = 0 + + elapsed = time.time() - progress['start_time'] + progress['elapsed_time'] = elapsed + if progress['processed_files'] > 0: + time_per_file = elapsed / progress['processed_files'] + remaining_files = progress['total_files'] - progress['processed_files'] + progress['estimated_remaining'] = time_per_file * remaining_files + progress['estimated_completion'] = time.time() + progress['estimated_remaining'] + else: + progress['estimated_remaining'] = 0 + progress['estimated_completion'] = 0 + + return progress + +def index_files(directory): + global indexing_progress + + with progress_lock: + indexing_progress = { + 'total_files': 0, + 'processed_files': 0, + 'start_time': time.time(), + 'is_running': True, + 'current_file': '', + 'errors': [] + } + + try: + create_index() + + # First count all files + total_files = 0 + for root, _, files in os.walk(directory): + for file in files: + if file.endswith(('.epub', '.pdf', '.txt')): + total_files += 1 + + with progress_lock: + indexing_progress['total_files'] = total_files + + # Now process files + for root, _, files in os.walk(directory): + for file in files: + file_path = os.path.join(root, file) + + with progress_lock: + indexing_progress['current_file'] = file_path + + try: + encoded_file_path = file_path.encode('utf-8').decode('utf-8') + if file_path.endswith(".epub"): + text = extract_text_from_epub(file_path) + elif file_path.endswith(".pdf"): + text = extract_text_from_pdf(file_path) + elif file_path.endswith(".txt"): + with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f: + text = f.read() + else: + print(f"Skipping unsupported file type: {file_path}") + continue + + doc = { + 'file_path': file_path, + 'content': text + } + es.index(index=INDEX_NAME, document=doc) + print(f"Indexed: {file_path}") + + with progress_lock: + indexing_progress['processed_files'] += 1 + + except Exception as e: + error_msg = f"Error indexing {file_path}: {type(e)}, {e}" + print(error_msg) + with progress_lock: + indexing_progress['errors'].append(error_msg) + + finally: + with progress_lock: + indexing_progress['is_running'] = False + +if __name__ == '__main__': + BOOKS_DIR = "/books" # This should match the volume mount in docker-compose.yml index_files(BOOKS_DIR) \ No newline at end of file diff --git a/src/requirements.txt b/src/requirements.txt index 590f8f2..051e80d 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,7 +1,7 @@ -flask==3.0.2 -ebooklib==0.18 -beautifulsoup4==4.12.3 -pytest==8.3.2 -PyPDF2==3.0.1 -pytz==2024.1 +flask==3.0.2 +ebooklib==0.18 +beautifulsoup4==4.12.3 +pytest==8.3.2 +PyPDF2==3.0.1 +pytz==2024.1 elasticsearch>=8.0.0 \ No newline at end of file diff --git a/tests/unit/test_app.py b/tests/unit/test_app.py index 069a53d..65ef2df 100644 --- a/tests/unit/test_app.py +++ b/tests/unit/test_app.py @@ -1,127 +1,127 @@ -import unittest -import json -import os -import tempfile -import shutil -from app import app -from unittest.mock import patch, MagicMock - -class BookSearchAPITest(unittest.TestCase): - def setUp(self): - app.config['TESTING'] = True - self.client = app.test_client() - - # Create a temporary directory for test books - self.test_books_dir = tempfile.mkdtemp() - - # Create a sample test file - self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt') - with open(self.sample_file_path, 'w', encoding='utf-8') as f: - f.write("This is a test sample file for testing the book search API.") - - def tearDown(self): - # Remove the temporary directory - shutil.rmtree(self.test_books_dir) - - @patch('app.es') - @patch('app.index_files') - def test_index_books_api(self, mock_index_files, mock_es): - # Mock the index_files function - mock_index_files.return_value = None - - # Test the API endpoint - response = self.client.get('/index_books', headers={'Accept': 'application/json'}) - - # Check if the response is successful - self.assertEqual(response.status_code, 200) - - # Check if the response contains the expected message - data = json.loads(response.data) - self.assertIn('message', data) - self.assertEqual(data['message'], 'Indexing completed') - - # Check if the index_files function was called - mock_index_files.assert_called_once_with('/books') - - @patch('app.es') - def test_search_api(self, mock_es): - # Mock the Elasticsearch search method - mock_search_result = { - 'hits': { - 'hits': [ - { - '_source': { - 'file_path': '/books/test_sample.txt', - 'content': 'This is a test sample file for testing the book search API.' - } - } - ] - } - } - mock_es.search.return_value = mock_search_result - - # Test the API endpoint - response = self.client.get('/search?query=test', headers={'Accept': 'application/json'}) - - # Check if the response is successful - self.assertEqual(response.status_code, 200) - - # Check if the response contains the expected data - data = json.loads(response.data) - self.assertEqual(len(data), 1) - self.assertEqual(data[0]['file_path'], '/books/test_sample.txt') - self.assertIn('snippet', data[0]) - - # Check if the Elasticsearch search method was called with the correct parameters - mock_es.search.assert_called_once() - - @patch('app.os.listdir') - @patch('app.os.path.isfile') - def test_list_files_api(self, mock_isfile, mock_listdir): - # Mock the os.listdir function - mock_listdir.return_value = ['test_sample.txt', 'another_file.txt'] - # Mock the os.path.isfile function to always return True - mock_isfile.return_value = True - - # Test the API endpoint - response = self.client.get('/files', headers={'Accept': 'application/json'}) - - # Check if the response is successful - self.assertEqual(response.status_code, 200) - - # Check if the response contains the expected data - data = json.loads(response.data) - self.assertEqual(len(data), 2) - self.assertEqual(data[0]['name'], 'test_sample.txt') - self.assertEqual(data[1]['name'], 'another_file.txt') - - # Check if the os.listdir function was called with the correct parameters - mock_listdir.assert_called_once_with('/books') - - @patch('app.open') - @patch('app.os.path.isfile') - @patch('app.os.path.abspath') - def test_get_file_api(self, mock_abspath, mock_isfile, mock_open): - # Mock the necessary functions - mock_isfile.return_value = True - mock_abspath.side_effect = lambda x: x # Return the input unchanged - - # Mock the open function - mock_file = MagicMock() - mock_file.__enter__.return_value.read.return_value = "This is a test sample file." - mock_open.return_value = mock_file - - # Test the API endpoint - response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'}) - - # Check if the response is successful - self.assertEqual(response.status_code, 200) - - # Check if the response contains the expected data - self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.") - - # Check if the open function was called with the correct parameters - mock_open.assert_called_once() - -if __name__ == '__main__': +import unittest +import json +import os +import tempfile +import shutil +from app import app +from unittest.mock import patch, MagicMock + +class BookSearchAPITest(unittest.TestCase): + def setUp(self): + app.config['TESTING'] = True + self.client = app.test_client() + + # Create a temporary directory for test books + self.test_books_dir = tempfile.mkdtemp() + + # Create a sample test file + self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt') + with open(self.sample_file_path, 'w', encoding='utf-8') as f: + f.write("This is a test sample file for testing the book search API.") + + def tearDown(self): + # Remove the temporary directory + shutil.rmtree(self.test_books_dir) + + @patch('app.es') + @patch('app.index_files') + def test_index_books_api(self, mock_index_files, mock_es): + # Mock the index_files function + mock_index_files.return_value = None + + # Test the API endpoint + response = self.client.get('/index_books', headers={'Accept': 'application/json'}) + + # Check if the response is successful + self.assertEqual(response.status_code, 200) + + # Check if the response contains the expected message + data = json.loads(response.data) + self.assertIn('message', data) + self.assertEqual(data['message'], 'Indexing completed') + + # Check if the index_files function was called + mock_index_files.assert_called_once_with('/books') + + @patch('app.es') + def test_search_api(self, mock_es): + # Mock the Elasticsearch search method + mock_search_result = { + 'hits': { + 'hits': [ + { + '_source': { + 'file_path': '/books/test_sample.txt', + 'content': 'This is a test sample file for testing the book search API.' + } + } + ] + } + } + mock_es.search.return_value = mock_search_result + + # Test the API endpoint + response = self.client.get('/search?query=test', headers={'Accept': 'application/json'}) + + # Check if the response is successful + self.assertEqual(response.status_code, 200) + + # Check if the response contains the expected data + data = json.loads(response.data) + self.assertEqual(len(data), 1) + self.assertEqual(data[0]['file_path'], '/books/test_sample.txt') + self.assertIn('snippet', data[0]) + + # Check if the Elasticsearch search method was called with the correct parameters + mock_es.search.assert_called_once() + + @patch('app.os.listdir') + @patch('app.os.path.isfile') + def test_list_files_api(self, mock_isfile, mock_listdir): + # Mock the os.listdir function + mock_listdir.return_value = ['test_sample.txt', 'another_file.txt'] + # Mock the os.path.isfile function to always return True + mock_isfile.return_value = True + + # Test the API endpoint + response = self.client.get('/files', headers={'Accept': 'application/json'}) + + # Check if the response is successful + self.assertEqual(response.status_code, 200) + + # Check if the response contains the expected data + data = json.loads(response.data) + self.assertEqual(len(data), 2) + self.assertEqual(data[0]['name'], 'test_sample.txt') + self.assertEqual(data[1]['name'], 'another_file.txt') + + # Check if the os.listdir function was called with the correct parameters + mock_listdir.assert_called_once_with('/books') + + @patch('app.open') + @patch('app.os.path.isfile') + @patch('app.os.path.abspath') + def test_get_file_api(self, mock_abspath, mock_isfile, mock_open): + # Mock the necessary functions + mock_isfile.return_value = True + mock_abspath.side_effect = lambda x: x # Return the input unchanged + + # Mock the open function + mock_file = MagicMock() + mock_file.__enter__.return_value.read.return_value = "This is a test sample file." + mock_open.return_value = mock_file + + # Test the API endpoint + response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'}) + + # Check if the response is successful + self.assertEqual(response.status_code, 200) + + # Check if the response contains the expected data + self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.") + + # Check if the open function was called with the correct parameters + mock_open.assert_called_once() + +if __name__ == '__main__': unittest.main() \ No newline at end of file diff --git a/tests/unit/test_epub_viewer.py b/tests/unit/test_epub_viewer.py index bda9e7c..eb50797 100644 --- a/tests/unit/test_epub_viewer.py +++ b/tests/unit/test_epub_viewer.py @@ -1,114 +1,114 @@ -import os -import pytest -import tempfile -import shutil -from app import app -from unittest.mock import patch, MagicMock -from ebooklib import epub - -def create_test_epub(): - """Create a simple test EPUB file""" - # Create a simple EPUB file - book = epub.EpubBook() - - # Set metadata - book.set_identifier('test123456') - book.set_title('Test EPUB Book') - book.set_language('en') - book.add_author('Test Author') - - # Add a chapter - c1 = epub.EpubHtml(title='Chapter 1', file_name='chap_01.xhtml', lang='en') - c1.content = '<html><body><h1>Chapter 1</h1><p>This is a test EPUB file.</p></body></html>' - book.add_item(c1) - - # Add navigation - book.toc = [c1] - book.spine = ['nav', c1] - book.add_item(epub.EpubNcx()) - book.add_item(epub.EpubNav()) - - # Create temp directory - temp_dir = tempfile.mkdtemp() - epub_path = os.path.join(temp_dir, 'test.epub') - - # Write the EPUB file - epub.write_epub(epub_path, book) - - return epub_path, temp_dir - -@pytest.fixture -def client(): - """Create a test client for the Flask app""" - app.config['TESTING'] = True - with app.test_client() as client: - yield client - -@pytest.fixture -def test_epub(): - """Create a test EPUB file and clean up after the test""" - epub_path, temp_dir = create_test_epub() - - # Mock the books directory - original_join = os.path.join - - def mock_join(path, *paths): - if path == "/books" and paths and paths[0] == "test.epub": - return epub_path - return original_join(path, *paths) - - def mock_abspath(path): - if path == os.path.join("/books", "test.epub"): - return "/books/test.epub" - elif path == epub_path: - return "/books/test.epub" - return path - - with patch('os.path.join', side_effect=mock_join): - with patch('os.path.isfile', return_value=True): - with patch('os.path.abspath', side_effect=mock_abspath): - yield epub_path - - # Clean up - shutil.rmtree(temp_dir) - -def test_epub_viewer_page(client, test_epub): - """Test that the EPUB viewer page loads correctly""" - response = client.get('/file/test.epub') - - assert response.status_code == 200 - assert b'<!DOCTYPE html>' in response.data - assert b'<title>test.epub</title>' in response.data - assert b'<div id="viewer"></div>' in response.data - assert b'<script src="https://cdn.jsdelivr.net/npm/epubjs' in response.data - -def test_epub_file_endpoint(client, test_epub): - """Test that the EPUB file is served with correct headers""" - response = client.get('/epub/test.epub') - - assert response.status_code == 200 - assert response.headers['Content-Type'] == 'application/epub+zip' - assert response.headers['Access-Control-Allow-Origin'] == '*' - - # Check that the response contains EPUB data (at least the magic number) - assert response.data.startswith(b'PK') - -def test_epub_viewer_integration(client, test_epub): - """Test the integration between the viewer and the EPUB file""" - # This test would ideally use Selenium or Playwright to test the actual rendering - # Since we can't run a browser in this environment, we'll check for the correct setup - - # First, check that the viewer page loads - viewer_response = client.get('/file/test.epub') - assert viewer_response.status_code == 200 - - # Check that the JavaScript is correctly set up to load the EPUB - assert b'/epub/test.epub' in viewer_response.data - - # Check that the EPUB file is accessible - epub_response = client.get('/epub/test.epub') - assert epub_response.status_code == 200 - assert epub_response.headers['Content-Type'] == 'application/epub+zip' - -if __name__ == '__main__': +import os +import pytest +import tempfile +import shutil +from app import app +from unittest.mock import patch, MagicMock +from ebooklib import epub + +def create_test_epub(): + """Create a simple test EPUB file""" + # Create a simple EPUB file + book = epub.EpubBook() + + # Set metadata + book.set_identifier('test123456') + book.set_title('Test EPUB Book') + book.set_language('en') + book.add_author('Test Author') + + # Add a chapter + c1 = epub.EpubHtml(title='Chapter 1', file_name='chap_01.xhtml', lang='en') + c1.content = '<html><body><h1>Chapter 1</h1><p>This is a test EPUB file.</p></body></html>' + book.add_item(c1) + + # Add navigation + book.toc = [c1] + book.spine = ['nav', c1] + book.add_item(epub.EpubNcx()) + book.add_item(epub.EpubNav()) + + # Create temp directory + temp_dir = tempfile.mkdtemp() + epub_path = os.path.join(temp_dir, 'test.epub') + + # Write the EPUB file + epub.write_epub(epub_path, book) + + return epub_path, temp_dir + +@pytest.fixture +def client(): + """Create a test client for the Flask app""" + app.config['TESTING'] = True + with app.test_client() as client: + yield client + +@pytest.fixture +def test_epub(): + """Create a test EPUB file and clean up after the test""" + epub_path, temp_dir = create_test_epub() + + # Mock the books directory + original_join = os.path.join + + def mock_join(path, *paths): + if path == "/books" and paths and paths[0] == "test.epub": + return epub_path + return original_join(path, *paths) + + def mock_abspath(path): + if path == os.path.join("/books", "test.epub"): + return "/books/test.epub" + elif path == epub_path: + return "/books/test.epub" + return path + + with patch('os.path.join', side_effect=mock_join): + with patch('os.path.isfile', return_value=True): + with patch('os.path.abspath', side_effect=mock_abspath): + yield epub_path + + # Clean up + shutil.rmtree(temp_dir) + +def test_epub_viewer_page(client, test_epub): + """Test that the EPUB viewer page loads correctly""" + response = client.get('/file/test.epub') + + assert response.status_code == 200 + assert b'<!DOCTYPE html>' in response.data + assert b'<title>test.epub</title>' in response.data + assert b'<div id="viewer"></div>' in response.data + assert b'<script src="https://cdn.jsdelivr.net/npm/epubjs' in response.data + +def test_epub_file_endpoint(client, test_epub): + """Test that the EPUB file is served with correct headers""" + response = client.get('/epub/test.epub') + + assert response.status_code == 200 + assert response.headers['Content-Type'] == 'application/epub+zip' + assert response.headers['Access-Control-Allow-Origin'] == '*' + + # Check that the response contains EPUB data (at least the magic number) + assert response.data.startswith(b'PK') + +def test_epub_viewer_integration(client, test_epub): + """Test the integration between the viewer and the EPUB file""" + # This test would ideally use Selenium or Playwright to test the actual rendering + # Since we can't run a browser in this environment, we'll check for the correct setup + + # First, check that the viewer page loads + viewer_response = client.get('/file/test.epub') + assert viewer_response.status_code == 200 + + # Check that the JavaScript is correctly set up to load the EPUB + assert b'/epub/test.epub' in viewer_response.data + + # Check that the EPUB file is accessible + epub_response = client.get('/epub/test.epub') + assert epub_response.status_code == 200 + assert epub_response.headers['Content-Type'] == 'application/epub+zip' + +if __name__ == '__main__': pytest.main(['-xvs', __file__]) \ No newline at end of file