Update application

Dmitriy Kazimirov 2025-04-01 10:48:30 +00:00
parent 6a46a20c9e
commit 891787be0f
25 changed files with 2474 additions and 2474 deletions

.env

@@ -1,60 +1,60 @@
# Application Configuration
# ========================
# Base URL for the application (required, string)
# Format: http://hostname:port or https://hostname:port
BASE_URL=http://localhost:8000
# CPU Limit for container (optional, float)
# Number of CPU cores to allocate (e.g., 0.5, 1, 2)
# Default: 2 (will be used if not specified)
CPU_LIMIT=2
# Snippet character limit (optional, integer)
# Maximum length for text snippets in characters
# Default: 100
SNIPPET_CHAR_LIMIT=100
# Debug mode (optional, boolean)
# Enable debug output when set to True
# Default: False
DEBUG=False
# Application port (optional, integer)
# Port the application listens on
# Default: 5000
PORT=5000
# Elasticsearch Configuration
# ==========================
# Elasticsearch host (required, string)
# Hostname or IP of Elasticsearch service
ELASTICSEARCH_HOST=elasticsearch
# Elasticsearch username (sensitive, required, string)
# Admin username for Elasticsearch
ELASTICSEARCH_USERNAME=admin
# Elasticsearch password (sensitive, required, string)
# Admin password for Elasticsearch
ELASTICSEARCH_PASSWORD=password
# File Storage Configuration
# =========================
# SMB share path (optional, string)
# Local path where books are mounted
# Default: ./smb_share
SMB_SHARE_PATH=./smb_share
# Admin Credentials
# ================
# Admin username for API management (required, string)
ADMIN_USER=admin
# Admin password for API management (required, string)
ADMIN_PASSWORD=securepassword123
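The API container reads most of these values at runtime via `os.environ.get` (see app.py later in this commit), so anything left unset falls back to the documented default. A minimal sketch of that pattern; the standalone script below is illustrative, not part of the commit:

```python
import os

# Defaults mirror the comments above; anything set in the environment wins.
BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")
CPU_LIMIT = float(os.environ.get("CPU_LIMIT", "2"))
SNIPPET_CHAR_LIMIT = int(os.environ.get("SNIPPET_CHAR_LIMIT", "100"))
DEBUG = os.environ.get("DEBUG", "False").lower() == "true"
PORT = int(os.environ.get("PORT", "5000"))

if __name__ == "__main__":
    print(f"Base URL: {BASE_URL}, port: {PORT}, debug: {DEBUG}")
```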

@@ -1,39 +1,39 @@
name: Test EPUB Viewer

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Run tests
        run: |
          cd api
          python -m pytest test_epub_viewer.py -v
      - name: Run integration tests with Playwright
        run: |
          pip install playwright pytest-playwright
          playwright install
          cd api
          python -m pytest test_epub_viewer_integration.py -v
        if: false # Disabled until we create the integration tests with Playwright

.gitignore

@@ -1,30 +1,30 @@
# Ignore all EPUB files in smb_share
smb_share/*.epub
# Ignore sample text file
smb_share/sample.txt
# Python cache
__pycache__/
*.py[cod]
*$py.class
# Virtual environment
venv/
# IDE specific files
.vscode/
.idea/
# Logs and databases
*.log
*.sqlite
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

@@ -1,29 +1,29 @@
FROM python:3.9-alpine

WORKDIR /app

# Install dependencies
RUN pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 pytz

# Create books directory with proper permissions
RUN mkdir -p /books && chmod 777 /books

# Copy the API code and static files
COPY src/api/app.py .
COPY src/api/static /app/static
COPY src/api/templates /app/templates

# Expose the API port
EXPOSE 5000

# Copy the indexing script
COPY src/core/index.py .

# Copy the test file
COPY tests/unit/test_app.py .

# Add a dummy file to invalidate cache
ADD dummy.txt .

# Command to run the API
CMD ["python", "app.py"]

ai.md

@@ -1,33 +1,33 @@
# Final Deployment Status

## Configuration Summary:
1. **Container Auto-Restart**:
   - Both services configured with `restart: unless-stopped`
   - Containers will automatically restart on failures
2. **Resource Limits**:
   - CPU: `${CPU_LIMIT}` cores
   - Memory: 2GB limit
3. **Dependencies**:
   - pytz installed in container (version 2025.2)
   - All required Python packages verified
   - Dockerfile updated to include pytz for future builds
4. **Known Issues**:
   - Docker Compose v1.25.0 limitations:
     - Doesn't respect container_name directives
     - Shows harmless deploy key warnings
   - Solution: Upgrade to Docker Compose v2.x

## Verification:
- All services running
- CORS headers properly configured
- pytz module successfully imported (version 2025.2)
- API endpoints functional

## System Status: OPERATIONAL
- API: Running on port 8000
- Elasticsearch: Running on port 9200
- Auto-restart configured
- All features functional
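One way to spot-check the verification items above is a small script that queries the search endpoint and inspects the CORS header the API attaches to every response. A minimal sketch, assuming the default BASE_URL from `.env` and that the `requests` package is installed (not part of this commit):

```python
import requests

BASE_URL = "http://localhost:8000"  # adjust to your BASE_URL

resp = requests.get(
    f"{BASE_URL}/search",
    params={"query": "test", "format": "json"},
    timeout=10,
)
print("HTTP status:      ", resp.status_code)
print("CORS allow-origin:", resp.headers.get("Access-Control-Allow-Origin"))
print("Results returned: ", len(resp.json().get("results", [])))
```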

@@ -1,39 +1,39 @@
version: '3.7'

services:
  booksearch_app:
    build: .
    container_name: booksearch_app
    ports:
      - "8000:5000"
    environment:
      - ELASTICSEARCH_HOST=booksearch_elastic
      - BASE_URL=${BASE_URL}
      - CPU_LIMIT=${CPU_LIMIT}
      - SNIPPET_CHAR_LIMIT=${SNIPPET_CHAR_LIMIT}
    volumes:
      - ./smb_share:/books
    depends_on:
      - booksearch_elastic
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: ${CPU_LIMIT}
          memory: 2G

  booksearch_elastic:
    container_name: booksearch_elastic
    image: bitnami/elasticsearch:latest
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - discovery.type=single-node
      - ELASTICSEARCH_USERNAME=admin
      - ELASTICSEARCH_PASSWORD=password
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9200"]
      interval: 30s
      timeout: 10s
      retries: 5

@@ -1,109 +1,109 @@
function search_books(params, userSettings) {
  const query = params.query;
  const apiUrl = (userSettings.apiUrl || 'http://localhost:8000').replace(/\/$/, '');
  const useProxy = userSettings.useProxy || false;
  const proxyUrl = userSettings.proxyUrl || 'https://cors-anywhere.herokuapp.com/';
  // Debugging headers - WARNING: Only for development/testing
  const debugHeaders = userSettings.debugHeaders || {};

  if (!query) {
    throw new Error('Search query is required');
  }

  // Prepare the target URL
  const targetUrl = `${apiUrl}/search?query=${encodeURIComponent(query)}`;
  const requestUrl = useProxy ? `${proxyUrl}${targetUrl}` : targetUrl;

  // Add timeout handling
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 10000);

  // Prepare headers
  const headers = {
    'Accept': 'application/json',
    ...(useProxy ? { 'X-Requested-With': 'XMLHttpRequest' } : {}),
    ...debugHeaders // Add debug headers if provided
  };

  return fetch(requestUrl, {
    method: 'GET',
    headers: headers,
    signal: controller.signal
  })
    .then(async response => {
      clearTimeout(timeoutId);
      if (!response.ok) {
        const errorBody = await response.text().catch(() => '');
        throw new Error(`API request failed with status ${response.status}. Response: ${errorBody}`);
      }
      const contentType = response.headers.get('content-type');
      if (!contentType || !contentType.includes('application/json')) {
        throw new Error(`Invalid content type: ${contentType}`);
      }
      return response.json();
    })
    .then(results => {
      if (!Array.isArray(results)) {
        throw new Error(`Invalid response format. Expected array, got ${typeof results}`);
      }
      if (results.length === 0) {
        return 'No books found matching your search';
      }
      // Format results with book paths and snippets
      return results.map(result => {
        if (!result.file_path || !result.snippet) {
          throw new Error('Invalid result format - missing required fields');
        }
        // Create properly encoded URL
        let formattedUrl = '';
        if (result.raw_url) {
          try {
            // Split URL into parts and encode components separately
            const url = new URL(result.raw_url);
            const pathParts = url.pathname.split('/').map(part =>
              encodeURIComponent(part).replace(/'/g, "%27")
            );
            const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : '';
            formattedUrl = `${url.origin}${pathParts.join('/')}${search}`;
          } catch (e) {
            formattedUrl = result.raw_url; // Fallback to original if URL parsing fails
          }
        }
        return `Book: ${result.file_path}\n` +
          `Snippet: ${result.snippet}\n` +
          (formattedUrl ? `URL: ${formattedUrl}\n` : '');
      }).join('\n\n');
    })
    .catch(error => {
      clearTimeout(timeoutId);
      let errorMessage = `Error searching books: ${error.message}`;
      if (error.name === 'AbortError') {
        errorMessage += '\n\nDiagnostics: Request timed out. Check if:';
        errorMessage += `\n- The API is running at ${apiUrl}`;
        errorMessage += '\n- The server is accessible from your network';
        if (!useProxy) {
          errorMessage += '\n- Try enabling proxy in plugin settings';
        }
      } else if (error.message.includes('Failed to fetch') || error.message.includes('CORS')) {
        errorMessage += '\n\nDiagnostics: Network request failed. Check if:';
        errorMessage += `\n- The API URL (${apiUrl}) is correct`;
        errorMessage += '\n- CORS is properly configured on the server';
        errorMessage += '\n- The server is running and accessible';
        if (!useProxy) {
          errorMessage += '\n- Try enabling proxy in plugin settings to bypass CORS';
        }
        errorMessage += '\n- For debugging, you can add CORS headers in plugin settings';
      }
      return errorMessage;
    });
}

@@ -1,16 +1,16 @@
{
  "name": "search_books",
  "description": "Search for books by content using the book search API.",
  "parameters": {
    "type": "object",
    "properties": {
      "query": {
        "type": "string",
        "description": "The search query to find matching book content"
      }
    },
    "required": [
      "query"
    ]
  }
}

@@ -1,45 +1,45 @@
[
  {
    "name": "cors_allow_origin",
    "label": "CORS Allowed Origin",
    "required": false,
    "default": "*",
    "description": "Value for Access-Control-Allow-Origin header, typically '*' for public APIs"
  },
  {
    "name": "cors_allow_methods",
    "label": "CORS Allowed Methods",
    "required": false,
    "default": "GET, POST, PUT",
    "description": "Comma-separated HTTP methods for Access-Control-Allow-Methods header"
  },
  {
    "name": "cors_allow_headers",
    "label": "CORS Allowed Headers",
    "required": false,
    "default": "Content-Type",
    "description": "Comma-separated headers for Access-Control-Allow-Headers"
  },
  {
    "name": "proxyUrl",
    "label": "Proxy Server URL",
    "required": false,
    "default": "",
    "description": "URL of the proxy server to use for external requests"
  },
  {
    "name": "bookSearchAPIKey",
    "label": "Search Engine API Key",
    "type": "password",
    "default": "",
    "required": false,
    "description": "API Key to use while making requests (not yet used)"
  },
  {
    "name": "apiUrl",
    "label": "API Base URL",
    "required": false,
    "default": "http://localhost:8000",
    "description": "Base URL for the API endpoints"
  }
]

readme.md

@@ -1,241 +1,241 @@
# What is it?

## TypeMind Plugin: EPUB/PDF/TXT Search Integration
A plugin for [TypeMind](https://docs.typingmind.com/plugins/build-a-typingmind-plugin) that mimics the **WebSearch** feature but focuses on retrieving books/documents. Users can query, e.g., *"Find me books about Hecate"*, and the plugin returns **clickable links** to relevant files (EPUB, PDF, TXT).

### Features
- **File Formats**: Supports EPUB, PDF, and TXT (assumed compatibility).
- **Requirement**: Users must provide their own files for indexing.

### Technical Context
- **Language**: Python.
- **Skill Level**:
  - My Python knowledge is **extremely rusty** (last project: a not-too-simple game bot, years ago).
  - Self-assessment: **Python novice**.
- **Tools Used**:
  - **Sonnet 3.7** and **DeepSeek-V3-0324** (for AI/ML integration).
  - **RooCode**

### Purpose
1. **Experiment**: Test RooCode's capabilities and identify practical applications.
2. **Non-Production**: **⚠️ Do NOT deploy this in production** (even if "fixed" by someone).

---

### Key Notes
- Humor/self-deprecation preserved (e.g., "extremely rusty," "novice").
- Technical terms standardized (Sonnet 3.7, DeepSeek-V3-0324).
- Critical warnings emphasized (**bold + emoji** for production risk).
# Application Deployment Guide (Ubuntu LTS)

## Prerequisites

### System Requirements
- Ubuntu 22.04 LTS (64-bit)
- Minimum 2 CPU cores, 4GB RAM
- 20GB free disk space
- Open ports: 8000 (app), 9200 (Elasticsearch)

### Required Software
```bash
# Update package lists
sudo apt update

# Install Docker and Docker Compose
sudo apt install -y docker.io docker-compose
sudo systemctl enable --now docker

# Add current user to docker group (logout required)
sudo usermod -aG docker $USER
```

## Environment Configuration
1. Clone the repository:
```bash
git clone https://github.com/intari/roocodetests_1.git
cd roocodetests_1
```
2. Configure environment variables:
```bash
# Copy example .env file
cp .env.example .env

# Edit configuration (nano/vim)
nano .env
```
Key variables to configure:
- `BASE_URL`: Public URL of your application
- `ELASTICSEARCH_PASSWORD`: Secure password for Elasticsearch
- `CPU_LIMIT`: CPU cores to allocate (default: 2)

## Application Deployment
1. Start all services:
```bash
docker-compose up -d
```
2. Verify services are running:
```bash
docker-compose ps
```
3. Check application logs:
```bash
docker-compose logs -f api
```
4. Access the application:
- Web interface: http://your-server-ip:8000
- Elasticsearch: http://your-server-ip:9200
## Maintenance

### Restart & rebuild
```bash
docker-compose down && docker-compose up -d --build
```

### Logs
```bash
docker logs booksearch_app -f
```

### Log Rotation
Configure Docker log rotation in `/etc/docker/daemon.json`:
```json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  }
}
```
Then restart Docker:
```bash
sudo systemctl restart docker
```

### Backups
1. Create backup script (`/usr/local/bin/backup-app.sh`):
```bash
#!/bin/bash
BACKUP_DIR=/var/backups/app
mkdir -p $BACKUP_DIR
docker-compose exec -T elasticsearch curl -X POST "localhost:9200/_snapshot/backup_repo/_all" -H "Content-Type: application/json"
docker-compose exec -T elasticsearch curl -X GET "localhost:9200/_snapshot/backup_repo/snapshot_$(date +%Y-%m-%d)?pretty"
```
2. Make executable and schedule daily cron job:
```bash
sudo chmod +x /usr/local/bin/backup-app.sh
sudo crontab -e
# Add: 0 3 * * * /usr/local/bin/backup-app.sh
```

### Updates
1. Pull latest changes:
```bash
git pull origin main
```
2. Rebuild containers:
```bash
docker-compose up -d --build
```
## Troubleshooting

### Common Issues

**Application not starting:**
```bash
# Check container status
docker ps -a

# View logs
docker-compose logs api
```

**Elasticsearch health issues:**
```bash
# Check cluster health
curl -X GET "localhost:9200/_cluster/health?pretty"

# Check node stats
curl -X GET "localhost:9200/_nodes/stats?pretty"
```

**Port conflicts:**
```bash
# Check used ports
sudo netstat -tulnp

# Change ports in docker-compose.yml if needed
```

### Debugging
1. Access running container shell:
```bash
docker-compose exec api bash
```
2. Check resource usage:
```bash
docker stats
```
## Check requests via JSON
curl -H "Accept: application/json" -X GET https://booksearch.yourdomain.com/search?query=android

# Simple search
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search?query=android"

# Search with format parameter
curl "https://booksearch.yourdomain.com/search?query=android&format=json"

# Error case
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search"

## API Endpoints

### Search API
```
GET /search?query={query}[&format=json]
```
### Reset Elasticsearch Index
```
POST /reset_index
Headers:
- Authorization: Basic base64(username:password)
```
Example:
```bash
curl -X POST -u admin:securepassword123 https://booksearch.yourdomain.com/reset_index
```
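The same reset call from Python, using `requests` with HTTP Basic auth; the host and credentials are placeholders exactly as in the curl example above:

```python
import requests

resp = requests.post(
    "https://booksearch.yourdomain.com/reset_index",
    auth=("admin", "securepassword123"),  # ADMIN_USER / ADMIN_PASSWORD from .env
    timeout=30,
)
print(resp.status_code, resp.json())
```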
## References
- [Ubuntu Docker Installation](https://docs.docker.com/engine/install/ubuntu/)
- [Docker Compose Reference](https://docs.docker.com/compose/reference/)
- [Elasticsearch Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html)
## Plugin-alt
Alt version of the plugin configuration.

Method: GET

Request URL: https://booksearch.yourdomain.com/search?query={prompt}&format=json

Request headers:
{
  "Accept": "application/json"
}
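An equivalent request from Python for the alt plugin configuration (the domain is a placeholder and `prompt` stands in for the `{prompt}` template variable):

```python
import requests

prompt = "android"
resp = requests.get(
    "https://booksearch.yourdomain.com/search",
    params={"query": prompt, "format": "json"},
    headers={"Accept": "application/json"},
    timeout=10,
)
print(resp.json())
```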

@@ -1,8 +1,8 @@
{
  "folders": [
    {
      "path": "."
    }
  ],
  "settings": {}
}

@@ -1,37 +1,37 @@
@echo off
echo Setting up test environment...

echo Checking Python version...
python --version
if errorlevel 1 (
    echo Python not found. Please install Python 3.10+ first.
    pause
    exit /b 1
)

echo Installing Python dependencies...
python -m pip install --upgrade pip --user
if errorlevel 1 (
    echo Failed to upgrade pip
    pause
    exit /b 1
)

pip install -r requirements.txt --user
if errorlevel 1 (
    echo Failed to install dependencies
    pause
    exit /b 1
)

echo Running EPUB viewer tests...
cd api
python -m pytest test_epub_viewer.py -v
if errorlevel 1 (
    echo Some tests failed
    pause
    exit /b 1
)

echo All tests completed successfully!
pause

@@ -1,29 +1,29 @@
#!/bin/bash
echo "Setting up test environment..."

echo "Checking Python version..."
python3 --version || {
    echo "Python 3 not found. Please install Python 3.10+ first."
    exit 1
}

echo "Installing Python dependencies..."
python3 -m pip install --upgrade pip --user || {
    echo "Failed to upgrade pip"
    exit 1
}

pip3 install -r requirements.txt --user || {
    echo "Failed to install dependencies"
    exit 1
}

echo "Running EPUB viewer tests..."
cd api
python3 -m pytest test_epub_viewer.py -v || {
    echo "Some tests failed"
    exit 1
}

echo "All tests completed successfully!"

View file

@ -1,406 +1,406 @@
from flask import Flask, request, jsonify, render_template, send_from_directory from flask import Flask, request, jsonify, render_template, send_from_directory
from urllib.parse import unquote from urllib.parse import unquote
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
import os import os
import ebooklib import ebooklib
from ebooklib import epub from ebooklib import epub
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import PyPDF2 import PyPDF2
import time import time
import logging import logging
import multiprocessing import multiprocessing
import sys import sys
from pathlib import Path from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent)) sys.path.append(str(Path(__file__).parent.parent))
from index import index_files, get_progress from index import index_files, get_progress
from io import StringIO from io import StringIO
import sys import sys
app = Flask(__name__, static_folder='static') app = Flask(__name__, static_folder='static')
@app.after_request @app.after_request
def add_cors_headers(response): def add_cors_headers(response):
response.headers['Access-Control-Allow-Origin'] = '*' response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT' response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT'
response.headers['Access-Control-Allow-Headers'] = 'Content-Type' response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
return response return response
# Elasticsearch Configuration # Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost") ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200)) ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
INDEX_NAME = "book_index" INDEX_NAME = "book_index"
# Wait for Elasticsearch to be available # Wait for Elasticsearch to be available
es = None es = None
while True: while True:
try: try:
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}]) es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
if es.ping(): if es.ping():
print("Connected to Elasticsearch") print("Connected to Elasticsearch")
break break
else: else:
print("Elasticsearch not available, retrying...") print("Elasticsearch not available, retrying...")
except Exception as e: except Exception as e:
print(f"Error connecting to Elasticsearch: {e}") print(f"Error connecting to Elasticsearch: {e}")
time.sleep(5) time.sleep(5)
def extract_text_from_epub(epub_path): def extract_text_from_epub(epub_path):
try: try:
book = epub.read_epub(epub_path) book = epub.read_epub(epub_path)
text = '' text = ''
for item in book.get_items(): for item in book.get_items():
if item.media_type == 'application/xhtml+xml': if item.media_type == 'application/xhtml+xml':
content = item.get_content() content = item.get_content()
if content: if content:
soup = BeautifulSoup(content, 'html.parser') soup = BeautifulSoup(content, 'html.parser')
text += soup.get_text() text += soup.get_text()
return text return text
except Exception as e: except Exception as e:
logging.error(f"Error processing EPUB {epub_path}: {str(e)}") logging.error(f"Error processing EPUB {epub_path}: {str(e)}")
return f"Error extracting text: {str(e)}" return f"Error extracting text: {str(e)}"
def extract_text_from_pdf(pdf_path): def extract_text_from_pdf(pdf_path):
text = '' text = ''
with open(pdf_path, 'rb') as pdf_file: with open(pdf_path, 'rb') as pdf_file:
pdf_reader = PyPDF2.PdfReader(pdf_file) pdf_reader = PyPDF2.PdfReader(pdf_file)
for page_num in range(len(pdf_reader.pages)): for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num] page = pdf_reader.pages[page_num]
text += page.extract_text() text += page.extract_text()
return text return text
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
def home(): def home():
return render_template('search.html') return render_template('search.html')
@app.route('/search', methods=['GET']) @app.route('/search', methods=['GET'])
def search(): def search():
query = request.args.get('query') query = request.args.get('query')
if not query: if not query:
if request.headers.get('Accept') == 'application/json': if request.headers.get('Accept') == 'application/json':
return jsonify({"error": "Query parameter is required"}), 400 return jsonify({"error": "Query parameter is required"}), 400
return render_template('search.html', query='') return render_template('search.html', query='')
try: try:
results = es.search(index=INDEX_NAME, query={'match': {'content': query}}) results = es.search(index=INDEX_NAME, query={'match': {'content': query}})
hits = results['hits']['hits'] hits = results['hits']['hits']
search_results = [] search_results = []
for hit in hits: for hit in hits:
file_path = hit['_source']['file_path'] file_path = hit['_source']['file_path']
content = hit['_source']['content'] content = hit['_source']['content']
# Highlight snippet (simple version) # Highlight snippet (simple version)
snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100)) snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100))
index = content.lower().find(query.lower()) index = content.lower().find(query.lower())
if index != -1: if index != -1:
start = max(0, index - snippet_char_limit) start = max(0, index - snippet_char_limit)
end = min(len(content), index + snippet_char_limit + len(query)) end = min(len(content), index + snippet_char_limit + len(query))
snippet = content[start:end] snippet = content[start:end]
else: else:
snippet = "No snippet found" snippet = "No snippet found"
# Get base URL from environment # Get base URL from environment
base_url = os.environ.get("BASE_URL", "http://localhost:8000") base_url = os.environ.get("BASE_URL", "http://localhost:8000")
# Construct URLs # Construct URLs
# Remove "/books/" from path start if it's here # Remove "/books/" from path start if it's here
if file_path.startswith("/books/"): if file_path.startswith("/books/"):
file_path = file_path[len("/books/"):] file_path = file_path[len("/books/"):]
url = f"{base_url}/{file_path}" url = f"{base_url}/{file_path}"
raw_url = f"{base_url}/file/{file_path}?format=html" raw_url = f"{base_url}/file/{file_path}?format=html"
search_results.append({ search_results.append({
"file_path": file_path, "file_path": file_path,
"url": url, "url": url,
"raw_url": raw_url, "raw_url": raw_url,
"snippet": snippet, "snippet": snippet,
"score": hit['_score'] "score": hit['_score']
}) })
# If it's an API request or format=json is specified # If it's an API request or format=json is specified
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
response = jsonify({ response = jsonify({
"query": query, "query": query,
"results": search_results, "results": search_results,
"total": len(search_results), "total": len(search_results),
"took": results['took'] "took": results['took']
}) })
response.headers['Content-Type'] = 'application/json' response.headers['Content-Type'] = 'application/json'
return response return response
# Otherwise, render the HTML template # Otherwise, render the HTML template
return render_template('search.html', results=search_results, query=query) return render_template('search.html', results=search_results, query=query)
except Exception as e: except Exception as e:
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json': if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
response = jsonify({ response = jsonify({
"error": str(e), "error": str(e),
"query": query "query": query
}) })
response.headers['Content-Type'] = 'application/json' response.headers['Content-Type'] = 'application/json'
return response, 500 return response, 500
return render_template('search.html', error=str(e), query=query) return render_template('search.html', error=str(e), query=query)
@app.route('/files', methods=['GET']) @app.route('/files', methods=['GET'])
def list_files(): def list_files():
books_dir = "/books" books_dir = "/books"
files = [] files = []
try: try:
# Check if indexing is in progress # Check if indexing is in progress
indexing_in_progress = get_progress() is not None indexing_in_progress = get_progress() is not None
for filename in os.listdir(books_dir): for filename in os.listdir(books_dir):
file_path = os.path.join(books_dir, filename) file_path = os.path.join(books_dir, filename)
if os.path.isfile(file_path): if os.path.isfile(file_path):
file_size = os.path.getsize(file_path) file_size = os.path.getsize(file_path)
# Extract book title from filename if possible # Extract book title from filename if possible
title = filename title = filename
if ' - ' in filename: # Common pattern in filenames if ' - ' in filename: # Common pattern in filenames
title_parts = filename.split(' - ') title_parts = filename.split(' - ')
if len(title_parts) > 1: if len(title_parts) > 1:
title = ' - '.join(title_parts[:-1]) # Take all but last part title = ' - '.join(title_parts[:-1]) # Take all but last part
files.append({ files.append({
'name': filename, 'name': filename,
'title': title, 'title': title,
'path': filename, 'path': filename,
'size': file_size, 'size': file_size,
'size_mb': round(file_size / (1024 * 1024), 2) 'size_mb': round(file_size / (1024 * 1024), 2)
}) })
# Calculate totals # Calculate totals
total_files = len(files) total_files = len(files)
total_size = sum(f['size'] for f in files) total_size = sum(f['size'] for f in files)
total_size_mb = round(total_size / (1024 * 1024), 2) total_size_mb = round(total_size / (1024 * 1024), 2)
# If it's an API request, return JSON # If it's an API request, return JSON
if request.headers.get('Accept') == 'application/json': if request.headers.get('Accept') == 'application/json':
return jsonify({ return jsonify({
'files': files, 'files': files,
'total_files': total_files, 'total_files': total_files,
'total_size': total_size, 'total_size': total_size,
'total_size_mb': total_size_mb, 'total_size_mb': total_size_mb,
'indexing_in_progress': indexing_in_progress 'indexing_in_progress': indexing_in_progress
}) })
# Otherwise, render the HTML template # Otherwise, render the HTML template
return render_template('files.html', return render_template('files.html',
files=files, files=files,
total_files=total_files, total_files=total_files,
total_size=total_size, total_size=total_size,
total_size_mb=total_size_mb, total_size_mb=total_size_mb,
indexing_in_progress=indexing_in_progress) indexing_in_progress=indexing_in_progress)
except Exception as e: except Exception as e:
if request.headers.get('Accept') == 'application/json': if request.headers.get('Accept') == 'application/json':
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
return render_template('files.html', error=str(e)) return render_template('files.html', error=str(e))
@app.route('/file/<path:file_path>', methods=['GET']) @app.route('/file/<path:file_path>', methods=['GET'])
def get_file(file_path): def get_file(file_path):
# Ensure the file path is within the /books directory # Ensure the file path is within the /books directory
books_dir = "/books" books_dir = "/books"
# Decode URL-encoded path and normalize # Decode URL-encoded path and normalize
decoded_path = unquote(file_path) decoded_path = unquote(file_path)
# Remove any leading slashes or duplicate 'books/' segments # Remove any leading slashes or duplicate 'books/' segments
decoded_path = decoded_path.lstrip('/') decoded_path = decoded_path.lstrip('/')
if decoded_path.startswith('books/'): if decoded_path.startswith('books/'):
decoded_path = decoded_path[6:] decoded_path = decoded_path[6:]
# Join paths safely # Join paths safely
full_path = os.path.normpath(os.path.join(books_dir, decoded_path)) full_path = os.path.normpath(os.path.join(books_dir, decoded_path))
# Validate the path is within the books directory # Validate the path is within the books directory
if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
return jsonify({"error": "Access denied: File path outside of books directory"}), 403 return jsonify({"error": "Access denied: File path outside of books directory"}), 403
try: try:
# Handle EPUB files # Handle EPUB files
if file_path.lower().endswith('.epub'): if file_path.lower().endswith('.epub'):
if request.args.get('format') == 'html': if request.args.get('format') == 'html':
# Convert EPUB to HTML # Convert EPUB to HTML
try: try:
book = epub.read_epub(full_path) book = epub.read_epub(full_path)
html_content = [] html_content = []
for item in book.get_items(): for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT: if item.get_type() == ebooklib.ITEM_DOCUMENT:
content = item.get_content() content = item.get_content()
if content: if content:
soup = BeautifulSoup(content, 'html.parser') soup = BeautifulSoup(content, 'html.parser')
# Preserve basic formatting tags # Preserve basic formatting tags
for tag in soup.find_all(): for tag in soup.find_all():
if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']: if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']:
tag.unwrap() tag.unwrap()
html_content.append(str(soup)) html_content.append(str(soup))
except Exception as e: except Exception as e:
logging.error(f"Error processing EPUB {full_path}: {str(e)}") logging.error(f"Error processing EPUB {full_path}: {str(e)}")
return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500 return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500
return render_template('text_file.html', return render_template('text_file.html',
file_path=file_path, file_path=file_path,
content='<hr>'.join(html_content), content='<hr>'.join(html_content),
is_html=True) is_html=True)
else: else:
# Render the viewer template # Render the viewer template
return render_template('epub_viewer.html', file_path=file_path) return render_template('epub_viewer.html', file_path=file_path)
# Handle regular text files # Handle regular text files
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read() content = f.read()
# If it's an API request or the Accept header doesn't include HTML, return plain text # If it's an API request or the Accept header doesn't include HTML, return plain text
if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''): if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''):
return content, 200, {'Content-Type': 'text/plain; charset=utf-8'} return content, 200, {'Content-Type': 'text/plain; charset=utf-8'}
# Otherwise, render a simple HTML page with the content # Otherwise, render a simple HTML page with the content
return render_template('text_file.html', file_path=file_path, content=content) return render_template('text_file.html', file_path=file_path, content=content)
except Exception as e: except Exception as e:
return jsonify({"error": str(e)}), 404 return jsonify({"error": str(e)}), 404
@app.route('/epub/<path:file_path>', methods=['GET']) @app.route('/epub/<path:file_path>', methods=['GET'])
def get_epub_file(file_path): def get_epub_file(file_path):
"""Serve the raw EPUB file with proper headers""" """Serve the raw EPUB file with proper headers"""
books_dir = "/books" books_dir = "/books"
full_path = os.path.join(books_dir, file_path) full_path = os.path.join(books_dir, file_path)
# Validate the path is within the books directory # Validate the path is within the books directory
if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)): if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
return jsonify({"error": "Access denied: File path outside of books directory"}), 403 return jsonify({"error": "Access denied: File path outside of books directory"}), 403
try: try:
# Serve the raw EPUB file with proper headers # Serve the raw EPUB file with proper headers
response = send_from_directory( response = send_from_directory(
books_dir, books_dir,
file_path, file_path,
as_attachment=True, as_attachment=True,
mimetype='application/epub+zip' mimetype='application/epub+zip'
) )
response.headers['Access-Control-Allow-Origin'] = '*' response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET' response.headers['Access-Control-Allow-Methods'] = 'GET'
response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"' response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"'
return response return response
except Exception as e: except Exception as e:
return jsonify({"error": str(e)}), 404 return jsonify({"error": str(e)}), 404
@app.route('/index_books', methods=['GET']) @app.route('/index_books', methods=['GET'])
def index_books(): def index_books():
logging.info("Indexing books endpoint called") logging.info("Indexing books endpoint called")
# Get CPU configuration # Get CPU configuration
cpu_limit = os.environ.get("CPU_LIMIT") cpu_limit = os.environ.get("CPU_LIMIT")
available_cpus = multiprocessing.cpu_count() available_cpus = multiprocessing.cpu_count()
used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1) used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1)
# Capture stdout to a string # Capture stdout to a string
old_stdout = sys.stdout old_stdout = sys.stdout
sys.stdout = captured_output = StringIO() sys.stdout = captured_output = StringIO()
try: try:
# Start indexing in a separate thread # Start indexing in a separate thread
from threading import Thread from threading import Thread
index_thread = Thread(target=index_files, args=("/books",)) index_thread = Thread(target=index_files, args=("/books",))
index_thread.start() index_thread.start()
# If it's an API request, return immediately # If it's an API request, return immediately
if request.headers.get('Accept') == 'application/json': if request.headers.get('Accept') == 'application/json':
return jsonify({"message": "Indexing started in background"}) return jsonify({"message": "Indexing started in background"})
# Otherwise, render the progress page with CPU info # Otherwise, render the progress page with CPU info
return render_template('indexing.html', return render_template('indexing.html',
available_cpus=available_cpus, available_cpus=available_cpus,
used_cpus=used_cpus) used_cpus=used_cpus)
except Exception as e: except Exception as e:
logging.error(f"Indexing failed: {e}") logging.error(f"Indexing failed: {e}")
sys.stdout = old_stdout sys.stdout = old_stdout
if request.headers.get('Accept') == 'application/json': if request.headers.get('Accept') == 'application/json':
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
# Create a simple HTML response for errors # Create a simple HTML response for errors
return render_template('indexing_error.html', error=str(e)) return render_template('indexing_error.html', error=str(e))
finally: finally:
sys.stdout = old_stdout sys.stdout = old_stdout
@app.route('/indexing_progress', methods=['GET']) @app.route('/indexing_progress', methods=['GET'])
def get_indexing_progress(): def get_indexing_progress():
progress = get_progress() progress = get_progress()
if progress is None: if progress is None:
return jsonify({"status": "not_running"}) return jsonify({"status": "not_running"})
# Format time for display # Format time for display
from datetime import datetime from datetime import datetime
import pytz import pytz
# Get browser timezone from Accept-Language header or use UTC as fallback # Get browser timezone from Accept-Language header or use UTC as fallback
browser_tz = request.headers.get('X-Timezone', 'UTC') browser_tz = request.headers.get('X-Timezone', 'UTC')
try: try:
tz = pytz.timezone(browser_tz) tz = pytz.timezone(browser_tz)
except pytz.UnknownTimeZoneError: except pytz.UnknownTimeZoneError:
tz = pytz.UTC tz = pytz.UTC
elapsed_min = int(progress['elapsed_time'] // 60) elapsed_min = int(progress['elapsed_time'] // 60)
elapsed_sec = int(progress['elapsed_time'] % 60) elapsed_sec = int(progress['elapsed_time'] % 60)
if progress['estimated_remaining'] > 0: if progress['estimated_remaining'] > 0:
remaining_min = int(progress['estimated_remaining'] // 60) remaining_min = int(progress['estimated_remaining'] // 60)
remaining_sec = int(progress['estimated_remaining'] % 60) remaining_sec = int(progress['estimated_remaining'] % 60)
completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)') completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)')
else: else:
remaining_min = 0 remaining_min = 0
remaining_sec = 0 remaining_sec = 0
completion_time = "N/A" completion_time = "N/A"
return jsonify({ return jsonify({
"status": "running", "status": "running",
"total_files": progress['total_files'], "total_files": progress['total_files'],
"processed_files": progress['processed_files'], "processed_files": progress['processed_files'],
"percentage": round(progress['percentage'], 1), "percentage": round(progress['percentage'], 1),
"current_file": progress['current_file'], "current_file": progress['current_file'],
"elapsed_time": f"{elapsed_min}m {elapsed_sec}s", "elapsed_time": f"{elapsed_min}m {elapsed_sec}s",
"estimated_remaining": f"{remaining_min}m {remaining_sec}s", "estimated_remaining": f"{remaining_min}m {remaining_sec}s",
"estimated_completion": completion_time, "estimated_completion": completion_time,
"errors": progress['errors'] "errors": progress['errors']
}) })
@app.route('/abort_indexing', methods=['POST']) @app.route('/abort_indexing', methods=['POST'])
def abort_indexing(): def abort_indexing():
# In a real implementation, we would set a flag to stop the indexing # In a real implementation, we would set a flag to stop the indexing
# For now, we'll just return a message # For now, we'll just return a message
return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"}) return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"})
@app.route('/reset_index', methods=['POST']) @app.route('/reset_index', methods=['POST'])
def reset_index(): def reset_index():
"""Reset the Elasticsearch index by deleting and recreating it""" """Reset the Elasticsearch index by deleting and recreating it"""
try: try:
# Check for basic auth # Check for basic auth
auth = request.authorization auth = request.authorization
if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"): if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"):
return jsonify({"error": "Authentication required"}), 401 return jsonify({"error": "Authentication required"}), 401
# Delete existing index if it exists # Delete existing index if it exists
if es.indices.exists(index=INDEX_NAME): if es.indices.exists(index=INDEX_NAME):
es.indices.delete(index=INDEX_NAME) es.indices.delete(index=INDEX_NAME)
# Create new index with mapping # Create new index with mapping
es.indices.create(index=INDEX_NAME, body={ es.indices.create(index=INDEX_NAME, body={
"settings": { "settings": {
"number_of_shards": 1, "number_of_shards": 1,
"number_of_replicas": 0 "number_of_replicas": 0
}, },
"mappings": { "mappings": {
"properties": { "properties": {
"file_path": {"type": "keyword"}, "file_path": {"type": "keyword"},
"content": {"type": "text"} "content": {"type": "text"}
} }
} }
}) })
return jsonify({"status": "success", "message": "Index reset successfully"}) return jsonify({"status": "success", "message": "Index reset successfully"})
except Exception as e: except Exception as e:
return jsonify({"error": str(e)}), 500 return jsonify({"error": str(e)}), 500
if __name__ == '__main__': if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG) logging.basicConfig(level=logging.DEBUG)
logging.info("Starting the API - inside main block") logging.info("Starting the API - inside main block")
app.run(debug=True, host='0.0.0.0') app.run(debug=True, host='0.0.0.0')
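Note on the /abort_indexing route above: it is a stub that only acknowledges the request. A minimal sketch, assuming a module-level threading.Event (ABORT_EVENT and the helper names are hypothetical, not part of the application), of how such a flag could stop the worker after the current file:

    # Sketch only: a shared flag set by the abort endpoint and checked between files.
    import threading

    ABORT_EVENT = threading.Event()

    def request_abort():
        """What a real /abort_indexing handler could do before returning its message."""
        ABORT_EVENT.set()
        return {"status": "abort_requested", "message": "Indexing will stop after current file"}

    def index_files_with_abort(file_paths, index_one):
        """Process files until done or until the abort flag is raised."""
        for path in file_paths:
            if ABORT_EVENT.is_set():
                break  # stop after the current file, as the endpoint promises
            index_one(path)
        ABORT_EVENT.clear()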

View file

@ -1,66 +1,66 @@
#viewerContainer { #viewerContainer {
position: absolute; position: absolute;
top: 50px; top: 50px;
left: 0; left: 0;
right: 0; right: 0;
bottom: 0; bottom: 0;
overflow: auto; overflow: auto;
} }
#viewer { #viewer {
width: 100%; width: 100%;
height: 90vh; height: 90vh;
margin: 0 auto; margin: 0 auto;
} }
.controls { .controls {
text-align: center; text-align: center;
padding: 10px; padding: 10px;
} }
#prev, #next { #prev, #next {
padding: 10px 20px; padding: 10px 20px;
margin: 10px; margin: 10px;
background: #007bff; background: #007bff;
color: white; color: white;
border: none; border: none;
border-radius: 4px; border-radius: 4px;
cursor: pointer; cursor: pointer;
} }
.error { .error {
color: red; color: red;
padding: 20px; padding: 20px;
text-align: center; text-align: center;
} }
header { header {
text-align: center; text-align: center;
padding: 20px 0; padding: 20px 0;
} }
.nav { .nav {
background-color: #f8f9fa; background-color: #f8f9fa;
padding: 10px; padding: 10px;
} }
.nav ul { .nav ul {
list-style: none; list-style: none;
display: flex; display: flex;
justify-content: center; justify-content: center;
gap: 20px; gap: 20px;
padding: 0; padding: 0;
margin: 0; margin: 0;
} }
.nav a { .nav a {
text-decoration: none; text-decoration: none;
color: #007bff; color: #007bff;
} }
footer { footer {
text-align: center; text-align: center;
padding: 20px; padding: 20px;
margin-top: 20px; margin-top: 20px;
background-color: #f8f9fa; background-color: #f8f9fa;
} }

View file

@ -1,236 +1,236 @@
body { body {
font-family: 'Arial', sans-serif; font-family: 'Arial', sans-serif;
line-height: 1.6; line-height: 1.6;
margin: 0; margin: 0;
padding: 0; padding: 0;
background-color: #f4f4f4; background-color: #f4f4f4;
color: #333; color: #333;
} }
.container { .container {
width: 80%; width: 80%;
margin: auto; margin: auto;
overflow: hidden; overflow: hidden;
padding: 20px; padding: 20px;
} }
header { header {
background: #35424a; background: #35424a;
color: white; color: white;
padding: 20px; padding: 20px;
text-align: center; text-align: center;
border-bottom: 4px solid #1abc9c; border-bottom: 4px solid #1abc9c;
} }
header h1 { header h1 {
margin: 0; margin: 0;
} }
.search-container { .search-container {
margin: 30px 0; margin: 30px 0;
text-align: center; text-align: center;
} }
.search-box { .search-box {
width: 70%; width: 70%;
padding: 12px; padding: 12px;
border: 1px solid #ddd; border: 1px solid #ddd;
border-radius: 4px; border-radius: 4px;
font-size: 16px; font-size: 16px;
} }
.search-button { .search-button {
padding: 12px 24px; padding: 12px 24px;
background: #1abc9c; background: #1abc9c;
color: white; color: white;
border: none; border: none;
border-radius: 4px; border-radius: 4px;
cursor: pointer; cursor: pointer;
font-size: 16px; font-size: 16px;
} }
.search-button:hover { .search-button:hover {
background: #16a085; background: #16a085;
} }
.results { .results {
margin-top: 30px; margin-top: 30px;
} }
.result-item { .result-item {
background: white; background: white;
padding: 15px; padding: 15px;
margin-bottom: 15px; margin-bottom: 15px;
border-radius: 5px; border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1); box-shadow: 0 2px 5px rgba(0,0,0,0.1);
} }
.result-item h3 { .result-item h3 {
margin-top: 0; margin-top: 0;
color: #1abc9c; color: #1abc9c;
} }
.result-item p { .result-item p {
margin-bottom: 10px; margin-bottom: 10px;
} }
.result-item a { .result-item a {
color: #3498db; color: #3498db;
text-decoration: none; text-decoration: none;
} }
.result-item a:hover { .result-item a:hover {
text-decoration: underline; text-decoration: underline;
} }
.file-list { .file-list {
list-style: none; list-style: none;
padding: 0; padding: 0;
} }
.file-list li { .file-list li {
background: white; background: white;
padding: 15px; padding: 15px;
margin-bottom: 10px; margin-bottom: 10px;
border-radius: 5px; border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1); box-shadow: 0 2px 5px rgba(0,0,0,0.1);
} }
.file-list a { .file-list a {
color: #3498db; color: #3498db;
text-decoration: none; text-decoration: none;
font-weight: bold; font-weight: bold;
} }
.file-list a:hover { .file-list a:hover {
text-decoration: underline; text-decoration: underline;
} }
.nav { .nav {
background: #35424a; background: #35424a;
color: white; color: white;
padding: 10px 0; padding: 10px 0;
} }
.nav ul { .nav ul {
padding: 0; padding: 0;
list-style: none; list-style: none;
text-align: center; text-align: center;
} }
.nav li { .nav li {
display: inline; display: inline;
margin: 0 15px; margin: 0 15px;
} }
.nav a { .nav a {
color: white; color: white;
text-decoration: none; text-decoration: none;
} }
.nav a:hover { .nav a:hover {
color: #1abc9c; color: #1abc9c;
} }
footer { footer {
background: #35424a; background: #35424a;
color: white; color: white;
text-align: center; text-align: center;
padding: 20px; padding: 20px;
margin-top: 40px; margin-top: 40px;
} }
/* Indexing page styles */ /* Indexing page styles */
.progress-container { .progress-container {
margin: 20px 0; margin: 20px 0;
padding: 20px; padding: 20px;
background: #f5f5f5; background: #f5f5f5;
border-radius: 5px; border-radius: 5px;
} }
.progress-bar { .progress-bar {
height: 20px; height: 20px;
background: #e0e0e0; background: #e0e0e0;
border-radius: 10px; border-radius: 10px;
margin: 10px 0; margin: 10px 0;
overflow: hidden; overflow: hidden;
} }
.progress-fill { .progress-fill {
height: 100%; height: 100%;
background: #4CAF50; background: #4CAF50;
width: 0%; width: 0%;
transition: width 0.3s; transition: width 0.3s;
} }
.progress-stats { .progress-stats {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
margin-bottom: 10px; margin-bottom: 10px;
} }
.progress-details { .progress-details {
margin-top: 20px; margin-top: 20px;
} }
.current-file { .current-file {
font-weight: bold; font-weight: bold;
margin: 10px 0; margin: 10px 0;
word-break: break-all; word-break: break-all;
} }
.time-stats { .time-stats {
display: grid; display: grid;
grid-template-columns: repeat(2, 1fr); grid-template-columns: repeat(2, 1fr);
gap: 10px; gap: 10px;
margin-top: 15px; margin-top: 15px;
} }
.time-stat { .time-stat {
background: #e9e9e9; background: #e9e9e9;
padding: 10px; padding: 10px;
border-radius: 5px; border-radius: 5px;
} }
.abort-button { .abort-button {
background: #f44336; background: #f44336;
color: white; color: white;
border: none; border: none;
padding: 10px 20px; padding: 10px 20px;
border-radius: 5px; border-radius: 5px;
cursor: pointer; cursor: pointer;
margin-top: 20px; margin-top: 20px;
} }
.abort-button:hover { .abort-button:hover {
background: #d32f2f; background: #d32f2f;
} }
.error-list { .error-list {
margin-top: 10px; margin-top: 10px;
} }
.error-item { .error-item {
padding: 10px; padding: 10px;
margin-bottom: 5px; margin-bottom: 5px;
background: #ffebee; background: #ffebee;
border-left: 3px solid #f44336; border-left: 3px solid #f44336;
} }
.file-actions { .file-actions {
margin-top: 10px; margin-top: 10px;
} }
.file-action { .file-action {
color: #3498db; color: #3498db;
text-decoration: none; text-decoration: none;
} }
.file-action:hover { .file-action:hover {
text-decoration: underline; text-decoration: underline;
} }
.action-separator { .action-separator {
margin: 0 5px; margin: 0 5px;
color: #999; color: #999;
} }

View file

@ -1,262 +1,262 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>{{ file_path }}</title> <title>{{ file_path }}</title>
<link rel="stylesheet" href="/static/css/style.css"> <link rel="stylesheet" href="/static/css/style.css">
<link rel="stylesheet" href="/static/css/epub_viewer.css"> <link rel="stylesheet" href="/static/css/epub_viewer.css">
<script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script> <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
</head> </head>
<body> <body>
<header> <header>
<h1>EPUB: {{ file_path }}</h1> <h1>EPUB: {{ file_path }}</h1>
</header> </header>
<nav class="nav"> <nav class="nav">
<ul> <ul>
<li><a href="/">Home</a></li> <li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li> <li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li> <li><a href="/index_books">Re-Index Books</a></li>
</ul> </ul>
</nav> </nav>
<div class="container"> <div class="container">
<div id="viewer"></div> <div id="viewer"></div>
<div class="controls"> <div class="controls">
<button id="prev">Previous</button> <button id="prev">Previous</button>
<button id="next">Next</button> <button id="next">Next</button>
</div> </div>
</div> </div>
<script> <script>
// Debug logging function // Debug logging function
function debug(message, obj) { function debug(message, obj) {
console.log("EPUB DEBUG: " + message, obj || ''); console.log("EPUB DEBUG: " + message, obj || '');
// Add to page for visibility // Add to page for visibility
const debugDiv = document.getElementById('debug-output') || const debugDiv = document.getElementById('debug-output') ||
(function() { (function() {
const div = document.createElement('div'); const div = document.createElement('div');
div.id = 'debug-output'; div.id = 'debug-output';
div.style.position = 'fixed'; div.style.position = 'fixed';
div.style.bottom = '10px'; div.style.bottom = '10px';
div.style.right = '10px'; div.style.right = '10px';
div.style.backgroundColor = 'rgba(0,0,0,0.7)'; div.style.backgroundColor = 'rgba(0,0,0,0.7)';
div.style.color = 'white'; div.style.color = 'white';
div.style.padding = '10px'; div.style.padding = '10px';
div.style.maxHeight = '200px'; div.style.maxHeight = '200px';
div.style.overflow = 'auto'; div.style.overflow = 'auto';
div.style.zIndex = '9999'; div.style.zIndex = '9999';
document.body.appendChild(div); document.body.appendChild(div);
return div; return div;
})(); })();
const logEntry = document.createElement('div'); const logEntry = document.createElement('div');
logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : ''); logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : '');
debugDiv.appendChild(logEntry); debugDiv.appendChild(logEntry);
} }
// Global variables // Global variables
var book = null; var book = null;
var rendition = null; var rendition = null;
function handlePrev() { function handlePrev() {
debug("Previous button clicked"); debug("Previous button clicked");
if (!rendition) { if (!rendition) {
debug("ERROR: rendition not available for prev"); debug("ERROR: rendition not available for prev");
return; return;
} }
try { try {
rendition.prev().then(() => { rendition.prev().then(() => {
debug("Navigation to previous page successful"); debug("Navigation to previous page successful");
}).catch(err => { }).catch(err => {
debug("Navigation to previous page failed", err.message); debug("Navigation to previous page failed", err.message);
}); });
} catch (err) { } catch (err) {
debug("Error in prev navigation", err.message); debug("Error in prev navigation", err.message);
} }
} }
function handleNext() { function handleNext() {
debug("Next button clicked"); debug("Next button clicked");
if (!rendition) { if (!rendition) {
debug("ERROR: rendition not available for next"); debug("ERROR: rendition not available for next");
return; return;
} }
try { try {
rendition.next().then(() => { rendition.next().then(() => {
debug("Navigation to next page successful"); debug("Navigation to next page successful");
}).catch(err => { }).catch(err => {
debug("Navigation to next page failed", err.message); debug("Navigation to next page failed", err.message);
}); });
} catch (err) { } catch (err) {
debug("Error in next navigation", err.message); debug("Error in next navigation", err.message);
} }
} }
function handleKeydown(e) { function handleKeydown(e) {
if (!rendition) { if (!rendition) {
debug("ERROR: rendition not available for keydown"); debug("ERROR: rendition not available for keydown");
return; return;
} }
if (e.keyCode === 37) { if (e.keyCode === 37) {
debug("Left arrow key pressed"); debug("Left arrow key pressed");
rendition.prev(); rendition.prev();
} }
if (e.keyCode === 39) { if (e.keyCode === 39) {
debug("Right arrow key pressed"); debug("Right arrow key pressed");
rendition.next(); rendition.next();
} }
} }
function initializeEPUB() { function initializeEPUB() {
debug("Initializing EPUB viewer"); debug("Initializing EPUB viewer");
try { try {
// Use dedicated endpoint for EPUB files // Use dedicated endpoint for EPUB files
const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}"); const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}");
debug("Loading EPUB from URL", fileUrl); debug("Loading EPUB from URL", fileUrl);
// TEST_EPUB_URL: /epub/{{ file_path }} // TEST_EPUB_URL: /epub/{{ file_path }}
// Create book object // Create book object
window.book = book = ePub(fileUrl); window.book = book = ePub(fileUrl);
debug("Book object created successfully"); debug("Book object created successfully");
console.log("Book object details:", book); console.log("Book object details:", book);
if (!book) { if (!book) {
throw new Error("Failed to initialize EPUB reader"); throw new Error("Failed to initialize EPUB reader");
} }
// Set up error handler // Set up error handler
book.on('error', function(err) { book.on('error', function(err) {
debug("EPUB error event", err); debug("EPUB error event", err);
document.getElementById("viewer").innerHTML = document.getElementById("viewer").innerHTML =
'<div class="error">Error loading EPUB: ' + err.message + '</div>'; '<div class="error">Error loading EPUB: ' + err.message + '</div>';
}); });
// Set up ready handler // Set up ready handler
book.on('ready', function() { book.on('ready', function() {
debug("Book ready event fired"); debug("Book ready event fired");
}); });
// Create rendition // Create rendition
debug("Creating rendition"); debug("Creating rendition");
window.rendition = rendition = book.renderTo("viewer", { window.rendition = rendition = book.renderTo("viewer", {
width: "100%", width: "100%",
height: "100%", height: "100%",
spread: "none", spread: "none",
manager: "continuous", manager: "continuous",
style: ` style: `
body { body {
margin: 0; margin: 0;
padding: 20px; padding: 20px;
background-color: white; background-color: white;
color: black; color: black;
font-size: 1.2em; font-size: 1.2em;
line-height: 1.5; line-height: 1.5;
} }
img { img {
max-width: 100%; max-width: 100%;
} }
` `
}); });
// Hide iframe initially to prevent flash of unstyled content // Hide iframe initially to prevent flash of unstyled content
const viewer = document.getElementById("viewer"); const viewer = document.getElementById("viewer");
if (viewer) { if (viewer) {
viewer.style.visibility = "hidden"; viewer.style.visibility = "hidden";
} }
debug("Displaying rendition"); debug("Displaying rendition");
rendition.display() rendition.display()
.then(() => { .then(() => {
debug("Rendition displayed successfully"); debug("Rendition displayed successfully");
// Set up resize handler // Set up resize handler
const resizeHandler = function() { const resizeHandler = function() {
try { try {
if (rendition) { if (rendition) {
rendition.resize(); rendition.resize();
} }
} catch (err) { } catch (err) {
console.error("Resize error:", err); console.error("Resize error:", err);
} }
}; };
window.addEventListener('resize', resizeHandler); window.addEventListener('resize', resizeHandler);
// Show content and initialize navigation // Show content and initialize navigation
setTimeout(() => { setTimeout(() => {
try { try {
if (rendition) { if (rendition) {
rendition.resize(); rendition.resize();
const viewer = document.getElementById('viewer'); const viewer = document.getElementById('viewer');
if (viewer) { if (viewer) {
viewer.style.visibility = 'visible'; viewer.style.visibility = 'visible';
} }
// Initialize navigation // Initialize navigation
rendition.start(); rendition.start();
} }
} catch (err) { } catch (err) {
debug("Content display error", err.message); debug("Content display error", err.message);
} }
}, 100); }, 100);
return rendition; return rendition;
}) })
.catch(err => { .catch(err => {
debug("Rendition error", err); debug("Rendition error", err);
document.getElementById("viewer").innerHTML = document.getElementById("viewer").innerHTML =
'<div class="error">Error displaying EPUB: ' + err.message + '</div>'; '<div class="error">Error displaying EPUB: ' + err.message + '</div>';
}); });
// Set up event listeners // Set up event listeners
debug("Setting up event listeners"); debug("Setting up event listeners");
try { try {
document.getElementById("prev").addEventListener("click", handlePrev); document.getElementById("prev").addEventListener("click", handlePrev);
document.getElementById("next").addEventListener("click", handleNext); document.getElementById("next").addEventListener("click", handleNext);
document.addEventListener("keydown", handleKeydown); document.addEventListener("keydown", handleKeydown);
// Add loading indicator // Add loading indicator
const loadingIndicator = document.createElement('div'); const loadingIndicator = document.createElement('div');
loadingIndicator.id = 'loading-indicator'; loadingIndicator.id = 'loading-indicator';
loadingIndicator.style.position = 'fixed'; loadingIndicator.style.position = 'fixed';
loadingIndicator.style.top = '50%'; loadingIndicator.style.top = '50%';
loadingIndicator.style.left = '50%'; loadingIndicator.style.left = '50%';
loadingIndicator.style.transform = 'translate(-50%, -50%)'; loadingIndicator.style.transform = 'translate(-50%, -50%)';
loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)'; loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)';
loadingIndicator.style.color = 'white'; loadingIndicator.style.color = 'white';
loadingIndicator.style.padding = '20px'; loadingIndicator.style.padding = '20px';
loadingIndicator.style.borderRadius = '5px'; loadingIndicator.style.borderRadius = '5px';
loadingIndicator.style.zIndex = '1000'; loadingIndicator.style.zIndex = '1000';
loadingIndicator.textContent = 'Loading EPUB...'; loadingIndicator.textContent = 'Loading EPUB...';
document.body.appendChild(loadingIndicator); document.body.appendChild(loadingIndicator);
// Remove indicator when loaded // Remove indicator when loaded
book.on('ready', function() { book.on('ready', function() {
const indicator = document.getElementById('loading-indicator'); const indicator = document.getElementById('loading-indicator');
if (indicator) { if (indicator) {
indicator.remove(); indicator.remove();
} }
}); });
} catch (err) { } catch (err) {
debug("Error setting up event listeners", err.message); debug("Error setting up event listeners", err.message);
console.error("Event listener setup error:", err); console.error("Event listener setup error:", err);
} }
} catch (err) { } catch (err) {
debug("EPUB initialization error", err); debug("EPUB initialization error", err);
document.getElementById("viewer").innerHTML = document.getElementById("viewer").innerHTML =
'<div class="error">Failed to load EPUB: ' + err.message + '</div>'; '<div class="error">Failed to load EPUB: ' + err.message + '</div>';
} }
} }
// Initialize when DOM is loaded // Initialize when DOM is loaded
debug("Setting up DOMContentLoaded listener"); debug("Setting up DOMContentLoaded listener");
document.addEventListener('DOMContentLoaded', initializeEPUB); document.addEventListener('DOMContentLoaded', initializeEPUB);
</script> </script>
<footer> <footer>
<p>&copy; 2025 Book Search Engine</p> <p>&copy; 2025 Book Search Engine</p>
</footer> </footer>
</body> </body>
</html> </html>
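The viewer template loads the book from a /epub/<path> endpoint that is not part of this section. A minimal sketch of what the template and test_epub_file_endpoint (further below) expect from that route — the Content-Type and CORS header come from the test, everything else is an assumption:

    # Sketch only: serve the raw EPUB with the headers the viewer and tests expect.
    import os
    from flask import Flask, send_file, abort

    app = Flask(__name__)
    BOOKS_DIR = "/books"  # assumed mount point, matching the indexer

    @app.route('/epub/<path:file_path>')
    def serve_epub(file_path):
        full_path = os.path.abspath(os.path.join(BOOKS_DIR, file_path))
        # Refuse paths that escape the books directory or do not exist
        if not full_path.startswith(os.path.abspath(BOOKS_DIR)) or not os.path.isfile(full_path):
            abort(404)
        response = send_file(full_path, mimetype='application/epub+zip')
        response.headers['Access-Control-Allow-Origin'] = '*'  # epub.js fetches the file via XHR
        return response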

View file

@ -1,141 +1,141 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Book Files</title> <title>Book Files</title>
<link rel="stylesheet" href="/static/css/style.css"> <link rel="stylesheet" href="/static/css/style.css">
<style> <style>
.file-list { .file-list {
list-style: none; list-style: none;
padding: 0; padding: 0;
} }
.file-item { .file-item {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
padding: 8px 0; padding: 8px 0;
border-bottom: 1px solid #eee; border-bottom: 1px solid #eee;
} }
.file-name { .file-name {
flex: 1; flex: 1;
word-break: break-all; word-break: break-all;
} }
.file-size { .file-size {
color: #666; color: #666;
min-width: 80px; min-width: 80px;
text-align: right; text-align: right;
} }
.book-title { .book-title {
font-weight: bold; font-weight: bold;
color: #333; color: #333;
} }
.file-name-muted { .file-name-muted {
color: #999; color: #999;
font-size: 0.9em; font-size: 0.9em;
margin-left: 8px; margin-left: 8px;
} }
.summary { .summary {
background: #f5f5f5; background: #f5f5f5;
padding: 15px; padding: 15px;
border-radius: 5px; border-radius: 5px;
margin-bottom: 20px; margin-bottom: 20px;
} }
.summary-item { .summary-item {
display: flex; display: flex;
justify-content: space-between; justify-content: space-between;
margin-bottom: 5px; margin-bottom: 5px;
} }
.summary-label { .summary-label {
font-weight: bold; font-weight: bold;
} }
.indexing-status { .indexing-status {
background: #fff8e1; background: #fff8e1;
padding: 15px; padding: 15px;
border-radius: 5px; border-radius: 5px;
margin-bottom: 20px; margin-bottom: 20px;
border-left: 4px solid #ffc107; border-left: 4px solid #ffc107;
} }
.indexing-link { .indexing-link {
color: #2196f3; color: #2196f3;
text-decoration: none; text-decoration: none;
} }
.indexing-link:hover { .indexing-link:hover {
text-decoration: underline; text-decoration: underline;
} }
.plain-view-link { .plain-view-link {
font-size: 0.8em; font-size: 0.8em;
color: #666; color: #666;
text-decoration: none; text-decoration: none;
margin-left: 8px; margin-left: 8px;
} }
.plain-view-link:hover { .plain-view-link:hover {
text-decoration: underline; text-decoration: underline;
color: #2196f3; color: #2196f3;
} }
</style> </style>
</head> </head>
<body> <body>
<header> <header>
<h1>Book Files</h1> <h1>Book Files</h1>
</header> </header>
<nav class="nav"> <nav class="nav">
<ul> <ul>
<li><a href="/">Home</a></li> <li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li> <li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li> <li><a href="/index_books">Re-Index Books</a></li>
</ul> </ul>
</nav> </nav>
<div class="container"> <div class="container">
<div class="summary"> <div class="summary">
<div class="summary-item"> <div class="summary-item">
<span class="summary-label">Total Files:</span> <span class="summary-label">Total Files:</span>
<span>{{ total_files }}</span> <span>{{ total_files }}</span>
</div> </div>
<div class="summary-item"> <div class="summary-item">
<span class="summary-label">Total Size:</span> <span class="summary-label">Total Size:</span>
<span>{{ total_size_mb }} MB</span> <span>{{ total_size_mb }} MB</span>
</div> </div>
</div> </div>
{% if indexing_in_progress %} {% if indexing_in_progress %}
<div class="indexing-status"> <div class="indexing-status">
Indexing is currently in progress. Indexing is currently in progress.
<a href="/index_books" class="indexing-link">View re-indexing progress</a> <a href="/index_books" class="indexing-link">View re-indexing progress</a>
</div> </div>
{% endif %} {% endif %}
<h2>Available Files</h2> <h2>Available Files</h2>
{% if files %} {% if files %}
<ul class="file-list"> <ul class="file-list">
{% for file in files %} {% for file in files %}
<li class="file-item"> <li class="file-item">
<span class="file-name"> <span class="file-name">
<a href="/file/{{ file.path }}"> <a href="/file/{{ file.path }}">
{% if file.path.endswith('.epub') %} {% if file.path.endswith('.epub') %}
<br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a> <br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a>
{% endif %} {% endif %}
{% if file.title != file.name %} {% if file.title != file.name %}
<span class="book-title">{{ file.title }}</span> <span class="book-title">{{ file.title }}</span>
<span class="file-name-muted">{{ file.name }}</span> <span class="file-name-muted">{{ file.name }}</span>
{% else %} {% else %}
{{ file.name }} {{ file.name }}
{% endif %} {% endif %}
</a> </a>
</span> </span>
<span class="file-size">{{ file.size_mb }} MB</span> <span class="file-size">{{ file.size_mb }} MB</span>
</li> </li>
{% endfor %} {% endfor %}
</ul> </ul>
{% else %} {% else %}
<p>No files available. Please add files to the books directory.</p> <p>No files available. Please add files to the books directory.</p>
{% endif %} {% endif %}
</div> </div>
<footer> <footer>
<p>&copy; 2025 Intari</p> <p>&copy; 2025 Intari</p>
</footer> </footer>
</body> </body>
</html> </html>
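The files.html template above expects each entry to carry path, name, title and size_mb, plus two totals. The route that builds that context lives outside this section; a minimal sketch under those assumptions (the helper name is hypothetical, and a real route may read the EPUB title from metadata instead of reusing the filename):

    # Sketch only: assemble the context files.html renders (field names inferred from the template).
    import os

    def build_file_context(books_dir="/books"):
        files = []
        total_bytes = 0
        for name in sorted(os.listdir(books_dir)):
            path = os.path.join(books_dir, name)
            if not os.path.isfile(path):
                continue
            size = os.path.getsize(path)
            total_bytes += size
            files.append({
                "path": name,                   # used in the /file/<path> links
                "name": name,
                "title": name,                  # placeholder; real metadata lookup not shown here
                "size_mb": round(size / (1024 * 1024), 2),
            })
        return {
            "files": files,
            "total_files": len(files),
            "total_size_mb": round(total_bytes / (1024 * 1024), 2),
        }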

View file

@ -1,163 +1,163 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Indexing Books</title> <title>Indexing Books</title>
<link rel="stylesheet" href="/static/css/style.css"> <link rel="stylesheet" href="/static/css/style.css">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script> <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head> </head>
<body> <body>
<header> <header>
<h1>Indexing Books</h1> <h1>Indexing Books</h1>
</header> </header>
<nav class="nav"> <nav class="nav">
<ul> <ul>
<li><a href="/">Home</a></li> <li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li> <li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li> <li><a href="/index_books">Re-Index Books</a></li>
</ul> </ul>
</nav> </nav>
<div class="container"> <div class="container">
<div class="progress-container"> <div class="progress-container">
<h2>Indexing Progress</h2> <h2>Indexing Progress</h2>
<div class="progress-stats"> <div class="progress-stats">
<span id="processed-files">0</span> of <span id="total-files">0</span> files processed <span id="processed-files">0</span> of <span id="total-files">0</span> files processed
<span id="percentage">0%</span> <span id="percentage">0%</span>
</div> </div>
<div class="progress-bar"> <div class="progress-bar">
<div class="progress-fill" id="progress-fill"></div> <div class="progress-fill" id="progress-fill"></div>
</div> </div>
<div class="current-file" id="current-file"> <div class="current-file" id="current-file">
Current file: Starting indexing... Current file: Starting indexing...
</div> </div>
<div class="time-stats"> <div class="time-stats">
<div class="time-stat"> <div class="time-stat">
<div>CPU cores:</div> <div>CPU cores:</div>
<div>{{ used_cpus }} of {{ available_cpus }}</div> <div>{{ used_cpus }} of {{ available_cpus }}</div>
</div> </div>
<div class="time-stat"> <div class="time-stat">
<div>Time elapsed:</div> <div>Time elapsed:</div>
<div id="elapsed-time">0m 0s</div> <div id="elapsed-time">0m 0s</div>
</div> </div>
<div class="time-stat"> <div class="time-stat">
<div>Estimated remaining:</div> <div>Estimated remaining:</div>
<div id="estimated-remaining">Calculating...</div> <div id="estimated-remaining">Calculating...</div>
</div> </div>
<div class="time-stat"> <div class="time-stat">
<div>Estimated completion:</div> <div>Estimated completion:</div>
<div id="estimated-completion">Calculating...</div> <div id="estimated-completion">Calculating...</div>
</div> </div>
<div class="time-stat"> <div class="time-stat">
<div>Files per minute:</div> <div>Files per minute:</div>
<div id="files-per-minute">0</div> <div id="files-per-minute">0</div>
</div> </div>
</div> </div>
<button class="abort-button" id="abort-button">Abort Indexing</button> <button class="abort-button" id="abort-button">Abort Indexing</button>
</div> </div>
<div class="progress-details"> <div class="progress-details">
<h3>Recent Errors</h3> <h3>Recent Errors</h3>
<div id="error-list" class="error-list"> <div id="error-list" class="error-list">
No errors yet No errors yet
</div> </div>
</div> </div>
</div> </div>
<footer> <footer>
<p>&copy; 2025 Intari</p> <p>&copy; 2025 Intari</p>
</footer> </footer>
<script> <script>
const progressFill = document.getElementById('progress-fill'); const progressFill = document.getElementById('progress-fill');
const processedFiles = document.getElementById('processed-files'); const processedFiles = document.getElementById('processed-files');
const totalFiles = document.getElementById('total-files'); const totalFiles = document.getElementById('total-files');
const percentage = document.getElementById('percentage'); const percentage = document.getElementById('percentage');
const currentFile = document.getElementById('current-file'); const currentFile = document.getElementById('current-file');
const elapsedTime = document.getElementById('elapsed-time'); const elapsedTime = document.getElementById('elapsed-time');
const estimatedRemaining = document.getElementById('estimated-remaining'); const estimatedRemaining = document.getElementById('estimated-remaining');
const estimatedCompletion = document.getElementById('estimated-completion'); const estimatedCompletion = document.getElementById('estimated-completion');
const filesPerMinute = document.getElementById('files-per-minute'); const filesPerMinute = document.getElementById('files-per-minute');
const errorList = document.getElementById('error-list'); const errorList = document.getElementById('error-list');
const abortButton = document.getElementById('abort-button'); const abortButton = document.getElementById('abort-button');
let updateInterval; let updateInterval;
let speedChart; let speedChart;
// Update progress every second // Update progress every second
function updateProgress() { function updateProgress() {
// Get browser timezone // Get browser timezone
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
fetch('/indexing_progress', { fetch('/indexing_progress', {
headers: { headers: {
'X-Timezone': timezone 'X-Timezone': timezone
} }
}) })
.then(response => response.json()) .then(response => response.json())
.then(data => { .then(data => {
if (data.status === 'not_running') { if (data.status === 'not_running') {
// Indexing completed // Indexing completed
clearInterval(updateInterval); clearInterval(updateInterval);
window.location.href = '/files'; window.location.href = '/files';
return; return;
} }
// Update progress bar // Update progress bar
progressFill.style.width = `${data.percentage}%`; progressFill.style.width = `${data.percentage}%`;
processedFiles.textContent = data.processed_files; processedFiles.textContent = data.processed_files;
totalFiles.textContent = data.total_files; totalFiles.textContent = data.total_files;
percentage.textContent = `${data.percentage.toFixed(1)}%`; percentage.textContent = `${data.percentage.toFixed(1)}%`;
// Update current file // Update current file
currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`; currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`;
// Update time stats // Update time stats
elapsedTime.textContent = data.elapsed_time; elapsedTime.textContent = data.elapsed_time;
estimatedRemaining.textContent = data.estimated_remaining; estimatedRemaining.textContent = data.estimated_remaining;
estimatedCompletion.textContent = data.estimated_completion; estimatedCompletion.textContent = data.estimated_completion;
// Calculate files per minute // Calculate files per minute
if (data.elapsed_time) { if (data.elapsed_time) {
const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number); const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number);
const totalSeconds = min * 60 + sec; const totalSeconds = min * 60 + sec;
if (totalSeconds > 0) { if (totalSeconds > 0) {
const fpm = (data.processed_files / totalSeconds * 60).toFixed(1); const fpm = (data.processed_files / totalSeconds * 60).toFixed(1);
filesPerMinute.textContent = fpm; filesPerMinute.textContent = fpm;
} }
} }
// Update errors // Update errors
if (data.errors && data.errors.length > 0) { if (data.errors && data.errors.length > 0) {
errorList.innerHTML = data.errors.map(err => errorList.innerHTML = data.errors.map(err =>
`<div class="error-item">${err}</div>` `<div class="error-item">${err}</div>`
).join(''); ).join('');
} }
}) })
.catch(error => { .catch(error => {
console.error('Error fetching progress:', error); console.error('Error fetching progress:', error);
}); });
} }
// Start updating progress // Start updating progress
updateInterval = setInterval(updateProgress, 1000); updateInterval = setInterval(updateProgress, 1000);
updateProgress(); updateProgress();
// Handle abort button // Handle abort button
abortButton.addEventListener('click', () => { abortButton.addEventListener('click', () => {
if (confirm('Are you sure you want to abort indexing?')) { if (confirm('Are you sure you want to abort indexing?')) {
fetch('/abort_indexing', { method: 'POST' }) fetch('/abort_indexing', { method: 'POST' })
.then(response => response.json()) .then(response => response.json())
.then(data => { .then(data => {
alert(data.message); alert(data.message);
}); });
} }
}); });
</script> </script>
</body> </body>
</html> </html>
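The page above polls /indexing_progress once per second, passing the browser timezone in an X-Timezone header and stopping when the endpoint reports not_running. The same poll from a script, as a sketch using the requests library with a hard-coded zone and base URL (both assumptions):

    # Sketch: poll the progress endpoint the way the page's fetch() does.
    import time
    import requests

    BASE_URL = "http://localhost:8000"  # adjust to the deployment's base URL

    while True:
        resp = requests.get(f"{BASE_URL}/indexing_progress",
                            headers={"X-Timezone": "UTC"}, timeout=5)
        data = resp.json()
        if data.get("status") != "running":
            break  # the page script treats a non-running status as "indexing finished"
        print(f"{data['processed_files']}/{data['total_files']} "
              f"({data['percentage']}%) ETA {data['estimated_completion']}")
        time.sleep(1)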

View file

@ -1,56 +1,56 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Book Search</title> <title>Book Search</title>
<link rel="stylesheet" href="/static/css/style.css"> <link rel="stylesheet" href="/static/css/style.css">
</head> </head>
<body> <body>
<header> <header>
<h1>Book Search Engine</h1> <h1>Book Search Engine</h1>
</header> </header>
<nav class="nav"> <nav class="nav">
<ul> <ul>
<li><a href="/">Home</a></li> <li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li> <li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li> <li><a href="/index_books">Re-Index Books</a></li>
</ul> </ul>
</nav> </nav>
<div class="container"> <div class="container">
<div class="search-container"> <div class="search-container">
<form action="/search" method="GET"> <form action="/search" method="GET">
<input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}"> <input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}">
<button type="submit" class="search-button">Search</button> <button type="submit" class="search-button">Search</button>
</form> </form>
</div> </div>
{% if results %} {% if results %}
<div class="results"> <div class="results">
<h2>Search Results</h2> <h2>Search Results</h2>
{% for result in results %} {% for result in results %}
<div class="result-item"> <div class="result-item">
<h3>{{ result.file_path.split('/')[-1] }}</h3> <h3>{{ result.file_path.split('/')[-1] }}</h3>
<p>{{ result.snippet }}</p> <p>{{ result.snippet }}</p>
<div class="file-actions"> <div class="file-actions">
<a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a> <a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a>
<span class="action-separator">|</span> <span class="action-separator">|</span>
<a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a> <a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a>
</div> </div>
</div> </div>
{% endfor %} {% endfor %}
</div> </div>
{% elif query %} {% elif query %}
<div class="results"> <div class="results">
<p>No results found for "{{ query }}"</p> <p>No results found for "{{ query }}"</p>
</div> </div>
{% endif %} {% endif %}
</div> </div>
<footer> <footer>
<p>&copy; 2025 Intari</p> <p>&copy; 2025 Intari</p>
</footer> </footer>
</body> </body>
</html> </html>
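The results template above renders a file_path and a snippet per hit; the /search route that produces them is outside this section. A sketch of shaping an Elasticsearch hit into that pair, consistent with the mock in test_search_api below — the snippet length and centering logic are assumptions, not the application's actual implementation:

    # Sketch only: turn an ES hit into the {file_path, snippet} dict the template expects.
    SNIPPET_CHAR_LIMIT = 100  # assumed limit; the real value comes from configuration

    def hit_to_result(hit, query):
        source = hit['_source']
        content = source.get('content', '')
        pos = content.lower().find(query.lower())
        start = max(pos - SNIPPET_CHAR_LIMIT // 2, 0) if pos >= 0 else 0
        return {
            "file_path": source['file_path'],
            "snippet": content[start:start + SNIPPET_CHAR_LIMIT],
        }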

View file

@ -1,64 +1,64 @@
<!DOCTYPE html> <!DOCTYPE html>
<html> <html>
<head> <head>
<title>{{ file_path }}</title> <title>{{ file_path }}</title>
<link rel="stylesheet" href="/static/css/style.css"> <link rel="stylesheet" href="/static/css/style.css">
<style> <style>
pre { pre {
background-color: white; background-color: white;
padding: 20px; padding: 20px;
border-radius: 5px; border-radius: 5px;
white-space: pre-wrap; white-space: pre-wrap;
word-wrap: break-word; word-wrap: break-word;
} }
.html-content { .html-content {
background-color: white; background-color: white;
padding: 20px; padding: 20px;
border-radius: 5px; border-radius: 5px;
} }
.html-content hr { .html-content hr {
margin: 30px 0; margin: 30px 0;
border: 0; border: 0;
border-top: 1px solid #eee; border-top: 1px solid #eee;
} }
.html-content h1, .html-content h1,
.html-content h2, .html-content h2,
.html-content h3, .html-content h3,
.html-content h4, .html-content h4,
.html-content h5, .html-content h5,
.html-content h6 { .html-content h6 {
margin: 1em 0 0.5em 0; margin: 1em 0 0.5em 0;
line-height: 1.2; line-height: 1.2;
} }
.html-content p { .html-content p {
margin: 0 0 1em 0; margin: 0 0 1em 0;
line-height: 1.5; line-height: 1.5;
} }
</style> </style>
</head> </head>
<body> <body>
<header> <header>
<h1>File: {{ file_path }}</h1> <h1>File: {{ file_path }}</h1>
</header> </header>
<nav class="nav"> <nav class="nav">
<ul> <ul>
<li><a href="/">Home</a></li> <li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li> <li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li> <li><a href="/index_books">Re-Index Books</a></li>
</ul> </ul>
</nav> </nav>
<div class="container"> <div class="container">
{% if is_html %} {% if is_html %}
<div class="html-content">{{ content|safe }}</div> <div class="html-content">{{ content|safe }}</div>
{% else %} {% else %}
<pre>{{ content }}</pre> <pre>{{ content }}</pre>
{% endif %} {% endif %}
</div> </div>
<footer> <footer>
<p>&copy; 2025 Book Search Engine</p> <p>&copy; 2025 Book Search Engine</p>
</footer> </footer>
</body> </body>
</html> </html>

View file

@ -1,142 +1,142 @@
from elasticsearch import Elasticsearch from elasticsearch import Elasticsearch
import os import os
import ebooklib import ebooklib
from ebooklib import epub from ebooklib import epub
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
import PyPDF2 import PyPDF2
import time import time
from threading import Lock from threading import Lock
# Elasticsearch Configuration # Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost") ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200)) ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}]) es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
INDEX_NAME = "book_index" INDEX_NAME = "book_index"
# Global variables for progress tracking # Global variables for progress tracking
indexing_progress = { indexing_progress = {
'total_files': 0, 'total_files': 0,
'processed_files': 0, 'processed_files': 0,
'start_time': None, 'start_time': None,
'is_running': False, 'is_running': False,
'current_file': '', 'current_file': '',
'errors': [] 'errors': []
} }
progress_lock = Lock() progress_lock = Lock()
def create_index(): def create_index():
if not es.indices.exists(index=INDEX_NAME): if not es.indices.exists(index=INDEX_NAME):
es.indices.create(index=INDEX_NAME) es.indices.create(index=INDEX_NAME)
def extract_text_from_epub(epub_path): def extract_text_from_epub(epub_path):
book = epub.read_epub(epub_path) book = epub.read_epub(epub_path)
text = '' text = ''
for item in book.get_items(): for item in book.get_items():
if item.media_type == 'application/xhtml+xml': if item.media_type == 'application/xhtml+xml':
soup = BeautifulSoup(item.get_content(), 'html.parser') soup = BeautifulSoup(item.get_content(), 'html.parser')
text += soup.get_text() text += soup.get_text()
return text return text
def extract_text_from_pdf(pdf_path): def extract_text_from_pdf(pdf_path):
text = '' text = ''
with open(pdf_path, 'rb') as pdf_file: with open(pdf_path, 'rb') as pdf_file:
pdf_reader = PyPDF2.PdfReader(pdf_file) pdf_reader = PyPDF2.PdfReader(pdf_file)
for page_num in range(len(pdf_reader.pages)): for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num] page = pdf_reader.pages[page_num]
text += page.extract_text() text += page.extract_text()
return text return text
def get_progress(): def get_progress():
with progress_lock: with progress_lock:
if not indexing_progress['is_running']: if not indexing_progress['is_running']:
return None return None
progress = indexing_progress.copy() progress = indexing_progress.copy()
if progress['total_files'] > 0: if progress['total_files'] > 0:
progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100 progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100
else: else:
progress['percentage'] = 0 progress['percentage'] = 0
elapsed = time.time() - progress['start_time'] elapsed = time.time() - progress['start_time']
progress['elapsed_time'] = elapsed progress['elapsed_time'] = elapsed
if progress['processed_files'] > 0: if progress['processed_files'] > 0:
time_per_file = elapsed / progress['processed_files'] time_per_file = elapsed / progress['processed_files']
remaining_files = progress['total_files'] - progress['processed_files'] remaining_files = progress['total_files'] - progress['processed_files']
progress['estimated_remaining'] = time_per_file * remaining_files progress['estimated_remaining'] = time_per_file * remaining_files
progress['estimated_completion'] = time.time() + progress['estimated_remaining'] progress['estimated_completion'] = time.time() + progress['estimated_remaining']
else: else:
progress['estimated_remaining'] = 0 progress['estimated_remaining'] = 0
progress['estimated_completion'] = 0 progress['estimated_completion'] = 0
return progress return progress
def index_files(directory): def index_files(directory):
global indexing_progress global indexing_progress
with progress_lock: with progress_lock:
indexing_progress = { indexing_progress = {
'total_files': 0, 'total_files': 0,
'processed_files': 0, 'processed_files': 0,
'start_time': time.time(), 'start_time': time.time(),
'is_running': True, 'is_running': True,
'current_file': '', 'current_file': '',
'errors': [] 'errors': []
} }
try: try:
create_index() create_index()
# First count all files # First count all files
total_files = 0 total_files = 0
for root, _, files in os.walk(directory): for root, _, files in os.walk(directory):
for file in files: for file in files:
if file.endswith(('.epub', '.pdf', '.txt')): if file.endswith(('.epub', '.pdf', '.txt')):
total_files += 1 total_files += 1
with progress_lock: with progress_lock:
indexing_progress['total_files'] = total_files indexing_progress['total_files'] = total_files
# Now process files # Now process files
for root, _, files in os.walk(directory): for root, _, files in os.walk(directory):
for file in files: for file in files:
file_path = os.path.join(root, file) file_path = os.path.join(root, file)
with progress_lock: with progress_lock:
indexing_progress['current_file'] = file_path indexing_progress['current_file'] = file_path
try: try:
encoded_file_path = file_path.encode('utf-8').decode('utf-8') encoded_file_path = file_path.encode('utf-8').decode('utf-8')
if file_path.endswith(".epub"): if file_path.endswith(".epub"):
text = extract_text_from_epub(file_path) text = extract_text_from_epub(file_path)
elif file_path.endswith(".pdf"): elif file_path.endswith(".pdf"):
text = extract_text_from_pdf(file_path) text = extract_text_from_pdf(file_path)
elif file_path.endswith(".txt"): elif file_path.endswith(".txt"):
with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f: with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f:
text = f.read() text = f.read()
else: else:
print(f"Skipping unsupported file type: {file_path}") print(f"Skipping unsupported file type: {file_path}")
continue continue
doc = { doc = {
'file_path': file_path, 'file_path': file_path,
'content': text 'content': text
} }
es.index(index=INDEX_NAME, document=doc) es.index(index=INDEX_NAME, document=doc)
print(f"Indexed: {file_path}") print(f"Indexed: {file_path}")
with progress_lock: with progress_lock:
indexing_progress['processed_files'] += 1 indexing_progress['processed_files'] += 1
except Exception as e: except Exception as e:
error_msg = f"Error indexing {file_path}: {type(e)}, {e}" error_msg = f"Error indexing {file_path}: {type(e)}, {e}"
print(error_msg) print(error_msg)
with progress_lock: with progress_lock:
indexing_progress['errors'].append(error_msg) indexing_progress['errors'].append(error_msg)
finally: finally:
with progress_lock: with progress_lock:
indexing_progress['is_running'] = False indexing_progress['is_running'] = False
if __name__ == '__main__': if __name__ == '__main__':
BOOKS_DIR = "/books" # This should match the volume mount in docker-compose.yml BOOKS_DIR = "/books" # This should match the volume mount in docker-compose.yml
index_files(BOOKS_DIR) index_files(BOOKS_DIR)
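get_progress() above estimates completion from the average time per processed file: remaining time is elapsed / processed_files multiplied by the files still to go, and the completion timestamp is that remainder added to the current time. A small worked example of the same arithmetic:

    # Worked example of the ETA math used in get_progress()
    import time

    elapsed = 120.0          # seconds since start_time
    processed_files = 40
    total_files = 100

    time_per_file = elapsed / processed_files                      # 3.0 s per file
    estimated_remaining = (total_files - processed_files) * time_per_file  # 180.0 s
    estimated_completion = time.time() + estimated_remaining       # wall-clock estimate
    print(f"~{estimated_remaining / 60:.1f} min remaining")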

View file

@ -1,7 +1,7 @@
flask==3.0.2 flask==3.0.2
ebooklib==0.18 ebooklib==0.18
beautifulsoup4==4.12.3 beautifulsoup4==4.12.3
pytest==8.3.2 pytest==8.3.2
PyPDF2==3.0.1 PyPDF2==3.0.1
pytz==2024.1 pytz==2024.1
elasticsearch>=8.0.0 elasticsearch>=8.0.0

View file

@ -1,127 +1,127 @@
import unittest import unittest
import json import json
import os import os
import tempfile import tempfile
import shutil import shutil
from app import app from app import app
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock
class BookSearchAPITest(unittest.TestCase): class BookSearchAPITest(unittest.TestCase):
def setUp(self): def setUp(self):
app.config['TESTING'] = True app.config['TESTING'] = True
self.client = app.test_client() self.client = app.test_client()
# Create a temporary directory for test books # Create a temporary directory for test books
self.test_books_dir = tempfile.mkdtemp() self.test_books_dir = tempfile.mkdtemp()
# Create a sample test file # Create a sample test file
self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt') self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt')
with open(self.sample_file_path, 'w', encoding='utf-8') as f: with open(self.sample_file_path, 'w', encoding='utf-8') as f:
f.write("This is a test sample file for testing the book search API.") f.write("This is a test sample file for testing the book search API.")
def tearDown(self): def tearDown(self):
# Remove the temporary directory # Remove the temporary directory
shutil.rmtree(self.test_books_dir) shutil.rmtree(self.test_books_dir)
@patch('app.es') @patch('app.es')
@patch('app.index_files') @patch('app.index_files')
def test_index_books_api(self, mock_index_files, mock_es): def test_index_books_api(self, mock_index_files, mock_es):
# Mock the index_files function # Mock the index_files function
mock_index_files.return_value = None mock_index_files.return_value = None
# Test the API endpoint # Test the API endpoint
response = self.client.get('/index_books', headers={'Accept': 'application/json'}) response = self.client.get('/index_books', headers={'Accept': 'application/json'})
# Check if the response is successful # Check if the response is successful
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
# Check if the response contains the expected message # Check if the response contains the expected message
data = json.loads(response.data) data = json.loads(response.data)
self.assertIn('message', data) self.assertIn('message', data)
self.assertEqual(data['message'], 'Indexing completed') self.assertEqual(data['message'], 'Indexing completed')
# Check if the index_files function was called # Check if the index_files function was called
mock_index_files.assert_called_once_with('/books') mock_index_files.assert_called_once_with('/books')
@patch('app.es') @patch('app.es')
def test_search_api(self, mock_es): def test_search_api(self, mock_es):
# Mock the Elasticsearch search method # Mock the Elasticsearch search method
mock_search_result = { mock_search_result = {
'hits': { 'hits': {
'hits': [ 'hits': [
{ {
'_source': { '_source': {
'file_path': '/books/test_sample.txt', 'file_path': '/books/test_sample.txt',
'content': 'This is a test sample file for testing the book search API.' 'content': 'This is a test sample file for testing the book search API.'
} }
} }
] ]
} }
} }
mock_es.search.return_value = mock_search_result mock_es.search.return_value = mock_search_result
# Test the API endpoint # Test the API endpoint
response = self.client.get('/search?query=test', headers={'Accept': 'application/json'}) response = self.client.get('/search?query=test', headers={'Accept': 'application/json'})
# Check if the response is successful # Check if the response is successful
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data # Check if the response contains the expected data
data = json.loads(response.data) data = json.loads(response.data)
self.assertEqual(len(data), 1) self.assertEqual(len(data), 1)
self.assertEqual(data[0]['file_path'], '/books/test_sample.txt') self.assertEqual(data[0]['file_path'], '/books/test_sample.txt')
self.assertIn('snippet', data[0]) self.assertIn('snippet', data[0])
# Check if the Elasticsearch search method was called with the correct parameters # Check if the Elasticsearch search method was called with the correct parameters
mock_es.search.assert_called_once() mock_es.search.assert_called_once()
@patch('app.os.listdir') @patch('app.os.listdir')
@patch('app.os.path.isfile') @patch('app.os.path.isfile')
def test_list_files_api(self, mock_isfile, mock_listdir): def test_list_files_api(self, mock_isfile, mock_listdir):
# Mock the os.listdir function # Mock the os.listdir function
mock_listdir.return_value = ['test_sample.txt', 'another_file.txt'] mock_listdir.return_value = ['test_sample.txt', 'another_file.txt']
# Mock the os.path.isfile function to always return True # Mock the os.path.isfile function to always return True
mock_isfile.return_value = True mock_isfile.return_value = True
# Test the API endpoint # Test the API endpoint
response = self.client.get('/files', headers={'Accept': 'application/json'}) response = self.client.get('/files', headers={'Accept': 'application/json'})
# Check if the response is successful # Check if the response is successful
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data # Check if the response contains the expected data
data = json.loads(response.data) data = json.loads(response.data)
self.assertEqual(len(data), 2) self.assertEqual(len(data), 2)
self.assertEqual(data[0]['name'], 'test_sample.txt') self.assertEqual(data[0]['name'], 'test_sample.txt')
self.assertEqual(data[1]['name'], 'another_file.txt') self.assertEqual(data[1]['name'], 'another_file.txt')
# Check if the os.listdir function was called with the correct parameters # Check if the os.listdir function was called with the correct parameters
mock_listdir.assert_called_once_with('/books') mock_listdir.assert_called_once_with('/books')
@patch('app.open') @patch('app.open')
@patch('app.os.path.isfile') @patch('app.os.path.isfile')
@patch('app.os.path.abspath') @patch('app.os.path.abspath')
def test_get_file_api(self, mock_abspath, mock_isfile, mock_open): def test_get_file_api(self, mock_abspath, mock_isfile, mock_open):
# Mock the necessary functions # Mock the necessary functions
mock_isfile.return_value = True mock_isfile.return_value = True
mock_abspath.side_effect = lambda x: x # Return the input unchanged mock_abspath.side_effect = lambda x: x # Return the input unchanged
# Mock the open function # Mock the open function
mock_file = MagicMock() mock_file = MagicMock()
mock_file.__enter__.return_value.read.return_value = "This is a test sample file." mock_file.__enter__.return_value.read.return_value = "This is a test sample file."
mock_open.return_value = mock_file mock_open.return_value = mock_file
# Test the API endpoint # Test the API endpoint
response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'}) response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'})
# Check if the response is successful # Check if the response is successful
self.assertEqual(response.status_code, 200) self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data # Check if the response contains the expected data
self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.") self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.")
# Check if the open function was called with the correct parameters # Check if the open function was called with the correct parameters
mock_open.assert_called_once() mock_open.assert_called_once()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -1,114 +1,114 @@
import os
import pytest
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock
from ebooklib import epub


def create_test_epub():
    """Create a simple test EPUB file"""
    book = epub.EpubBook()

    # Set metadata
    book.set_identifier('test123456')
    book.set_title('Test EPUB Book')
    book.set_language('en')
    book.add_author('Test Author')

    # Add a chapter
    c1 = epub.EpubHtml(title='Chapter 1', file_name='chap_01.xhtml', lang='en')
    c1.content = '<html><body><h1>Chapter 1</h1><p>This is a test EPUB file.</p></body></html>'
    book.add_item(c1)

    # Add navigation
    book.toc = [c1]
    book.spine = ['nav', c1]
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Create temp directory
    temp_dir = tempfile.mkdtemp()
    epub_path = os.path.join(temp_dir, 'test.epub')

    # Write the EPUB file
    epub.write_epub(epub_path, book)

    return epub_path, temp_dir


@pytest.fixture
def client():
    """Create a test client for the Flask app"""
    app.config['TESTING'] = True
    with app.test_client() as client:
        yield client


@pytest.fixture
def test_epub():
    """Create a test EPUB file and clean up after the test"""
    epub_path, temp_dir = create_test_epub()

    # Mock the books directory
    original_join = os.path.join

    def mock_join(path, *paths):
        if path == "/books" and paths and paths[0] == "test.epub":
            return epub_path
        return original_join(path, *paths)

    def mock_abspath(path):
        if path == os.path.join("/books", "test.epub"):
            return "/books/test.epub"
        elif path == epub_path:
            return "/books/test.epub"
        return path

    with patch('os.path.join', side_effect=mock_join):
        with patch('os.path.isfile', return_value=True):
            with patch('os.path.abspath', side_effect=mock_abspath):
                yield epub_path

    # Clean up
    shutil.rmtree(temp_dir)


def test_epub_viewer_page(client, test_epub):
    """Test that the EPUB viewer page loads correctly"""
    response = client.get('/file/test.epub')
    assert response.status_code == 200
    assert b'<!DOCTYPE html>' in response.data
    assert b'<title>test.epub</title>' in response.data
    assert b'<div id="viewer"></div>' in response.data
    assert b'<script src="https://cdn.jsdelivr.net/npm/epubjs' in response.data


def test_epub_file_endpoint(client, test_epub):
    """Test that the EPUB file is served with correct headers"""
    response = client.get('/epub/test.epub')
    assert response.status_code == 200
    assert response.headers['Content-Type'] == 'application/epub+zip'
    assert response.headers['Access-Control-Allow-Origin'] == '*'
    # Check that the response contains EPUB data (at least the magic number)
    assert response.data.startswith(b'PK')
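    # Editorial note (hedged, not part of the original test): an EPUB is a ZIP
    # container, so its first bytes are the ZIP local-file-header magic "PK".
    # If a stricter check were wanted, the standard library could validate the
    # whole container, e.g.:
    #
    #     import io, zipfile
    #     assert zipfile.is_zipfile(io.BytesIO(response.data))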


def test_epub_viewer_integration(client, test_epub):
    """Test the integration between the viewer and the EPUB file"""
    # This test would ideally use Selenium or Playwright to exercise the actual
    # rendering; see the hedged Playwright sketch after this file. Since we can't
    # run a browser in this environment, we only check that the pieces are wired
    # up correctly.

    # First, check that the viewer page loads
    viewer_response = client.get('/file/test.epub')
    assert viewer_response.status_code == 200

    # Check that the JavaScript is correctly set up to load the EPUB
    assert b'/epub/test.epub' in viewer_response.data

    # Check that the EPUB file is accessible
    epub_response = client.get('/epub/test.epub')
    assert epub_response.status_code == 200
    assert epub_response.headers['Content-Type'] == 'application/epub+zip'


if __name__ == '__main__':
    pytest.main(['-xvs', __file__])
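As referenced in the comment inside test_epub_viewer_integration, a browser-level check of the actual rendering could look roughly like the sketch below. This is not part of the committed test suite: it assumes Playwright is installed (pip install playwright && playwright install chromium), that the Flask app is already running and reachable at BASE_URL (falling back to the value used in .env), that a book named test.epub is available, and that epub.js renders the book into an iframe inside the #viewer element.

# Hedged sketch: browser-level smoke test with Playwright (assumptions listed above).
import os
from playwright.sync_api import sync_playwright

BASE_URL = os.environ.get("BASE_URL", "http://localhost:8000")

def check_epub_renders():
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        page.goto(f"{BASE_URL}/file/test.epub")
        # Assumption: epub.js injects an iframe into #viewer once the book is rendered.
        page.wait_for_selector("#viewer iframe", timeout=15000)
        # The viewer page titles itself after the file, per the template assertions above.
        assert page.title() == "test.epub"
        browser.close()

if __name__ == "__main__":
    check_epub_renders()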