Update application

This commit is contained in:
Dmitriy Kazimirov 2025-04-01 10:48:30 +00:00
parent 6a46a20c9e
commit 891787be0f
25 changed files with 2474 additions and 2474 deletions

118
.env
View file

@ -1,60 +1,60 @@
# Application Configuration
# ========================
# Base URL for the application (required, string)
# Format: http://hostname:port or https://hostname:port
BASE_URL=http://localhost:8000
# CPU Limit for container (optional, float)
# Number of CPU cores to allocate (e.g., 0.5, 1, 2)
# Default: 2 (will be used if not specified)
CPU_LIMIT=2
# Snippet character limit (optional, integer)
# Maximum length for text snippets in characters
# Default: 100
SNIPPET_CHAR_LIMIT=100
# Debug mode (optional, boolean)
# Enable debug output when set to True
# Default: False
DEBUG=False
# Application port (optional, integer)
# Port the application listens on
# Default: 5000
PORT=5000
# Elasticsearch Configuration
# ==========================
# Elasticsearch host (required, string)
# Hostname or IP of Elasticsearch service
ELASTICSEARCH_HOST=elasticsearch
# Elasticsearch username (sensitive, required, string)
# Admin username for Elasticsearch
ELASTICSEARCH_USERNAME=admin
# Elasticsearch password (sensitive, required, string)
# Admin password for Elasticsearch
ELASTICSEARCH_PASSWORD=password
# File Storage Configuration
# =========================
# SMB share path (optional, string)
# Local path where books are mounted
# Default: ./smb_share
SMB_SHARE_PATH=./smb_share
# Admin Credentials
# ================
# Admin username for API management (required, string)
ADMIN_USER=admin
# Admin password for API management (required, string)
# Application Configuration
# ========================
# Base URL for the application (required, string)
# Format: http://hostname:port or https://hostname:port
BASE_URL=http://localhost:8000
# CPU Limit for container (optional, float)
# Number of CPU cores to allocate (e.g., 0.5, 1, 2)
# Default: 2 (will be used if not specified)
CPU_LIMIT=2
# Snippet character limit (optional, integer)
# Maximum length for text snippets in characters
# Default: 100
SNIPPET_CHAR_LIMIT=100
# Debug mode (optional, boolean)
# Enable debug output when set to True
# Default: False
DEBUG=False
# Application port (optional, integer)
# Port the application listens on
# Default: 5000
PORT=5000
# Elasticsearch Configuration
# ==========================
# Elasticsearch host (required, string)
# Hostname or IP of Elasticsearch service
ELASTICSEARCH_HOST=elasticsearch
# Elasticsearch username (sensitive, required, string)
# Admin username for Elasticsearch
ELASTICSEARCH_USERNAME=admin
# Elasticsearch password (sensitive, required, string)
# Admin password for Elasticsearch
ELASTICSEARCH_PASSWORD=password
# File Storage Configuration
# =========================
# SMB share path (optional, string)
# Local path where books are mounted
# Default: ./smb_share
SMB_SHARE_PATH=./smb_share
# Admin Credentials
# ================
# Admin username for API management (required, string)
ADMIN_USER=admin
# Admin password for API management (required, string)
ADMIN_PASSWORD=securepassword123

View file

@ -1,39 +1,39 @@
name: Test EPUB Viewer
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Run tests
run: |
cd api
python -m pytest test_epub_viewer.py -v
- name: Run integration tests with Playwright
run: |
pip install playwright pytest-playwright
playwright install
cd api
python -m pytest test_epub_viewer_integration.py -v
name: Test EPUB Viewer
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
jobs:
test:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: '3.10'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Run tests
run: |
cd api
python -m pytest test_epub_viewer.py -v
- name: Run integration tests with Playwright
run: |
pip install playwright pytest-playwright
playwright install
cd api
python -m pytest test_epub_viewer_integration.py -v
if: false # Disabled until we create the integration tests with Playwright

58
.gitignore vendored
View file

@ -1,30 +1,30 @@
# Ignore all EPUB files in smb_share
smb_share/*.epub
# Ignore sample text file
smb_share/sample.txt
# Python cache
__pycache__/
*.py[cod]
*$py.class
# Virtual environment
venv/
# IDE specific files
.vscode/
.idea/
# Logs and databases
*.log
*.sqlite
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
# Ignore all EPUB files in smb_share
smb_share/*.epub
# Ignore sample text file
smb_share/sample.txt
# Python cache
__pycache__/
*.py[cod]
*$py.class
# Virtual environment
venv/
# IDE specific files
.vscode/
.idea/
# Logs and databases
*.log
*.sqlite
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db

View file

@ -1,29 +1,29 @@
FROM python:3.9-alpine
WORKDIR /app
# Install dependencies
RUN pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 pytz
# Create books directory with proper permissions
RUN mkdir -p /books && chmod 777 /books
# Copy the API code and static files
COPY src/api/app.py .
COPY src/api/static /app/static
COPY src/api/templates /app/templates
# Expose the API port
EXPOSE 5000
# Copy the indexing script
COPY src/core/index.py .
# Copy the test file
COPY tests/unit/test_app.py .
# Add a dummy file to invalidate cache
ADD dummy.txt .
# Command to run the API
FROM python:3.9-alpine
WORKDIR /app
# Install dependencies
RUN pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 pytz
# Create books directory with proper permissions
RUN mkdir -p /books && chmod 777 /books
# Copy the API code and static files
COPY src/api/app.py .
COPY src/api/static /app/static
COPY src/api/templates /app/templates
# Expose the API port
EXPOSE 5000
# Copy the indexing script
COPY src/core/index.py .
# Copy the test file
COPY tests/unit/test_app.py .
# Add a dummy file to invalidate cache
ADD dummy.txt .
# Command to run the API
CMD ["python", "app.py"]

64
ai.md
View file

@ -1,33 +1,33 @@
# Final Deployment Status
## Configuration Summary:
1. **Container Auto-Restart**:
- Both services configured with `restart: unless-stopped`
- Containers will automatically restart on failures
2. **Resource Limits**:
- CPU: `${CPU_LIMIT}` cores
- Memory: 2GB limit
3. **Dependencies**:
- pytz installed in container (version 2025.2)
- All required Python packages verified
- Dockerfile updated to include pytz for future builds
4. **Known Issues**:
- Docker Compose v1.25.0 limitations:
- Doesn't respect container_name directives
- Shows harmless deploy key warnings
- Solution: Upgrade to Docker Compose v2.x
## Verification:
- All services running
- CORS headers properly configured
- pytz module successfully imported (version 2025.2)
- API endpoints functional
## System Status: OPERATIONAL
- API: Running on port 8000
- Elasticsearch: Running on port 9200
- Auto-restart configured
# Final Deployment Status
## Configuration Summary:
1. **Container Auto-Restart**:
- Both services configured with `restart: unless-stopped`
- Containers will automatically restart on failures
2. **Resource Limits**:
- CPU: `${CPU_LIMIT}` cores
- Memory: 2GB limit
3. **Dependencies**:
- pytz installed in container (version 2025.2)
- All required Python packages verified
- Dockerfile updated to include pytz for future builds
4. **Known Issues**:
- Docker Compose v1.25.0 limitations:
- Doesn't respect container_name directives
- Shows harmless deploy key warnings
- Solution: Upgrade to Docker Compose v2.x
## Verification:
- All services running
- CORS headers properly configured
- pytz module successfully imported (version 2025.2)
- API endpoints functional
## System Status: OPERATIONAL
- API: Running on port 8000
- Elasticsearch: Running on port 9200
- Auto-restart configured
- All features functional

View file

@ -1,39 +1,39 @@
version: '3.7'
services:
booksearch_app:
build: .
container_name: booksearch_app
ports:
- "8000:5000"
environment:
- ELASTICSEARCH_HOST=booksearch_elastic
- BASE_URL=${BASE_URL}
- CPU_LIMIT=${CPU_LIMIT}
- SNIPPET_CHAR_LIMIT=${SNIPPET_CHAR_LIMIT}
volumes:
- ./smb_share:/books
depends_on:
- booksearch_elastic
restart: unless-stopped
deploy:
resources:
limits:
cpus: ${CPU_LIMIT}
memory: 2G
booksearch_elastic:
container_name: booksearch_elastic
image: bitnami/elasticsearch:latest
ports:
- "9200:9200"
- "9300:9300"
environment:
- discovery.type=single-node
- ELASTICSEARCH_USERNAME=admin
- ELASTICSEARCH_PASSWORD=password
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9200"]
interval: 30s
timeout: 10s
version: '3.7'
services:
booksearch_app:
build: .
container_name: booksearch_app
ports:
- "8000:5000"
environment:
- ELASTICSEARCH_HOST=booksearch_elastic
- BASE_URL=${BASE_URL}
- CPU_LIMIT=${CPU_LIMIT}
- SNIPPET_CHAR_LIMIT=${SNIPPET_CHAR_LIMIT}
volumes:
- ./smb_share:/books
depends_on:
- booksearch_elastic
restart: unless-stopped
deploy:
resources:
limits:
cpus: ${CPU_LIMIT}
memory: 2G
booksearch_elastic:
container_name: booksearch_elastic
image: bitnami/elasticsearch:latest
ports:
- "9200:9200"
- "9300:9300"
environment:
- discovery.type=single-node
- ELASTICSEARCH_USERNAME=admin
- ELASTICSEARCH_PASSWORD=password
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9200"]
interval: 30s
timeout: 10s
retries: 5

View file

@ -1,109 +1,109 @@
function search_books(params, userSettings) {
const query = params.query;
const apiUrl = (userSettings.apiUrl || 'http://localhost:8000').replace(/\/$/, '');
const useProxy = userSettings.useProxy || false;
const proxyUrl = userSettings.proxyUrl || 'https://cors-anywhere.herokuapp.com/';
// Debugging headers - WARNING: Only for development/testing
const debugHeaders = userSettings.debugHeaders || {};
if (!query) {
throw new Error('Search query is required');
}
// Prepare the target URL
const targetUrl = `${apiUrl}/search?query=${encodeURIComponent(query)}`;
const requestUrl = useProxy ? `${proxyUrl}${targetUrl}` : targetUrl;
// Add timeout handling
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 10000);
// Prepare headers
const headers = {
'Accept': 'application/json',
...(useProxy ? { 'X-Requested-With': 'XMLHttpRequest' } : {}),
...debugHeaders // Add debug headers if provided
};
return fetch(requestUrl, {
method: 'GET',
headers: headers,
signal: controller.signal
})
.then(async response => {
clearTimeout(timeoutId);
if (!response.ok) {
const errorBody = await response.text().catch(() => '');
throw new Error(`API request failed with status ${response.status}. Response: ${errorBody}`);
}
const contentType = response.headers.get('content-type');
if (!contentType || !contentType.includes('application/json')) {
throw new Error(`Invalid content type: ${contentType}`);
}
return response.json();
})
.then(results => {
if (!Array.isArray(results)) {
throw new Error(`Invalid response format. Expected array, got ${typeof results}`);
}
if (results.length === 0) {
return 'No books found matching your search';
}
// Format results with book paths and snippets
return results.map(result => {
if (!result.file_path || !result.snippet) {
throw new Error('Invalid result format - missing required fields');
}
// Create properly encoded URL
let formattedUrl = '';
if (result.raw_url) {
try {
// Split URL into parts and encode components separately
const url = new URL(result.raw_url);
const pathParts = url.pathname.split('/').map(part =>
encodeURIComponent(part).replace(/'/g, "%27")
);
const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : '';
formattedUrl = `${url.origin}${pathParts.join('/')}${search}`;
} catch (e) {
formattedUrl = result.raw_url; // Fallback to original if URL parsing fails
}
}
return `Book: ${result.file_path}\n` +
`Snippet: ${result.snippet}\n` +
(formattedUrl ? `URL: ${formattedUrl}\n` : '');
}).join('\n\n');
})
.catch(error => {
clearTimeout(timeoutId);
let errorMessage = `Error searching books: ${error.message}`;
if (error.name === 'AbortError') {
errorMessage += '\n\nDiagnostics: Request timed out. Check if:';
errorMessage += `\n- The API is running at ${apiUrl}`;
errorMessage += '\n- The server is accessible from your network';
if (!useProxy) {
errorMessage += '\n- Try enabling proxy in plugin settings';
}
} else if (error.message.includes('Failed to fetch') || error.message.includes('CORS')) {
errorMessage += '\n\nDiagnostics: Network request failed. Check if:';
errorMessage += `\n- The API URL (${apiUrl}) is correct`;
errorMessage += '\n- CORS is properly configured on the server';
errorMessage += '\n- The server is running and accessible';
if (!useProxy) {
errorMessage += '\n- Try enabling proxy in plugin settings to bypass CORS';
}
errorMessage += '\n- For debugging, you can add CORS headers in plugin settings';
}
return errorMessage;
});
function search_books(params, userSettings) {
const query = params.query;
const apiUrl = (userSettings.apiUrl || 'http://localhost:8000').replace(/\/$/, '');
const useProxy = userSettings.useProxy || false;
const proxyUrl = userSettings.proxyUrl || 'https://cors-anywhere.herokuapp.com/';
// Debugging headers - WARNING: Only for development/testing
const debugHeaders = userSettings.debugHeaders || {};
if (!query) {
throw new Error('Search query is required');
}
// Prepare the target URL
const targetUrl = `${apiUrl}/search?query=${encodeURIComponent(query)}`;
const requestUrl = useProxy ? `${proxyUrl}${targetUrl}` : targetUrl;
// Add timeout handling
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), 10000);
// Prepare headers
const headers = {
'Accept': 'application/json',
...(useProxy ? { 'X-Requested-With': 'XMLHttpRequest' } : {}),
...debugHeaders // Add debug headers if provided
};
return fetch(requestUrl, {
method: 'GET',
headers: headers,
signal: controller.signal
})
.then(async response => {
clearTimeout(timeoutId);
if (!response.ok) {
const errorBody = await response.text().catch(() => '');
throw new Error(`API request failed with status ${response.status}. Response: ${errorBody}`);
}
const contentType = response.headers.get('content-type');
if (!contentType || !contentType.includes('application/json')) {
throw new Error(`Invalid content type: ${contentType}`);
}
return response.json();
})
.then(results => {
if (!Array.isArray(results)) {
throw new Error(`Invalid response format. Expected array, got ${typeof results}`);
}
if (results.length === 0) {
return 'No books found matching your search';
}
// Format results with book paths and snippets
return results.map(result => {
if (!result.file_path || !result.snippet) {
throw new Error('Invalid result format - missing required fields');
}
// Create properly encoded URL
let formattedUrl = '';
if (result.raw_url) {
try {
// Split URL into parts and encode components separately
const url = new URL(result.raw_url);
const pathParts = url.pathname.split('/').map(part =>
encodeURIComponent(part).replace(/'/g, "%27")
);
const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : '';
formattedUrl = `${url.origin}${pathParts.join('/')}${search}`;
} catch (e) {
formattedUrl = result.raw_url; // Fallback to original if URL parsing fails
}
}
return `Book: ${result.file_path}\n` +
`Snippet: ${result.snippet}\n` +
(formattedUrl ? `URL: ${formattedUrl}\n` : '');
}).join('\n\n');
})
.catch(error => {
clearTimeout(timeoutId);
let errorMessage = `Error searching books: ${error.message}`;
if (error.name === 'AbortError') {
errorMessage += '\n\nDiagnostics: Request timed out. Check if:';
errorMessage += `\n- The API is running at ${apiUrl}`;
errorMessage += '\n- The server is accessible from your network';
if (!useProxy) {
errorMessage += '\n- Try enabling proxy in plugin settings';
}
} else if (error.message.includes('Failed to fetch') || error.message.includes('CORS')) {
errorMessage += '\n\nDiagnostics: Network request failed. Check if:';
errorMessage += `\n- The API URL (${apiUrl}) is correct`;
errorMessage += '\n- CORS is properly configured on the server';
errorMessage += '\n- The server is running and accessible';
if (!useProxy) {
errorMessage += '\n- Try enabling proxy in plugin settings to bypass CORS';
}
errorMessage += '\n- For debugging, you can add CORS headers in plugin settings';
}
return errorMessage;
});
}

View file

@ -1,16 +1,16 @@
{
"name": "search_books",
"description": "Search for books by content using the book search API.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query to find matching book content"
}
},
"required": [
"query"
]
}
{
"name": "search_books",
"description": "Search for books by content using the book search API.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query to find matching book content"
}
},
"required": [
"query"
]
}
}

View file

@ -1,45 +1,45 @@
[
{
"name": "cors_allow_origin",
"label": "CORS Allowed Origin",
"required": false,
"default": "*",
"description": "Value for Access-Control-Allow-Origin header, typically '*' for public APIs"
},
{
"name": "cors_allow_methods",
"label": "CORS Allowed Methods",
"required": false,
"default": "GET, POST, PUT",
"description": "Comma-separated HTTP methods for Access-Control-Allow-Methods header"
},
{
"name": "cors_allow_headers",
"label": "CORS Allowed Headers",
"required": false,
"default": "Content-Type",
"description": "Comma-separated headers for Access-Control-Allow-Headers"
},
{
"name": "proxyUrl",
"label": "Proxy Server URL",
"required": false,
"default": "",
"description": "URL of the proxy server to use for external requests"
},
{
"name": "bookSearchAPIKey",
"label": "Search Engine API Key",
"type": "password",
"default":"",
"required":false,
"description": "API Key to use for while making requests (not yet used)"
},
{
"name": "apiUrl",
"label": "API Base URL",
"required": false,
"default": "http://localhost:8000",
"description": "Base URL for the API endpoints"
}
[
{
"name": "cors_allow_origin",
"label": "CORS Allowed Origin",
"required": false,
"default": "*",
"description": "Value for Access-Control-Allow-Origin header, typically '*' for public APIs"
},
{
"name": "cors_allow_methods",
"label": "CORS Allowed Methods",
"required": false,
"default": "GET, POST, PUT",
"description": "Comma-separated HTTP methods for Access-Control-Allow-Methods header"
},
{
"name": "cors_allow_headers",
"label": "CORS Allowed Headers",
"required": false,
"default": "Content-Type",
"description": "Comma-separated headers for Access-Control-Allow-Headers"
},
{
"name": "proxyUrl",
"label": "Proxy Server URL",
"required": false,
"default": "",
"description": "URL of the proxy server to use for external requests"
},
{
"name": "bookSearchAPIKey",
"label": "Search Engine API Key",
"type": "password",
"default":"",
"required":false,
"description": "API Key to use for while making requests (not yet used)"
},
{
"name": "apiUrl",
"label": "API Base URL",
"required": false,
"default": "http://localhost:8000",
"description": "Base URL for the API endpoints"
}
]

480
readme.md
View file

@ -1,241 +1,241 @@
# What it IS?
## TypeMind Plugin: EPUB/PDF/TXT Search Integration
A plugin for [TypeMind](https://docs.typingmind.com/plugins/build-a-typingmind-plugin) that mimics the **WebSearch** feature but focuses on retrieving books/documents. Users can query, e.g., *"Find me books about Hecate"*, and the plugin returns **clickable links** to relevant files (EPUB, PDF, TXT).
### Features
- **File Formats**: Supports EPUB, PDF, and TXT (assumed compatibility).
- **Requirement**: Users must provide their own files for indexing.
### Technical Context
- **Language**: Python.
- **Skill Level**:
- My Python knowledge is **extremely rusty** (last project: a not too simple game bot years ago).
- Self-assessment: **Python novice**.
- **Tools Used**:
- **Sonnet 3.7** and **DeepSeek-V3-0324** (for AI/ML integration).
- **RooCode**
### Purpose
1. **Experiment**: Test RooCodes capabilities and identify practical applications.
2. **Non-Production**: **⚠️ Do NOT deploy this in production** (even if "fixed" by someone).
---
### Key Notes
- Humor/self-deprecation preserved (e.g., "extremely rusty," "novice").
- Technical terms standardized (Sonnet 3.7, DeepSeek-V3-0324).
- Critical warnings emphasized (**bold + emoji** for production risk).
# Application Deployment Guide (Ubuntu LTS)
## Prerequisites
### System Requirements
- Ubuntu 22.04 LTS (64-bit)
- Minimum 2 CPU cores, 4GB RAM
- 20GB free disk space
- Open ports: 8000 (app), 9200 (Elasticsearch)
### Required Software
```bash
# Update package lists
sudo apt update
# Install Docker and Docker Compose
sudo apt install -y docker.io docker-compose
sudo systemctl enable --now docker
# Add current user to docker group (logout required)
sudo usermod -aG docker $USER
```
## Environment Configuration
1. Clone the repository:
```bash
git clone https://github.com/intari/roocodetests_1.git
cd roocodetests_1
```
2. Configure environment variables:
```bash
# Copy example .env file
cp .env.example .env
# Edit configuration (nano/vim)
nano .env
```
Key variables to configure:
- `BASE_URL`: Public URL of your application
- `ELASTICSEARCH_PASSWORD`: Secure password for Elasticsearch
- `CPU_LIMIT`: CPU cores to allocate (default: 2)
## Application Deployment
1. Start all services:
```bash
docker-compose up -d
```
2. Verify services are running:
```bash
docker-compose ps
```
3. Check application logs:
```bash
docker-compose logs -f api
```
4. Access the application:
- Web interface: http://your-server-ip:8000
- Elasticsearch: http://your-server-ip:9200
## Maintenance
## restart & rebuild
```bash
docker-compose down && docker-compose up -d --build
```
Logs
```bash
docker logs booksearch_app -f
```
### Log Rotation
Configure Docker log rotation in `/etc/docker/daemon.json`:
```json
{
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3"
}
}
```
Then restart Docker:
```bash
sudo systemctl restart docker
```
### Backups
1. Create backup script (`/usr/local/bin/backup-app.sh`):
```bash
#!/bin/bash
BACKUP_DIR=/var/backups/app
mkdir -p $BACKUP_DIR
docker-compose exec -T elasticsearch curl -X POST "localhost:9200/_snapshot/backup_repo/_all" -H "Content-Type: application/json"
docker-compose exec -T elasticsearch curl -X GET "localhost:9200/_snapshot/backup_repo/snapshot_$(date +%Y-%m-%d)?pretty"
```
2. Make executable and schedule daily cron job:
```bash
sudo chmod +x /usr/local/bin/backup-app.sh
sudo crontab -e
# Add: 0 3 * * * /usr/local/bin/backup-app.sh
```
### Updates
1. Pull latest changes:
```bash
git pull origin main
```
2. Rebuild containers:
```bash
docker-compose up -d --build
```
## Troubleshooting
### Common Issues
**Application not starting:**
```bash
# Check container status
docker ps -a
# View logs
docker-compose logs api
```
**Elasticsearch health issues:**
```bash
# Check cluster health
curl -X GET "localhost:9200/_cluster/health?pretty"
# Check node stats
curl -X GET "localhost:9200/_nodes/stats?pretty"
```
**Port conflicts:**
```bash
# Check used ports
sudo netstat -tulnp
# Change ports in docker-compose.yml if needed
```
### Debugging
1. Access running container shell:
```bash
docker-compose exec api bash
```
2. Check resource usage:
```bash
docker stats
```
## Check Request via JSON :
curl -H "Accept: application/json" -X GET https://booksearch.yourdomain.com/search?query=android
# Simple search
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search?query=android"
# Search with format parameter
curl "https://booksearch.yourdomain.com/search?query=android&format=json"
# Error case
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search"
## API Endpoints
### Search API
```
GET /search?query={query}[&format=json]
```
### Reset Elasticsearch Index
```
POST /reset_index
Headers:
- Authorization: Basic base64(username:password)
```
Example:
```bash
curl -X POST -u admin:securepassword123 https://booksearch.yourdomain.com/reset_index
```
## References
- [Ubuntu Docker Installation](https://docs.docker.com/engine/install/ubuntu/)
- [Docker Compose Reference](https://docs.docker.com/compose/reference/)
- [Elasticsearch Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html)
## Plugin-alt
method:get
https://booksearch.yourdomain.com/search?query={prompt}&format=json
alt version for plugin
request headers
{
"Accept": "application/json"
# What it IS?
## TypeMind Plugin: EPUB/PDF/TXT Search Integration
A plugin for [TypeMind](https://docs.typingmind.com/plugins/build-a-typingmind-plugin) that mimics the **WebSearch** feature but focuses on retrieving books/documents. Users can query, e.g., *"Find me books about Hecate"*, and the plugin returns **clickable links** to relevant files (EPUB, PDF, TXT).
### Features
- **File Formats**: Supports EPUB, PDF, and TXT (assumed compatibility).
- **Requirement**: Users must provide their own files for indexing.
### Technical Context
- **Language**: Python.
- **Skill Level**:
- My Python knowledge is **extremely rusty** (last project: a not too simple game bot years ago).
- Self-assessment: **Python novice**.
- **Tools Used**:
- **Sonnet 3.7** and **DeepSeek-V3-0324** (for AI/ML integration).
- **RooCode**
### Purpose
1. **Experiment**: Test RooCodes capabilities and identify practical applications.
2. **Non-Production**: **⚠️ Do NOT deploy this in production** (even if "fixed" by someone).
---
### Key Notes
- Humor/self-deprecation preserved (e.g., "extremely rusty," "novice").
- Technical terms standardized (Sonnet 3.7, DeepSeek-V3-0324).
- Critical warnings emphasized (**bold + emoji** for production risk).
# Application Deployment Guide (Ubuntu LTS)
## Prerequisites
### System Requirements
- Ubuntu 22.04 LTS (64-bit)
- Minimum 2 CPU cores, 4GB RAM
- 20GB free disk space
- Open ports: 8000 (app), 9200 (Elasticsearch)
### Required Software
```bash
# Update package lists
sudo apt update
# Install Docker and Docker Compose
sudo apt install -y docker.io docker-compose
sudo systemctl enable --now docker
# Add current user to docker group (logout required)
sudo usermod -aG docker $USER
```
## Environment Configuration
1. Clone the repository:
```bash
git clone https://github.com/intari/roocodetests_1.git
cd roocodetests_1
```
2. Configure environment variables:
```bash
# Copy example .env file
cp .env.example .env
# Edit configuration (nano/vim)
nano .env
```
Key variables to configure:
- `BASE_URL`: Public URL of your application
- `ELASTICSEARCH_PASSWORD`: Secure password for Elasticsearch
- `CPU_LIMIT`: CPU cores to allocate (default: 2)
## Application Deployment
1. Start all services:
```bash
docker-compose up -d
```
2. Verify services are running:
```bash
docker-compose ps
```
3. Check application logs:
```bash
docker-compose logs -f api
```
4. Access the application:
- Web interface: http://your-server-ip:8000
- Elasticsearch: http://your-server-ip:9200
## Maintenance
## restart & rebuild
```bash
docker-compose down && docker-compose up -d --build
```
Logs
```bash
docker logs booksearch_app -f
```
### Log Rotation
Configure Docker log rotation in `/etc/docker/daemon.json`:
```json
{
"log-driver": "json-file",
"log-opts": {
"max-size": "10m",
"max-file": "3"
}
}
```
Then restart Docker:
```bash
sudo systemctl restart docker
```
### Backups
1. Create backup script (`/usr/local/bin/backup-app.sh`):
```bash
#!/bin/bash
BACKUP_DIR=/var/backups/app
mkdir -p $BACKUP_DIR
docker-compose exec -T elasticsearch curl -X POST "localhost:9200/_snapshot/backup_repo/_all" -H "Content-Type: application/json"
docker-compose exec -T elasticsearch curl -X GET "localhost:9200/_snapshot/backup_repo/snapshot_$(date +%Y-%m-%d)?pretty"
```
2. Make executable and schedule daily cron job:
```bash
sudo chmod +x /usr/local/bin/backup-app.sh
sudo crontab -e
# Add: 0 3 * * * /usr/local/bin/backup-app.sh
```
### Updates
1. Pull latest changes:
```bash
git pull origin main
```
2. Rebuild containers:
```bash
docker-compose up -d --build
```
## Troubleshooting
### Common Issues
**Application not starting:**
```bash
# Check container status
docker ps -a
# View logs
docker-compose logs api
```
**Elasticsearch health issues:**
```bash
# Check cluster health
curl -X GET "localhost:9200/_cluster/health?pretty"
# Check node stats
curl -X GET "localhost:9200/_nodes/stats?pretty"
```
**Port conflicts:**
```bash
# Check used ports
sudo netstat -tulnp
# Change ports in docker-compose.yml if needed
```
### Debugging
1. Access running container shell:
```bash
docker-compose exec api bash
```
2. Check resource usage:
```bash
docker stats
```
## Check Request via JSON :
curl -H "Accept: application/json" -X GET https://booksearch.yourdomain.com/search?query=android
# Simple search
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search?query=android"
# Search with format parameter
curl "https://booksearch.yourdomain.com/search?query=android&format=json"
# Error case
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search"
## API Endpoints
### Search API
```
GET /search?query={query}[&format=json]
```
### Reset Elasticsearch Index
```
POST /reset_index
Headers:
- Authorization: Basic base64(username:password)
```
Example:
```bash
curl -X POST -u admin:securepassword123 https://booksearch.yourdomain.com/reset_index
```
## References
- [Ubuntu Docker Installation](https://docs.docker.com/engine/install/ubuntu/)
- [Docker Compose Reference](https://docs.docker.com/compose/reference/)
- [Elasticsearch Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html)
## Plugin-alt
method:get
https://booksearch.yourdomain.com/search?query={prompt}&format=json
alt version for plugin
request headers
{
"Accept": "application/json"
}

View file

@ -1,8 +1,8 @@
{
"folders": [
{
"path": "."
}
],
"settings": {}
{
"folders": [
{
"path": "."
}
],
"settings": {}
}

View file

@ -1,37 +1,37 @@
@echo off
echo Setting up test environment...
echo Checking Python version...
python --version
if errorlevel 1 (
echo Python not found. Please install Python 3.10+ first.
pause
exit /b 1
)
echo Installing Python dependencies...
python -m pip install --upgrade pip --user
if errorlevel 1 (
echo Failed to upgrade pip
pause
exit /b 1
)
pip install -r requirements.txt --user
if errorlevel 1 (
echo Failed to install dependencies
pause
exit /b 1
)
echo Running EPUB viewer tests...
cd api
python -m pytest test_epub_viewer.py -v
if errorlevel 1 (
echo Some tests failed
pause
exit /b 1
)
echo All tests completed successfully!
@echo off
echo Setting up test environment...
echo Checking Python version...
python --version
if errorlevel 1 (
echo Python not found. Please install Python 3.10+ first.
pause
exit /b 1
)
echo Installing Python dependencies...
python -m pip install --upgrade pip --user
if errorlevel 1 (
echo Failed to upgrade pip
pause
exit /b 1
)
pip install -r requirements.txt --user
if errorlevel 1 (
echo Failed to install dependencies
pause
exit /b 1
)
echo Running EPUB viewer tests...
cd api
python -m pytest test_epub_viewer.py -v
if errorlevel 1 (
echo Some tests failed
pause
exit /b 1
)
echo All tests completed successfully!
pause

View file

@ -1,29 +1,29 @@
#!/bin/bash
echo "Setting up test environment..."
echo "Checking Python version..."
python3 --version || {
echo "Python 3 not found. Please install Python 3.10+ first."
exit 1
}
echo "Installing Python dependencies..."
python3 -m pip install --upgrade pip --user || {
echo "Failed to upgrade pip"
exit 1
}
pip3 install -r requirements.txt --user || {
echo "Failed to install dependencies"
exit 1
}
echo "Running EPUB viewer tests..."
cd api
python3 -m pytest test_epub_viewer.py -v || {
echo "Some tests failed"
exit 1
}
#!/bin/bash
echo "Setting up test environment..."
echo "Checking Python version..."
python3 --version || {
echo "Python 3 not found. Please install Python 3.10+ first."
exit 1
}
echo "Installing Python dependencies..."
python3 -m pip install --upgrade pip --user || {
echo "Failed to upgrade pip"
exit 1
}
pip3 install -r requirements.txt --user || {
echo "Failed to install dependencies"
exit 1
}
echo "Running EPUB viewer tests..."
cd api
python3 -m pytest test_epub_viewer.py -v || {
echo "Some tests failed"
exit 1
}
echo "All tests completed successfully!"

View file

@ -1,406 +1,406 @@
from flask import Flask, request, jsonify, render_template, send_from_directory
from urllib.parse import unquote
from elasticsearch import Elasticsearch
import os
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
import PyPDF2
import time
import logging
import multiprocessing
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent))
from index import index_files, get_progress
from io import StringIO
import sys
app = Flask(__name__, static_folder='static')
@app.after_request
def add_cors_headers(response):
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT'
response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
return response
# Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
INDEX_NAME = "book_index"
# Wait for Elasticsearch to be available
es = None
while True:
try:
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
if es.ping():
print("Connected to Elasticsearch")
break
else:
print("Elasticsearch not available, retrying...")
except Exception as e:
print(f"Error connecting to Elasticsearch: {e}")
time.sleep(5)
def extract_text_from_epub(epub_path):
try:
book = epub.read_epub(epub_path)
text = ''
for item in book.get_items():
if item.media_type == 'application/xhtml+xml':
content = item.get_content()
if content:
soup = BeautifulSoup(content, 'html.parser')
text += soup.get_text()
return text
except Exception as e:
logging.error(f"Error processing EPUB {epub_path}: {str(e)}")
return f"Error extracting text: {str(e)}"
def extract_text_from_pdf(pdf_path):
text = ''
with open(pdf_path, 'rb') as pdf_file:
pdf_reader = PyPDF2.PdfReader(pdf_file)
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text()
return text
@app.route('/', methods=['GET'])
def home():
return render_template('search.html')
@app.route('/search', methods=['GET'])
def search():
query = request.args.get('query')
if not query:
if request.headers.get('Accept') == 'application/json':
return jsonify({"error": "Query parameter is required"}), 400
return render_template('search.html', query='')
try:
results = es.search(index=INDEX_NAME, query={'match': {'content': query}})
hits = results['hits']['hits']
search_results = []
for hit in hits:
file_path = hit['_source']['file_path']
content = hit['_source']['content']
# Highlight snippet (simple version)
snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100))
index = content.lower().find(query.lower())
if index != -1:
start = max(0, index - snippet_char_limit)
end = min(len(content), index + snippet_char_limit + len(query))
snippet = content[start:end]
else:
snippet = "No snippet found"
# Get base URL from environment
base_url = os.environ.get("BASE_URL", "http://localhost:8000")
# Construct URLs
# Remove "/books/" from path start if it's here
if file_path.startswith("/books/"):
file_path = file_path[len("/books/"):]
url = f"{base_url}/{file_path}"
raw_url = f"{base_url}/file/{file_path}?format=html"
search_results.append({
"file_path": file_path,
"url": url,
"raw_url": raw_url,
"snippet": snippet,
"score": hit['_score']
})
# If it's an API request or format=json is specified
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
response = jsonify({
"query": query,
"results": search_results,
"total": len(search_results),
"took": results['took']
})
response.headers['Content-Type'] = 'application/json'
return response
# Otherwise, render the HTML template
return render_template('search.html', results=search_results, query=query)
except Exception as e:
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
response = jsonify({
"error": str(e),
"query": query
})
response.headers['Content-Type'] = 'application/json'
return response, 500
return render_template('search.html', error=str(e), query=query)
@app.route('/files', methods=['GET'])
def list_files():
books_dir = "/books"
files = []
try:
# Check if indexing is in progress
indexing_in_progress = get_progress() is not None
for filename in os.listdir(books_dir):
file_path = os.path.join(books_dir, filename)
if os.path.isfile(file_path):
file_size = os.path.getsize(file_path)
# Extract book title from filename if possible
title = filename
if ' - ' in filename: # Common pattern in filenames
title_parts = filename.split(' - ')
if len(title_parts) > 1:
title = ' - '.join(title_parts[:-1]) # Take all but last part
files.append({
'name': filename,
'title': title,
'path': filename,
'size': file_size,
'size_mb': round(file_size / (1024 * 1024), 2)
})
# Calculate totals
total_files = len(files)
total_size = sum(f['size'] for f in files)
total_size_mb = round(total_size / (1024 * 1024), 2)
# If it's an API request, return JSON
if request.headers.get('Accept') == 'application/json':
return jsonify({
'files': files,
'total_files': total_files,
'total_size': total_size,
'total_size_mb': total_size_mb,
'indexing_in_progress': indexing_in_progress
})
# Otherwise, render the HTML template
return render_template('files.html',
files=files,
total_files=total_files,
total_size=total_size,
total_size_mb=total_size_mb,
indexing_in_progress=indexing_in_progress)
except Exception as e:
if request.headers.get('Accept') == 'application/json':
return jsonify({"error": str(e)}), 500
return render_template('files.html', error=str(e))
@app.route('/file/<path:file_path>', methods=['GET'])
def get_file(file_path):
# Ensure the file path is within the /books directory
books_dir = "/books"
# Decode URL-encoded path and normalize
decoded_path = unquote(file_path)
# Remove any leading slashes or duplicate 'books/' segments
decoded_path = decoded_path.lstrip('/')
if decoded_path.startswith('books/'):
decoded_path = decoded_path[6:]
# Join paths safely
full_path = os.path.normpath(os.path.join(books_dir, decoded_path))
# Validate the path is within the books directory
if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
return jsonify({"error": "Access denied: File path outside of books directory"}), 403
try:
# Handle EPUB files
if file_path.lower().endswith('.epub'):
if request.args.get('format') == 'html':
# Convert EPUB to HTML
try:
book = epub.read_epub(full_path)
html_content = []
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
content = item.get_content()
if content:
soup = BeautifulSoup(content, 'html.parser')
# Preserve basic formatting tags
for tag in soup.find_all():
if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']:
tag.unwrap()
html_content.append(str(soup))
except Exception as e:
logging.error(f"Error processing EPUB {full_path}: {str(e)}")
return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500
return render_template('text_file.html',
file_path=file_path,
content='<hr>'.join(html_content),
is_html=True)
else:
# Render the viewer template
return render_template('epub_viewer.html', file_path=file_path)
# Handle regular text files
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# If it's an API request or the Accept header doesn't include HTML, return plain text
if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''):
return content, 200, {'Content-Type': 'text/plain; charset=utf-8'}
# Otherwise, render a simple HTML page with the content
return render_template('text_file.html', file_path=file_path, content=content)
except Exception as e:
return jsonify({"error": str(e)}), 404
@app.route('/epub/<path:file_path>', methods=['GET'])
def get_epub_file(file_path):
"""Serve the raw EPUB file with proper headers"""
books_dir = "/books"
full_path = os.path.join(books_dir, file_path)
# Validate the path is within the books directory
if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
return jsonify({"error": "Access denied: File path outside of books directory"}), 403
try:
# Serve the raw EPUB file with proper headers
response = send_from_directory(
books_dir,
file_path,
as_attachment=True,
mimetype='application/epub+zip'
)
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET'
response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"'
return response
except Exception as e:
return jsonify({"error": str(e)}), 404
@app.route('/index_books', methods=['GET'])
def index_books():
logging.info("Indexing books endpoint called")
# Get CPU configuration
cpu_limit = os.environ.get("CPU_LIMIT")
available_cpus = multiprocessing.cpu_count()
used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1)
# Capture stdout to a string
old_stdout = sys.stdout
sys.stdout = captured_output = StringIO()
try:
# Start indexing in a separate thread
from threading import Thread
index_thread = Thread(target=index_files, args=("/books",))
index_thread.start()
# If it's an API request, return immediately
if request.headers.get('Accept') == 'application/json':
return jsonify({"message": "Indexing started in background"})
# Otherwise, render the progress page with CPU info
return render_template('indexing.html',
available_cpus=available_cpus,
used_cpus=used_cpus)
except Exception as e:
logging.error(f"Indexing failed: {e}")
sys.stdout = old_stdout
if request.headers.get('Accept') == 'application/json':
return jsonify({"error": str(e)}), 500
# Create a simple HTML response for errors
return render_template('indexing_error.html', error=str(e))
finally:
sys.stdout = old_stdout
@app.route('/indexing_progress', methods=['GET'])
def get_indexing_progress():
progress = get_progress()
if progress is None:
return jsonify({"status": "not_running"})
# Format time for display
from datetime import datetime
import pytz
# Get browser timezone from Accept-Language header or use UTC as fallback
browser_tz = request.headers.get('X-Timezone', 'UTC')
try:
tz = pytz.timezone(browser_tz)
except pytz.UnknownTimeZoneError:
tz = pytz.UTC
elapsed_min = int(progress['elapsed_time'] // 60)
elapsed_sec = int(progress['elapsed_time'] % 60)
if progress['estimated_remaining'] > 0:
remaining_min = int(progress['estimated_remaining'] // 60)
remaining_sec = int(progress['estimated_remaining'] % 60)
completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)')
else:
remaining_min = 0
remaining_sec = 0
completion_time = "N/A"
return jsonify({
"status": "running",
"total_files": progress['total_files'],
"processed_files": progress['processed_files'],
"percentage": round(progress['percentage'], 1),
"current_file": progress['current_file'],
"elapsed_time": f"{elapsed_min}m {elapsed_sec}s",
"estimated_remaining": f"{remaining_min}m {remaining_sec}s",
"estimated_completion": completion_time,
"errors": progress['errors']
})
@app.route('/abort_indexing', methods=['POST'])
def abort_indexing():
# In a real implementation, we would set a flag to stop the indexing
# For now, we'll just return a message
return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"})
@app.route('/reset_index', methods=['POST'])
def reset_index():
"""Reset the Elasticsearch index by deleting and recreating it"""
try:
# Check for basic auth
auth = request.authorization
if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"):
return jsonify({"error": "Authentication required"}), 401
# Delete existing index if it exists
if es.indices.exists(index=INDEX_NAME):
es.indices.delete(index=INDEX_NAME)
# Create new index with mapping
es.indices.create(index=INDEX_NAME, body={
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"file_path": {"type": "keyword"},
"content": {"type": "text"}
}
}
})
return jsonify({"status": "success", "message": "Index reset successfully"})
except Exception as e:
return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
logging.info("Starting the API - inside main block")
from flask import Flask, request, jsonify, render_template, send_from_directory
from urllib.parse import unquote
from elasticsearch import Elasticsearch
import os
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
import PyPDF2
import time
import logging
import multiprocessing
import sys
from pathlib import Path
sys.path.append(str(Path(__file__).parent.parent))
from index import index_files, get_progress
from io import StringIO
import sys
app = Flask(__name__, static_folder='static')
@app.after_request
def add_cors_headers(response):
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT'
response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
return response
# Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
INDEX_NAME = "book_index"
# Wait for Elasticsearch to be available
es = None
while True:
try:
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
if es.ping():
print("Connected to Elasticsearch")
break
else:
print("Elasticsearch not available, retrying...")
except Exception as e:
print(f"Error connecting to Elasticsearch: {e}")
time.sleep(5)
def extract_text_from_epub(epub_path):
try:
book = epub.read_epub(epub_path)
text = ''
for item in book.get_items():
if item.media_type == 'application/xhtml+xml':
content = item.get_content()
if content:
soup = BeautifulSoup(content, 'html.parser')
text += soup.get_text()
return text
except Exception as e:
logging.error(f"Error processing EPUB {epub_path}: {str(e)}")
return f"Error extracting text: {str(e)}"
def extract_text_from_pdf(pdf_path):
text = ''
with open(pdf_path, 'rb') as pdf_file:
pdf_reader = PyPDF2.PdfReader(pdf_file)
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text()
return text
@app.route('/', methods=['GET'])
def home():
return render_template('search.html')
@app.route('/search', methods=['GET'])
def search():
query = request.args.get('query')
if not query:
if request.headers.get('Accept') == 'application/json':
return jsonify({"error": "Query parameter is required"}), 400
return render_template('search.html', query='')
try:
results = es.search(index=INDEX_NAME, query={'match': {'content': query}})
hits = results['hits']['hits']
search_results = []
for hit in hits:
file_path = hit['_source']['file_path']
content = hit['_source']['content']
# Highlight snippet (simple version)
snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100))
index = content.lower().find(query.lower())
if index != -1:
start = max(0, index - snippet_char_limit)
end = min(len(content), index + snippet_char_limit + len(query))
snippet = content[start:end]
else:
snippet = "No snippet found"
# Get base URL from environment
base_url = os.environ.get("BASE_URL", "http://localhost:8000")
# Construct URLs
# Remove "/books/" from path start if it's here
if file_path.startswith("/books/"):
file_path = file_path[len("/books/"):]
url = f"{base_url}/{file_path}"
raw_url = f"{base_url}/file/{file_path}?format=html"
search_results.append({
"file_path": file_path,
"url": url,
"raw_url": raw_url,
"snippet": snippet,
"score": hit['_score']
})
# If it's an API request or format=json is specified
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
response = jsonify({
"query": query,
"results": search_results,
"total": len(search_results),
"took": results['took']
})
response.headers['Content-Type'] = 'application/json'
return response
# Otherwise, render the HTML template
return render_template('search.html', results=search_results, query=query)
except Exception as e:
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
response = jsonify({
"error": str(e),
"query": query
})
response.headers['Content-Type'] = 'application/json'
return response, 500
return render_template('search.html', error=str(e), query=query)
@app.route('/files', methods=['GET'])
def list_files():
books_dir = "/books"
files = []
try:
# Check if indexing is in progress
indexing_in_progress = get_progress() is not None
for filename in os.listdir(books_dir):
file_path = os.path.join(books_dir, filename)
if os.path.isfile(file_path):
file_size = os.path.getsize(file_path)
# Extract book title from filename if possible
title = filename
if ' - ' in filename: # Common pattern in filenames
title_parts = filename.split(' - ')
if len(title_parts) > 1:
title = ' - '.join(title_parts[:-1]) # Take all but last part
files.append({
'name': filename,
'title': title,
'path': filename,
'size': file_size,
'size_mb': round(file_size / (1024 * 1024), 2)
})
# Calculate totals
total_files = len(files)
total_size = sum(f['size'] for f in files)
total_size_mb = round(total_size / (1024 * 1024), 2)
# If it's an API request, return JSON
if request.headers.get('Accept') == 'application/json':
return jsonify({
'files': files,
'total_files': total_files,
'total_size': total_size,
'total_size_mb': total_size_mb,
'indexing_in_progress': indexing_in_progress
})
# Otherwise, render the HTML template
return render_template('files.html',
files=files,
total_files=total_files,
total_size=total_size,
total_size_mb=total_size_mb,
indexing_in_progress=indexing_in_progress)
except Exception as e:
if request.headers.get('Accept') == 'application/json':
return jsonify({"error": str(e)}), 500
return render_template('files.html', error=str(e))
@app.route('/file/<path:file_path>', methods=['GET'])
def get_file(file_path):
# Ensure the file path is within the /books directory
books_dir = "/books"
# Decode URL-encoded path and normalize
decoded_path = unquote(file_path)
# Remove any leading slashes or duplicate 'books/' segments
decoded_path = decoded_path.lstrip('/')
if decoded_path.startswith('books/'):
decoded_path = decoded_path[6:]
# Join paths safely
full_path = os.path.normpath(os.path.join(books_dir, decoded_path))
# Validate the path is within the books directory
if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
return jsonify({"error": "Access denied: File path outside of books directory"}), 403
try:
# Handle EPUB files
if file_path.lower().endswith('.epub'):
if request.args.get('format') == 'html':
# Convert EPUB to HTML
try:
book = epub.read_epub(full_path)
html_content = []
for item in book.get_items():
if item.get_type() == ebooklib.ITEM_DOCUMENT:
content = item.get_content()
if content:
soup = BeautifulSoup(content, 'html.parser')
# Preserve basic formatting tags
for tag in soup.find_all():
if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']:
tag.unwrap()
html_content.append(str(soup))
except Exception as e:
logging.error(f"Error processing EPUB {full_path}: {str(e)}")
return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500
return render_template('text_file.html',
file_path=file_path,
content='<hr>'.join(html_content),
is_html=True)
else:
# Render the viewer template
return render_template('epub_viewer.html', file_path=file_path)
# Handle regular text files
with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
content = f.read()
# If it's an API request or the Accept header doesn't include HTML, return plain text
if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''):
return content, 200, {'Content-Type': 'text/plain; charset=utf-8'}
# Otherwise, render a simple HTML page with the content
return render_template('text_file.html', file_path=file_path, content=content)
except Exception as e:
return jsonify({"error": str(e)}), 404
@app.route('/epub/<path:file_path>', methods=['GET'])
def get_epub_file(file_path):
"""Serve the raw EPUB file with proper headers"""
books_dir = "/books"
full_path = os.path.join(books_dir, file_path)
# Validate the path is within the books directory
if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
return jsonify({"error": "Access denied: File path outside of books directory"}), 403
try:
# Serve the raw EPUB file with proper headers
response = send_from_directory(
books_dir,
file_path,
as_attachment=True,
mimetype='application/epub+zip'
)
response.headers['Access-Control-Allow-Origin'] = '*'
response.headers['Access-Control-Allow-Methods'] = 'GET'
response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"'
return response
except Exception as e:
return jsonify({"error": str(e)}), 404
@app.route('/index_books', methods=['GET'])
def index_books():
logging.info("Indexing books endpoint called")
# Get CPU configuration
cpu_limit = os.environ.get("CPU_LIMIT")
available_cpus = multiprocessing.cpu_count()
used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1)
# Capture stdout to a string
old_stdout = sys.stdout
sys.stdout = captured_output = StringIO()
try:
# Start indexing in a separate thread
from threading import Thread
index_thread = Thread(target=index_files, args=("/books",))
index_thread.start()
# If it's an API request, return immediately
if request.headers.get('Accept') == 'application/json':
return jsonify({"message": "Indexing started in background"})
# Otherwise, render the progress page with CPU info
return render_template('indexing.html',
available_cpus=available_cpus,
used_cpus=used_cpus)
except Exception as e:
logging.error(f"Indexing failed: {e}")
sys.stdout = old_stdout
if request.headers.get('Accept') == 'application/json':
return jsonify({"error": str(e)}), 500
# Create a simple HTML response for errors
return render_template('indexing_error.html', error=str(e))
finally:
sys.stdout = old_stdout
@app.route('/indexing_progress', methods=['GET'])
def get_indexing_progress():
progress = get_progress()
if progress is None:
return jsonify({"status": "not_running"})
# Format time for display
from datetime import datetime
import pytz
# Get browser timezone from Accept-Language header or use UTC as fallback
browser_tz = request.headers.get('X-Timezone', 'UTC')
try:
tz = pytz.timezone(browser_tz)
except pytz.UnknownTimeZoneError:
tz = pytz.UTC
elapsed_min = int(progress['elapsed_time'] // 60)
elapsed_sec = int(progress['elapsed_time'] % 60)
if progress['estimated_remaining'] > 0:
remaining_min = int(progress['estimated_remaining'] // 60)
remaining_sec = int(progress['estimated_remaining'] % 60)
completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)')
else:
remaining_min = 0
remaining_sec = 0
completion_time = "N/A"
return jsonify({
"status": "running",
"total_files": progress['total_files'],
"processed_files": progress['processed_files'],
"percentage": round(progress['percentage'], 1),
"current_file": progress['current_file'],
"elapsed_time": f"{elapsed_min}m {elapsed_sec}s",
"estimated_remaining": f"{remaining_min}m {remaining_sec}s",
"estimated_completion": completion_time,
"errors": progress['errors']
})
@app.route('/abort_indexing', methods=['POST'])
def abort_indexing():
# In a real implementation, we would set a flag to stop the indexing
# For now, we'll just return a message
return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"})
@app.route('/reset_index', methods=['POST'])
def reset_index():
"""Reset the Elasticsearch index by deleting and recreating it"""
try:
# Check for basic auth
auth = request.authorization
if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"):
return jsonify({"error": "Authentication required"}), 401
# Delete existing index if it exists
if es.indices.exists(index=INDEX_NAME):
es.indices.delete(index=INDEX_NAME)
# Create new index with mapping
es.indices.create(index=INDEX_NAME, body={
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"file_path": {"type": "keyword"},
"content": {"type": "text"}
}
}
})
return jsonify({"status": "success", "message": "Index reset successfully"})
except Exception as e:
return jsonify({"error": str(e)}), 500
if __name__ == '__main__':
logging.basicConfig(level=logging.DEBUG)
logging.info("Starting the API - inside main block")
app.run(debug=True, host='0.0.0.0')

View file

@ -1,66 +1,66 @@
#viewerContainer {
position: absolute;
top: 50px;
left: 0;
right: 0;
bottom: 0;
overflow: auto;
}
#viewer {
width: 100%;
height: 90vh;
margin: 0 auto;
}
.controls {
text-align: center;
padding: 10px;
}
#prev, #next {
padding: 10px 20px;
margin: 10px;
background: #007bff;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
.error {
color: red;
padding: 20px;
text-align: center;
}
header {
text-align: center;
padding: 20px 0;
}
.nav {
background-color: #f8f9fa;
padding: 10px;
}
.nav ul {
list-style: none;
display: flex;
justify-content: center;
gap: 20px;
padding: 0;
margin: 0;
}
.nav a {
text-decoration: none;
color: #007bff;
}
footer {
text-align: center;
padding: 20px;
margin-top: 20px;
background-color: #f8f9fa;
#viewerContainer {
position: absolute;
top: 50px;
left: 0;
right: 0;
bottom: 0;
overflow: auto;
}
#viewer {
width: 100%;
height: 90vh;
margin: 0 auto;
}
.controls {
text-align: center;
padding: 10px;
}
#prev, #next {
padding: 10px 20px;
margin: 10px;
background: #007bff;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
}
.error {
color: red;
padding: 20px;
text-align: center;
}
header {
text-align: center;
padding: 20px 0;
}
.nav {
background-color: #f8f9fa;
padding: 10px;
}
.nav ul {
list-style: none;
display: flex;
justify-content: center;
gap: 20px;
padding: 0;
margin: 0;
}
.nav a {
text-decoration: none;
color: #007bff;
}
footer {
text-align: center;
padding: 20px;
margin-top: 20px;
background-color: #f8f9fa;
}

View file

@ -1,236 +1,236 @@
body {
font-family: 'Arial', sans-serif;
line-height: 1.6;
margin: 0;
padding: 0;
background-color: #f4f4f4;
color: #333;
}
.container {
width: 80%;
margin: auto;
overflow: hidden;
padding: 20px;
}
header {
background: #35424a;
color: white;
padding: 20px;
text-align: center;
border-bottom: 4px solid #1abc9c;
}
header h1 {
margin: 0;
}
.search-container {
margin: 30px 0;
text-align: center;
}
.search-box {
width: 70%;
padding: 12px;
border: 1px solid #ddd;
border-radius: 4px;
font-size: 16px;
}
.search-button {
padding: 12px 24px;
background: #1abc9c;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
}
.search-button:hover {
background: #16a085;
}
.results {
margin-top: 30px;
}
.result-item {
background: white;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.result-item h3 {
margin-top: 0;
color: #1abc9c;
}
.result-item p {
margin-bottom: 10px;
}
.result-item a {
color: #3498db;
text-decoration: none;
}
.result-item a:hover {
text-decoration: underline;
}
.file-list {
list-style: none;
padding: 0;
}
.file-list li {
background: white;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.file-list a {
color: #3498db;
text-decoration: none;
font-weight: bold;
}
.file-list a:hover {
text-decoration: underline;
}
.nav {
background: #35424a;
color: white;
padding: 10px 0;
}
.nav ul {
padding: 0;
list-style: none;
text-align: center;
}
.nav li {
display: inline;
margin: 0 15px;
}
.nav a {
color: white;
text-decoration: none;
}
.nav a:hover {
color: #1abc9c;
}
footer {
background: #35424a;
color: white;
text-align: center;
padding: 20px;
margin-top: 40px;
}
/* Indexing page styles */
.progress-container {
margin: 20px 0;
padding: 20px;
background: #f5f5f5;
border-radius: 5px;
}
.progress-bar {
height: 20px;
background: #e0e0e0;
border-radius: 10px;
margin: 10px 0;
overflow: hidden;
}
.progress-fill {
height: 100%;
background: #4CAF50;
width: 0%;
transition: width 0.3s;
}
.progress-stats {
display: flex;
justify-content: space-between;
margin-bottom: 10px;
}
.progress-details {
margin-top: 20px;
}
.current-file {
font-weight: bold;
margin: 10px 0;
word-break: break-all;
}
.time-stats {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 10px;
margin-top: 15px;
}
.time-stat {
background: #e9e9e9;
padding: 10px;
border-radius: 5px;
}
.abort-button {
background: #f44336;
color: white;
border: none;
padding: 10px 20px;
border-radius: 5px;
cursor: pointer;
margin-top: 20px;
}
.abort-button:hover {
background: #d32f2f;
}
.error-list {
margin-top: 10px;
}
.error-item {
padding: 10px;
margin-bottom: 5px;
background: #ffebee;
border-left: 3px solid #f44336;
}
.file-actions {
margin-top: 10px;
}
.file-action {
color: #3498db;
text-decoration: none;
}
.file-action:hover {
text-decoration: underline;
}
.action-separator {
margin: 0 5px;
color: #999;
body {
font-family: 'Arial', sans-serif;
line-height: 1.6;
margin: 0;
padding: 0;
background-color: #f4f4f4;
color: #333;
}
.container {
width: 80%;
margin: auto;
overflow: hidden;
padding: 20px;
}
header {
background: #35424a;
color: white;
padding: 20px;
text-align: center;
border-bottom: 4px solid #1abc9c;
}
header h1 {
margin: 0;
}
.search-container {
margin: 30px 0;
text-align: center;
}
.search-box {
width: 70%;
padding: 12px;
border: 1px solid #ddd;
border-radius: 4px;
font-size: 16px;
}
.search-button {
padding: 12px 24px;
background: #1abc9c;
color: white;
border: none;
border-radius: 4px;
cursor: pointer;
font-size: 16px;
}
.search-button:hover {
background: #16a085;
}
.results {
margin-top: 30px;
}
.result-item {
background: white;
padding: 15px;
margin-bottom: 15px;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.result-item h3 {
margin-top: 0;
color: #1abc9c;
}
.result-item p {
margin-bottom: 10px;
}
.result-item a {
color: #3498db;
text-decoration: none;
}
.result-item a:hover {
text-decoration: underline;
}
.file-list {
list-style: none;
padding: 0;
}
.file-list li {
background: white;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.file-list a {
color: #3498db;
text-decoration: none;
font-weight: bold;
}
.file-list a:hover {
text-decoration: underline;
}
.nav {
background: #35424a;
color: white;
padding: 10px 0;
}
.nav ul {
padding: 0;
list-style: none;
text-align: center;
}
.nav li {
display: inline;
margin: 0 15px;
}
.nav a {
color: white;
text-decoration: none;
}
.nav a:hover {
color: #1abc9c;
}
footer {
background: #35424a;
color: white;
text-align: center;
padding: 20px;
margin-top: 40px;
}
/* Indexing page styles */
.progress-container {
margin: 20px 0;
padding: 20px;
background: #f5f5f5;
border-radius: 5px;
}
.progress-bar {
height: 20px;
background: #e0e0e0;
border-radius: 10px;
margin: 10px 0;
overflow: hidden;
}
.progress-fill {
height: 100%;
background: #4CAF50;
width: 0%;
transition: width 0.3s;
}
.progress-stats {
display: flex;
justify-content: space-between;
margin-bottom: 10px;
}
.progress-details {
margin-top: 20px;
}
.current-file {
font-weight: bold;
margin: 10px 0;
word-break: break-all;
}
.time-stats {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 10px;
margin-top: 15px;
}
.time-stat {
background: #e9e9e9;
padding: 10px;
border-radius: 5px;
}
.abort-button {
background: #f44336;
color: white;
border: none;
padding: 10px 20px;
border-radius: 5px;
cursor: pointer;
margin-top: 20px;
}
.abort-button:hover {
background: #d32f2f;
}
.error-list {
margin-top: 10px;
}
.error-item {
padding: 10px;
margin-bottom: 5px;
background: #ffebee;
border-left: 3px solid #f44336;
}
.file-actions {
margin-top: 10px;
}
.file-action {
color: #3498db;
text-decoration: none;
}
.file-action:hover {
text-decoration: underline;
}
.action-separator {
margin: 0 5px;
color: #999;
}

View file

@ -1,262 +1,262 @@
<!DOCTYPE html>
<html>
<head>
<title>{{ file_path }}</title>
<link rel="stylesheet" href="/static/css/style.css">
<link rel="stylesheet" href="/static/css/epub_viewer.css">
<script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
</head>
<body>
<header>
<h1>EPUB: {{ file_path }}</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div id="viewer"></div>
<div class="controls">
<button id="prev">Previous</button>
<button id="next">Next</button>
</div>
</div>
<script>
// Debug logging function
function debug(message, obj) {
console.log("EPUB DEBUG: " + message, obj || '');
// Add to page for visibility
const debugDiv = document.getElementById('debug-output') ||
(function() {
const div = document.createElement('div');
div.id = 'debug-output';
div.style.position = 'fixed';
div.style.bottom = '10px';
div.style.right = '10px';
div.style.backgroundColor = 'rgba(0,0,0,0.7)';
div.style.color = 'white';
div.style.padding = '10px';
div.style.maxHeight = '200px';
div.style.overflow = 'auto';
div.style.zIndex = '9999';
document.body.appendChild(div);
return div;
})();
const logEntry = document.createElement('div');
logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : '');
debugDiv.appendChild(logEntry);
}
// Global variables
var book = null;
var rendition = null;
function handlePrev() {
debug("Previous button clicked");
if (!rendition) {
debug("ERROR: rendition not available for prev");
return;
}
try {
rendition.prev().then(() => {
debug("Navigation to previous page successful");
}).catch(err => {
debug("Navigation to previous page failed", err.message);
});
} catch (err) {
debug("Error in prev navigation", err.message);
}
}
function handleNext() {
debug("Next button clicked");
if (!rendition) {
debug("ERROR: rendition not available for next");
return;
}
try {
rendition.next().then(() => {
debug("Navigation to next page successful");
}).catch(err => {
debug("Navigation to next page failed", err.message);
});
} catch (err) {
debug("Error in next navigation", err.message);
}
}
function handleKeydown(e) {
if (!rendition) {
debug("ERROR: rendition not available for keydown");
return;
}
if (e.keyCode === 37) {
debug("Left arrow key pressed");
rendition.prev();
}
if (e.keyCode === 39) {
debug("Right arrow key pressed");
rendition.next();
}
}
function initializeEPUB() {
debug("Initializing EPUB viewer");
try {
// Use dedicated endpoint for EPUB files
const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}");
debug("Loading EPUB from URL", fileUrl);
// TEST_EPUB_URL: /epub/{{ file_path }}
// Create book object
window.book = book = ePub(fileUrl);
debug("Book object created successfully");
console.log("Book object details:", book);
if (!book) {
throw new Error("Failed to initialize EPUB reader");
}
// Set up error handler
book.on('error', function(err) {
debug("EPUB error event", err);
document.getElementById("viewer").innerHTML =
'<div class="error">Error loading EPUB: ' + err.message + '</div>';
});
// Set up ready handler
book.on('ready', function() {
debug("Book ready event fired");
});
// Create rendition
debug("Creating rendition");
window.rendition = rendition = book.renderTo("viewer", {
width: "100%",
height: "100%",
spread: "none",
manager: "continuous",
style: `
body {
margin: 0;
padding: 20px;
background-color: white;
color: black;
font-size: 1.2em;
line-height: 1.5;
}
img {
max-width: 100%;
}
`
});
// Hide iframe initially to prevent flash of unstyled content
const viewer = document.getElementById("viewer");
if (viewer) {
viewer.style.visibility = "hidden";
}
debug("Displaying rendition");
rendition.display()
.then(() => {
debug("Rendition displayed successfully");
// Set up resize handler
const resizeHandler = function() {
try {
if (rendition) {
rendition.resize();
}
} catch (err) {
console.error("Resize error:", err);
}
};
window.addEventListener('resize', resizeHandler);
// Show content and initialize navigation
setTimeout(() => {
try {
if (rendition) {
rendition.resize();
const viewer = document.getElementById('viewer');
if (viewer) {
viewer.style.visibility = 'visible';
}
// Initialize navigation
rendition.start();
}
} catch (err) {
debug("Content display error", err.message);
}
}, 100);
return rendition;
})
.catch(err => {
debug("Rendition error", err);
document.getElementById("viewer").innerHTML =
'<div class="error">Error displaying EPUB: ' + err.message + '</div>';
});
// Set up event listeners
debug("Setting up event listeners");
try {
document.getElementById("prev").addEventListener("click", handlePrev);
document.getElementById("next").addEventListener("click", handleNext);
document.addEventListener("keydown", handleKeydown);
// Add loading indicator
const loadingIndicator = document.createElement('div');
loadingIndicator.id = 'loading-indicator';
loadingIndicator.style.position = 'fixed';
loadingIndicator.style.top = '50%';
loadingIndicator.style.left = '50%';
loadingIndicator.style.transform = 'translate(-50%, -50%)';
loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)';
loadingIndicator.style.color = 'white';
loadingIndicator.style.padding = '20px';
loadingIndicator.style.borderRadius = '5px';
loadingIndicator.style.zIndex = '1000';
loadingIndicator.textContent = 'Loading EPUB...';
document.body.appendChild(loadingIndicator);
// Remove indicator when loaded
book.on('ready', function() {
const indicator = document.getElementById('loading-indicator');
if (indicator) {
indicator.remove();
}
});
} catch (err) {
debug("Error setting up event listeners", err.message);
console.error("Event listener setup error:", err);
}
} catch (err) {
debug("EPUB initialization error", err);
document.getElementById("viewer").innerHTML =
'<div class="error">Failed to load EPUB: ' + err.message + '</div>';
}
}
// Initialize when DOM is loaded
debug("Setting up DOMContentLoaded listener");
document.addEventListener('DOMContentLoaded', initializeEPUB);
</script>
<footer>
<p>&copy; 2025 Book Search Engine</p>
</footer>
</body>
<!DOCTYPE html>
<html>
<head>
<title>{{ file_path }}</title>
<link rel="stylesheet" href="/static/css/style.css">
<link rel="stylesheet" href="/static/css/epub_viewer.css">
<script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
</head>
<body>
<header>
<h1>EPUB: {{ file_path }}</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div id="viewer"></div>
<div class="controls">
<button id="prev">Previous</button>
<button id="next">Next</button>
</div>
</div>
<script>
// Debug logging function
function debug(message, obj) {
console.log("EPUB DEBUG: " + message, obj || '');
// Add to page for visibility
const debugDiv = document.getElementById('debug-output') ||
(function() {
const div = document.createElement('div');
div.id = 'debug-output';
div.style.position = 'fixed';
div.style.bottom = '10px';
div.style.right = '10px';
div.style.backgroundColor = 'rgba(0,0,0,0.7)';
div.style.color = 'white';
div.style.padding = '10px';
div.style.maxHeight = '200px';
div.style.overflow = 'auto';
div.style.zIndex = '9999';
document.body.appendChild(div);
return div;
})();
const logEntry = document.createElement('div');
logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : '');
debugDiv.appendChild(logEntry);
}
// Global variables
var book = null;
var rendition = null;
function handlePrev() {
debug("Previous button clicked");
if (!rendition) {
debug("ERROR: rendition not available for prev");
return;
}
try {
rendition.prev().then(() => {
debug("Navigation to previous page successful");
}).catch(err => {
debug("Navigation to previous page failed", err.message);
});
} catch (err) {
debug("Error in prev navigation", err.message);
}
}
function handleNext() {
debug("Next button clicked");
if (!rendition) {
debug("ERROR: rendition not available for next");
return;
}
try {
rendition.next().then(() => {
debug("Navigation to next page successful");
}).catch(err => {
debug("Navigation to next page failed", err.message);
});
} catch (err) {
debug("Error in next navigation", err.message);
}
}
function handleKeydown(e) {
if (!rendition) {
debug("ERROR: rendition not available for keydown");
return;
}
if (e.keyCode === 37) {
debug("Left arrow key pressed");
rendition.prev();
}
if (e.keyCode === 39) {
debug("Right arrow key pressed");
rendition.next();
}
}
function initializeEPUB() {
debug("Initializing EPUB viewer");
try {
// Use dedicated endpoint for EPUB files
const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}");
debug("Loading EPUB from URL", fileUrl);
// TEST_EPUB_URL: /epub/{{ file_path }}
// Create book object
window.book = book = ePub(fileUrl);
debug("Book object created successfully");
console.log("Book object details:", book);
if (!book) {
throw new Error("Failed to initialize EPUB reader");
}
// Set up error handler
book.on('error', function(err) {
debug("EPUB error event", err);
document.getElementById("viewer").innerHTML =
'<div class="error">Error loading EPUB: ' + err.message + '</div>';
});
// Set up ready handler
book.on('ready', function() {
debug("Book ready event fired");
});
// Create rendition
debug("Creating rendition");
window.rendition = rendition = book.renderTo("viewer", {
width: "100%",
height: "100%",
spread: "none",
manager: "continuous",
style: `
body {
margin: 0;
padding: 20px;
background-color: white;
color: black;
font-size: 1.2em;
line-height: 1.5;
}
img {
max-width: 100%;
}
`
});
// Hide iframe initially to prevent flash of unstyled content
const viewer = document.getElementById("viewer");
if (viewer) {
viewer.style.visibility = "hidden";
}
debug("Displaying rendition");
rendition.display()
.then(() => {
debug("Rendition displayed successfully");
// Set up resize handler
const resizeHandler = function() {
try {
if (rendition) {
rendition.resize();
}
} catch (err) {
console.error("Resize error:", err);
}
};
window.addEventListener('resize', resizeHandler);
// Show content and initialize navigation
setTimeout(() => {
try {
if (rendition) {
rendition.resize();
const viewer = document.getElementById('viewer');
if (viewer) {
viewer.style.visibility = 'visible';
}
// Initialize navigation
rendition.start();
}
} catch (err) {
debug("Content display error", err.message);
}
}, 100);
return rendition;
})
.catch(err => {
debug("Rendition error", err);
document.getElementById("viewer").innerHTML =
'<div class="error">Error displaying EPUB: ' + err.message + '</div>';
});
// Set up event listeners
debug("Setting up event listeners");
try {
document.getElementById("prev").addEventListener("click", handlePrev);
document.getElementById("next").addEventListener("click", handleNext);
document.addEventListener("keydown", handleKeydown);
// Add loading indicator
const loadingIndicator = document.createElement('div');
loadingIndicator.id = 'loading-indicator';
loadingIndicator.style.position = 'fixed';
loadingIndicator.style.top = '50%';
loadingIndicator.style.left = '50%';
loadingIndicator.style.transform = 'translate(-50%, -50%)';
loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)';
loadingIndicator.style.color = 'white';
loadingIndicator.style.padding = '20px';
loadingIndicator.style.borderRadius = '5px';
loadingIndicator.style.zIndex = '1000';
loadingIndicator.textContent = 'Loading EPUB...';
document.body.appendChild(loadingIndicator);
// Remove indicator when loaded
book.on('ready', function() {
const indicator = document.getElementById('loading-indicator');
if (indicator) {
indicator.remove();
}
});
} catch (err) {
debug("Error setting up event listeners", err.message);
console.error("Event listener setup error:", err);
}
} catch (err) {
debug("EPUB initialization error", err);
document.getElementById("viewer").innerHTML =
'<div class="error">Failed to load EPUB: ' + err.message + '</div>';
}
}
// Initialize when DOM is loaded
debug("Setting up DOMContentLoaded listener");
document.addEventListener('DOMContentLoaded', initializeEPUB);
</script>
<footer>
<p>&copy; 2025 Book Search Engine</p>
</footer>
</body>
</html>

View file

@ -1,141 +1,141 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Book Files</title>
<link rel="stylesheet" href="/static/css/style.css">
<style>
.file-list {
list-style: none;
padding: 0;
}
.file-item {
display: flex;
justify-content: space-between;
padding: 8px 0;
border-bottom: 1px solid #eee;
}
.file-name {
flex: 1;
word-break: break-all;
}
.file-size {
color: #666;
min-width: 80px;
text-align: right;
}
.book-title {
font-weight: bold;
color: #333;
}
.file-name-muted {
color: #999;
font-size: 0.9em;
margin-left: 8px;
}
.summary {
background: #f5f5f5;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.summary-item {
display: flex;
justify-content: space-between;
margin-bottom: 5px;
}
.summary-label {
font-weight: bold;
}
.indexing-status {
background: #fff8e1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
border-left: 4px solid #ffc107;
}
.indexing-link {
color: #2196f3;
text-decoration: none;
}
.indexing-link:hover {
text-decoration: underline;
}
.plain-view-link {
font-size: 0.8em;
color: #666;
text-decoration: none;
margin-left: 8px;
}
.plain-view-link:hover {
text-decoration: underline;
color: #2196f3;
}
</style>
</head>
<body>
<header>
<h1>Book Files</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div class="summary">
<div class="summary-item">
<span class="summary-label">Total Files:</span>
<span>{{ total_files }}</span>
</div>
<div class="summary-item">
<span class="summary-label">Total Size:</span>
<span>{{ total_size_mb }} MB</span>
</div>
</div>
{% if indexing_in_progress %}
<div class="indexing-status">
Indexing is currently in progress.
<a href="/index_books" class="indexing-link">View re-indexing progress</a>
</div>
{% endif %}
<h2>Available Files</h2>
{% if files %}
<ul class="file-list">
{% for file in files %}
<li class="file-item">
<span class="file-name">
<a href="/file/{{ file.path }}">
{% if file.path.endswith('.epub') %}
<br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a>
{% endif %}
{% if file.title != file.name %}
<span class="book-title">{{ file.title }}</span>
<span class="file-name-muted">{{ file.name }}</span>
{% else %}
{{ file.name }}
{% endif %}
</a>
</span>
<span class="file-size">{{ file.size_mb }} MB</span>
</li>
{% endfor %}
</ul>
{% else %}
<p>No files available. Please add files to the books directory.</p>
{% endif %}
</div>
<footer>
<p>&copy; 2025 Intari</p>
</footer>
</body>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Book Files</title>
<link rel="stylesheet" href="/static/css/style.css">
<style>
.file-list {
list-style: none;
padding: 0;
}
.file-item {
display: flex;
justify-content: space-between;
padding: 8px 0;
border-bottom: 1px solid #eee;
}
.file-name {
flex: 1;
word-break: break-all;
}
.file-size {
color: #666;
min-width: 80px;
text-align: right;
}
.book-title {
font-weight: bold;
color: #333;
}
.file-name-muted {
color: #999;
font-size: 0.9em;
margin-left: 8px;
}
.summary {
background: #f5f5f5;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
}
.summary-item {
display: flex;
justify-content: space-between;
margin-bottom: 5px;
}
.summary-label {
font-weight: bold;
}
.indexing-status {
background: #fff8e1;
padding: 15px;
border-radius: 5px;
margin-bottom: 20px;
border-left: 4px solid #ffc107;
}
.indexing-link {
color: #2196f3;
text-decoration: none;
}
.indexing-link:hover {
text-decoration: underline;
}
.plain-view-link {
font-size: 0.8em;
color: #666;
text-decoration: none;
margin-left: 8px;
}
.plain-view-link:hover {
text-decoration: underline;
color: #2196f3;
}
</style>
</head>
<body>
<header>
<h1>Book Files</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div class="summary">
<div class="summary-item">
<span class="summary-label">Total Files:</span>
<span>{{ total_files }}</span>
</div>
<div class="summary-item">
<span class="summary-label">Total Size:</span>
<span>{{ total_size_mb }} MB</span>
</div>
</div>
{% if indexing_in_progress %}
<div class="indexing-status">
Indexing is currently in progress.
<a href="/index_books" class="indexing-link">View re-indexing progress</a>
</div>
{% endif %}
<h2>Available Files</h2>
{% if files %}
<ul class="file-list">
{% for file in files %}
<li class="file-item">
<span class="file-name">
<a href="/file/{{ file.path }}">
{% if file.path.endswith('.epub') %}
<br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a>
{% endif %}
{% if file.title != file.name %}
<span class="book-title">{{ file.title }}</span>
<span class="file-name-muted">{{ file.name }}</span>
{% else %}
{{ file.name }}
{% endif %}
</a>
</span>
<span class="file-size">{{ file.size_mb }} MB</span>
</li>
{% endfor %}
</ul>
{% else %}
<p>No files available. Please add files to the books directory.</p>
{% endif %}
</div>
<footer>
<p>&copy; 2025 Intari</p>
</footer>
</body>
</html>

View file

@ -1,163 +1,163 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Indexing Books</title>
<link rel="stylesheet" href="/static/css/style.css">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
<header>
<h1>Indexing Books</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div class="progress-container">
<h2>Indexing Progress</h2>
<div class="progress-stats">
<span id="processed-files">0</span> of <span id="total-files">0</span> files processed
<span id="percentage">0%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" id="progress-fill"></div>
</div>
<div class="current-file" id="current-file">
Current file: Starting indexing...
</div>
<div class="time-stats">
<div class="time-stat">
<div>CPU cores:</div>
<div>{{ used_cpus }} of {{ available_cpus }}</div>
</div>
<div class="time-stat">
<div>Time elapsed:</div>
<div id="elapsed-time">0m 0s</div>
</div>
<div class="time-stat">
<div>Estimated remaining:</div>
<div id="estimated-remaining">Calculating...</div>
</div>
<div class="time-stat">
<div>Estimated completion:</div>
<div id="estimated-completion">Calculating...</div>
</div>
<div class="time-stat">
<div>Files per minute:</div>
<div id="files-per-minute">0</div>
</div>
</div>
<button class="abort-button" id="abort-button">Abort Indexing</button>
</div>
<div class="progress-details">
<h3>Recent Errors</h3>
<div id="error-list" class="error-list">
No errors yet
</div>
</div>
</div>
<footer>
<p>&copy; 2025 Intari</p>
</footer>
<script>
const progressFill = document.getElementById('progress-fill');
const processedFiles = document.getElementById('processed-files');
const totalFiles = document.getElementById('total-files');
const percentage = document.getElementById('percentage');
const currentFile = document.getElementById('current-file');
const elapsedTime = document.getElementById('elapsed-time');
const estimatedRemaining = document.getElementById('estimated-remaining');
const estimatedCompletion = document.getElementById('estimated-completion');
const filesPerMinute = document.getElementById('files-per-minute');
const errorList = document.getElementById('error-list');
const abortButton = document.getElementById('abort-button');
let updateInterval;
let speedChart;
// Update progress every second
function updateProgress() {
// Get browser timezone
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
fetch('/indexing_progress', {
headers: {
'X-Timezone': timezone
}
})
.then(response => response.json())
.then(data => {
if (data.status === 'not_running') {
// Indexing completed
clearInterval(updateInterval);
window.location.href = '/files';
return;
}
// Update progress bar
progressFill.style.width = `${data.percentage}%`;
processedFiles.textContent = data.processed_files;
totalFiles.textContent = data.total_files;
percentage.textContent = `${data.percentage.toFixed(1)}%`;
// Update current file
currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`;
// Update time stats
elapsedTime.textContent = data.elapsed_time;
estimatedRemaining.textContent = data.estimated_remaining;
estimatedCompletion.textContent = data.estimated_completion;
// Calculate files per minute
if (data.elapsed_time) {
const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number);
const totalSeconds = min * 60 + sec;
if (totalSeconds > 0) {
const fpm = (data.processed_files / totalSeconds * 60).toFixed(1);
filesPerMinute.textContent = fpm;
}
}
// Update errors
if (data.errors && data.errors.length > 0) {
errorList.innerHTML = data.errors.map(err =>
`<div class="error-item">${err}</div>`
).join('');
}
})
.catch(error => {
console.error('Error fetching progress:', error);
});
}
// Start updating progress
updateInterval = setInterval(updateProgress, 1000);
updateProgress();
// Handle abort button
abortButton.addEventListener('click', () => {
if (confirm('Are you sure you want to abort indexing?')) {
fetch('/abort_indexing', { method: 'POST' })
.then(response => response.json())
.then(data => {
alert(data.message);
});
}
});
</script>
</body>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Indexing Books</title>
<link rel="stylesheet" href="/static/css/style.css">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
<header>
<h1>Indexing Books</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div class="progress-container">
<h2>Indexing Progress</h2>
<div class="progress-stats">
<span id="processed-files">0</span> of <span id="total-files">0</span> files processed
<span id="percentage">0%</span>
</div>
<div class="progress-bar">
<div class="progress-fill" id="progress-fill"></div>
</div>
<div class="current-file" id="current-file">
Current file: Starting indexing...
</div>
<div class="time-stats">
<div class="time-stat">
<div>CPU cores:</div>
<div>{{ used_cpus }} of {{ available_cpus }}</div>
</div>
<div class="time-stat">
<div>Time elapsed:</div>
<div id="elapsed-time">0m 0s</div>
</div>
<div class="time-stat">
<div>Estimated remaining:</div>
<div id="estimated-remaining">Calculating...</div>
</div>
<div class="time-stat">
<div>Estimated completion:</div>
<div id="estimated-completion">Calculating...</div>
</div>
<div class="time-stat">
<div>Files per minute:</div>
<div id="files-per-minute">0</div>
</div>
</div>
<button class="abort-button" id="abort-button">Abort Indexing</button>
</div>
<div class="progress-details">
<h3>Recent Errors</h3>
<div id="error-list" class="error-list">
No errors yet
</div>
</div>
</div>
<footer>
<p>&copy; 2025 Intari</p>
</footer>
<script>
const progressFill = document.getElementById('progress-fill');
const processedFiles = document.getElementById('processed-files');
const totalFiles = document.getElementById('total-files');
const percentage = document.getElementById('percentage');
const currentFile = document.getElementById('current-file');
const elapsedTime = document.getElementById('elapsed-time');
const estimatedRemaining = document.getElementById('estimated-remaining');
const estimatedCompletion = document.getElementById('estimated-completion');
const filesPerMinute = document.getElementById('files-per-minute');
const errorList = document.getElementById('error-list');
const abortButton = document.getElementById('abort-button');
let updateInterval;
let speedChart;
// Update progress every second
function updateProgress() {
// Get browser timezone
const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
fetch('/indexing_progress', {
headers: {
'X-Timezone': timezone
}
})
.then(response => response.json())
.then(data => {
if (data.status === 'not_running') {
// Indexing completed
clearInterval(updateInterval);
window.location.href = '/files';
return;
}
// Update progress bar
progressFill.style.width = `${data.percentage}%`;
processedFiles.textContent = data.processed_files;
totalFiles.textContent = data.total_files;
percentage.textContent = `${data.percentage.toFixed(1)}%`;
// Update current file
currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`;
// Update time stats
elapsedTime.textContent = data.elapsed_time;
estimatedRemaining.textContent = data.estimated_remaining;
estimatedCompletion.textContent = data.estimated_completion;
// Calculate files per minute
if (data.elapsed_time) {
const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number);
const totalSeconds = min * 60 + sec;
if (totalSeconds > 0) {
const fpm = (data.processed_files / totalSeconds * 60).toFixed(1);
filesPerMinute.textContent = fpm;
}
}
// Update errors
if (data.errors && data.errors.length > 0) {
errorList.innerHTML = data.errors.map(err =>
`<div class="error-item">${err}</div>`
).join('');
}
})
.catch(error => {
console.error('Error fetching progress:', error);
});
}
// Start updating progress
updateInterval = setInterval(updateProgress, 1000);
updateProgress();
// Handle abort button
abortButton.addEventListener('click', () => {
if (confirm('Are you sure you want to abort indexing?')) {
fetch('/abort_indexing', { method: 'POST' })
.then(response => response.json())
.then(data => {
alert(data.message);
});
}
});
</script>
</body>
</html>

View file

@ -1,56 +1,56 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Book Search</title>
<link rel="stylesheet" href="/static/css/style.css">
</head>
<body>
<header>
<h1>Book Search Engine</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div class="search-container">
<form action="/search" method="GET">
<input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}">
<button type="submit" class="search-button">Search</button>
</form>
</div>
{% if results %}
<div class="results">
<h2>Search Results</h2>
{% for result in results %}
<div class="result-item">
<h3>{{ result.file_path.split('/')[-1] }}</h3>
<p>{{ result.snippet }}</p>
<div class="file-actions">
<a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a>
<span class="action-separator">|</span>
<a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a>
</div>
</div>
{% endfor %}
</div>
{% elif query %}
<div class="results">
<p>No results found for "{{ query }}"</p>
</div>
{% endif %}
</div>
<footer>
<p>&copy; 2025 Intari</p>
</footer>
</body>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Book Search</title>
<link rel="stylesheet" href="/static/css/style.css">
</head>
<body>
<header>
<h1>Book Search Engine</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
<div class="search-container">
<form action="/search" method="GET">
<input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}">
<button type="submit" class="search-button">Search</button>
</form>
</div>
{% if results %}
<div class="results">
<h2>Search Results</h2>
{% for result in results %}
<div class="result-item">
<h3>{{ result.file_path.split('/')[-1] }}</h3>
<p>{{ result.snippet }}</p>
<div class="file-actions">
<a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a>
<span class="action-separator">|</span>
<a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a>
</div>
</div>
{% endfor %}
</div>
{% elif query %}
<div class="results">
<p>No results found for "{{ query }}"</p>
</div>
{% endif %}
</div>
<footer>
<p>&copy; 2025 Intari</p>
</footer>
</body>
</html>

View file

@ -1,64 +1,64 @@
и<!DOCTYPE html>
<html>
<head>
<title>{{ file_path }}</title>
<link rel="stylesheet" href="/static/css/style.css">
<style>
pre {
background-color: white;
padding: 20px;
border-radius: 5px;
white-space: pre-wrap;
word-wrap: break-word;
}
.html-content {
background-color: white;
padding: 20px;
border-radius: 5px;
}
.html-content hr {
margin: 30px 0;
border: 0;
border-top: 1px solid #eee;
}
.html-content h1,
.html-content h2,
.html-content h3,
.html-content h4,
.html-content h5,
.html-content h6 {
margin: 1em 0 0.5em 0;
line-height: 1.2;
}
.html-content p {
margin: 0 0 1em 0;
line-height: 1.5;
}
</style>
</head>
<body>
<header>
<h1>File: {{ file_path }}</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
{% if is_html %}
<div class="html-content">{{ content|safe }}</div>
{% else %}
<pre>{{ content }}</pre>
{% endif %}
</div>
<footer>
<p>&copy; 2025 Book Search Engine</p>
</footer>
</body>
и<!DOCTYPE html>
<html>
<head>
<title>{{ file_path }}</title>
<link rel="stylesheet" href="/static/css/style.css">
<style>
pre {
background-color: white;
padding: 20px;
border-radius: 5px;
white-space: pre-wrap;
word-wrap: break-word;
}
.html-content {
background-color: white;
padding: 20px;
border-radius: 5px;
}
.html-content hr {
margin: 30px 0;
border: 0;
border-top: 1px solid #eee;
}
.html-content h1,
.html-content h2,
.html-content h3,
.html-content h4,
.html-content h5,
.html-content h6 {
margin: 1em 0 0.5em 0;
line-height: 1.2;
}
.html-content p {
margin: 0 0 1em 0;
line-height: 1.5;
}
</style>
</head>
<body>
<header>
<h1>File: {{ file_path }}</h1>
</header>
<nav class="nav">
<ul>
<li><a href="/">Home</a></li>
<li><a href="/files">File List</a></li>
<li><a href="/index_books">Re-Index Books</a></li>
</ul>
</nav>
<div class="container">
{% if is_html %}
<div class="html-content">{{ content|safe }}</div>
{% else %}
<pre>{{ content }}</pre>
{% endif %}
</div>
<footer>
<p>&copy; 2025 Book Search Engine</p>
</footer>
</body>
</html>

View file

@ -1,142 +1,142 @@
from elasticsearch import Elasticsearch
import os
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
import PyPDF2
import time
from threading import Lock
# Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
INDEX_NAME = "book_index"
# Global variables for progress tracking
indexing_progress = {
'total_files': 0,
'processed_files': 0,
'start_time': None,
'is_running': False,
'current_file': '',
'errors': []
}
progress_lock = Lock()
def create_index():
if not es.indices.exists(index=INDEX_NAME):
es.indices.create(index=INDEX_NAME)
def extract_text_from_epub(epub_path):
book = epub.read_epub(epub_path)
text = ''
for item in book.get_items():
if item.media_type == 'application/xhtml+xml':
soup = BeautifulSoup(item.get_content(), 'html.parser')
text += soup.get_text()
return text
def extract_text_from_pdf(pdf_path):
text = ''
with open(pdf_path, 'rb') as pdf_file:
pdf_reader = PyPDF2.PdfReader(pdf_file)
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text()
return text
def get_progress():
with progress_lock:
if not indexing_progress['is_running']:
return None
progress = indexing_progress.copy()
if progress['total_files'] > 0:
progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100
else:
progress['percentage'] = 0
elapsed = time.time() - progress['start_time']
progress['elapsed_time'] = elapsed
if progress['processed_files'] > 0:
time_per_file = elapsed / progress['processed_files']
remaining_files = progress['total_files'] - progress['processed_files']
progress['estimated_remaining'] = time_per_file * remaining_files
progress['estimated_completion'] = time.time() + progress['estimated_remaining']
else:
progress['estimated_remaining'] = 0
progress['estimated_completion'] = 0
return progress
def index_files(directory):
global indexing_progress
with progress_lock:
indexing_progress = {
'total_files': 0,
'processed_files': 0,
'start_time': time.time(),
'is_running': True,
'current_file': '',
'errors': []
}
try:
create_index()
# First count all files
total_files = 0
for root, _, files in os.walk(directory):
for file in files:
if file.endswith(('.epub', '.pdf', '.txt')):
total_files += 1
with progress_lock:
indexing_progress['total_files'] = total_files
# Now process files
for root, _, files in os.walk(directory):
for file in files:
file_path = os.path.join(root, file)
with progress_lock:
indexing_progress['current_file'] = file_path
try:
encoded_file_path = file_path.encode('utf-8').decode('utf-8')
if file_path.endswith(".epub"):
text = extract_text_from_epub(file_path)
elif file_path.endswith(".pdf"):
text = extract_text_from_pdf(file_path)
elif file_path.endswith(".txt"):
with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f:
text = f.read()
else:
print(f"Skipping unsupported file type: {file_path}")
continue
doc = {
'file_path': file_path,
'content': text
}
es.index(index=INDEX_NAME, document=doc)
print(f"Indexed: {file_path}")
with progress_lock:
indexing_progress['processed_files'] += 1
except Exception as e:
error_msg = f"Error indexing {file_path}: {type(e)}, {e}"
print(error_msg)
with progress_lock:
indexing_progress['errors'].append(error_msg)
finally:
with progress_lock:
indexing_progress['is_running'] = False
if __name__ == '__main__':
BOOKS_DIR = "/books" # This should match the volume mount in docker-compose.yml
from elasticsearch import Elasticsearch
import os
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
import PyPDF2
import time
from threading import Lock
# Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
INDEX_NAME = "book_index"
# Global variables for progress tracking
indexing_progress = {
'total_files': 0,
'processed_files': 0,
'start_time': None,
'is_running': False,
'current_file': '',
'errors': []
}
progress_lock = Lock()
def create_index():
if not es.indices.exists(index=INDEX_NAME):
es.indices.create(index=INDEX_NAME)
def extract_text_from_epub(epub_path):
book = epub.read_epub(epub_path)
text = ''
for item in book.get_items():
if item.media_type == 'application/xhtml+xml':
soup = BeautifulSoup(item.get_content(), 'html.parser')
text += soup.get_text()
return text
def extract_text_from_pdf(pdf_path):
text = ''
with open(pdf_path, 'rb') as pdf_file:
pdf_reader = PyPDF2.PdfReader(pdf_file)
for page_num in range(len(pdf_reader.pages)):
page = pdf_reader.pages[page_num]
text += page.extract_text()
return text
def get_progress():
with progress_lock:
if not indexing_progress['is_running']:
return None
progress = indexing_progress.copy()
if progress['total_files'] > 0:
progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100
else:
progress['percentage'] = 0
elapsed = time.time() - progress['start_time']
progress['elapsed_time'] = elapsed
if progress['processed_files'] > 0:
time_per_file = elapsed / progress['processed_files']
remaining_files = progress['total_files'] - progress['processed_files']
progress['estimated_remaining'] = time_per_file * remaining_files
progress['estimated_completion'] = time.time() + progress['estimated_remaining']
else:
progress['estimated_remaining'] = 0
progress['estimated_completion'] = 0
return progress
def index_files(directory):
global indexing_progress
with progress_lock:
indexing_progress = {
'total_files': 0,
'processed_files': 0,
'start_time': time.time(),
'is_running': True,
'current_file': '',
'errors': []
}
try:
create_index()
# First count all files
total_files = 0
for root, _, files in os.walk(directory):
for file in files:
if file.endswith(('.epub', '.pdf', '.txt')):
total_files += 1
with progress_lock:
indexing_progress['total_files'] = total_files
# Now process files
for root, _, files in os.walk(directory):
for file in files:
file_path = os.path.join(root, file)
with progress_lock:
indexing_progress['current_file'] = file_path
try:
encoded_file_path = file_path.encode('utf-8').decode('utf-8')
if file_path.endswith(".epub"):
text = extract_text_from_epub(file_path)
elif file_path.endswith(".pdf"):
text = extract_text_from_pdf(file_path)
elif file_path.endswith(".txt"):
with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f:
text = f.read()
else:
print(f"Skipping unsupported file type: {file_path}")
continue
doc = {
'file_path': file_path,
'content': text
}
es.index(index=INDEX_NAME, document=doc)
print(f"Indexed: {file_path}")
with progress_lock:
indexing_progress['processed_files'] += 1
except Exception as e:
error_msg = f"Error indexing {file_path}: {type(e)}, {e}"
print(error_msg)
with progress_lock:
indexing_progress['errors'].append(error_msg)
finally:
with progress_lock:
indexing_progress['is_running'] = False
if __name__ == '__main__':
BOOKS_DIR = "/books" # This should match the volume mount in docker-compose.yml
index_files(BOOKS_DIR)

View file

@ -1,7 +1,7 @@
flask==3.0.2
ebooklib==0.18
beautifulsoup4==4.12.3
pytest==8.3.2
PyPDF2==3.0.1
pytz==2024.1
flask==3.0.2
ebooklib==0.18
beautifulsoup4==4.12.3
pytest==8.3.2
PyPDF2==3.0.1
pytz==2024.1
elasticsearch>=8.0.0

View file

@ -1,127 +1,127 @@
import unittest
import json
import os
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock
class BookSearchAPITest(unittest.TestCase):
def setUp(self):
app.config['TESTING'] = True
self.client = app.test_client()
# Create a temporary directory for test books
self.test_books_dir = tempfile.mkdtemp()
# Create a sample test file
self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt')
with open(self.sample_file_path, 'w', encoding='utf-8') as f:
f.write("This is a test sample file for testing the book search API.")
def tearDown(self):
# Remove the temporary directory
shutil.rmtree(self.test_books_dir)
@patch('app.es')
@patch('app.index_files')
def test_index_books_api(self, mock_index_files, mock_es):
# Mock the index_files function
mock_index_files.return_value = None
# Test the API endpoint
response = self.client.get('/index_books', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected message
data = json.loads(response.data)
self.assertIn('message', data)
self.assertEqual(data['message'], 'Indexing completed')
# Check if the index_files function was called
mock_index_files.assert_called_once_with('/books')
@patch('app.es')
def test_search_api(self, mock_es):
# Mock the Elasticsearch search method
mock_search_result = {
'hits': {
'hits': [
{
'_source': {
'file_path': '/books/test_sample.txt',
'content': 'This is a test sample file for testing the book search API.'
}
}
]
}
}
mock_es.search.return_value = mock_search_result
# Test the API endpoint
response = self.client.get('/search?query=test', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data
data = json.loads(response.data)
self.assertEqual(len(data), 1)
self.assertEqual(data[0]['file_path'], '/books/test_sample.txt')
self.assertIn('snippet', data[0])
# Check if the Elasticsearch search method was called with the correct parameters
mock_es.search.assert_called_once()
@patch('app.os.listdir')
@patch('app.os.path.isfile')
def test_list_files_api(self, mock_isfile, mock_listdir):
# Mock the os.listdir function
mock_listdir.return_value = ['test_sample.txt', 'another_file.txt']
# Mock the os.path.isfile function to always return True
mock_isfile.return_value = True
# Test the API endpoint
response = self.client.get('/files', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data
data = json.loads(response.data)
self.assertEqual(len(data), 2)
self.assertEqual(data[0]['name'], 'test_sample.txt')
self.assertEqual(data[1]['name'], 'another_file.txt')
# Check if the os.listdir function was called with the correct parameters
mock_listdir.assert_called_once_with('/books')
@patch('app.open')
@patch('app.os.path.isfile')
@patch('app.os.path.abspath')
def test_get_file_api(self, mock_abspath, mock_isfile, mock_open):
# Mock the necessary functions
mock_isfile.return_value = True
mock_abspath.side_effect = lambda x: x # Return the input unchanged
# Mock the open function
mock_file = MagicMock()
mock_file.__enter__.return_value.read.return_value = "This is a test sample file."
mock_open.return_value = mock_file
# Test the API endpoint
response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data
self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.")
# Check if the open function was called with the correct parameters
mock_open.assert_called_once()
if __name__ == '__main__':
import unittest
import json
import os
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock
class BookSearchAPITest(unittest.TestCase):
def setUp(self):
app.config['TESTING'] = True
self.client = app.test_client()
# Create a temporary directory for test books
self.test_books_dir = tempfile.mkdtemp()
# Create a sample test file
self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt')
with open(self.sample_file_path, 'w', encoding='utf-8') as f:
f.write("This is a test sample file for testing the book search API.")
def tearDown(self):
# Remove the temporary directory
shutil.rmtree(self.test_books_dir)
@patch('app.es')
@patch('app.index_files')
def test_index_books_api(self, mock_index_files, mock_es):
# Mock the index_files function
mock_index_files.return_value = None
# Test the API endpoint
response = self.client.get('/index_books', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected message
data = json.loads(response.data)
self.assertIn('message', data)
self.assertEqual(data['message'], 'Indexing completed')
# Check if the index_files function was called
mock_index_files.assert_called_once_with('/books')
@patch('app.es')
def test_search_api(self, mock_es):
# Mock the Elasticsearch search method
mock_search_result = {
'hits': {
'hits': [
{
'_source': {
'file_path': '/books/test_sample.txt',
'content': 'This is a test sample file for testing the book search API.'
}
}
]
}
}
mock_es.search.return_value = mock_search_result
# Test the API endpoint
response = self.client.get('/search?query=test', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data
data = json.loads(response.data)
self.assertEqual(len(data), 1)
self.assertEqual(data[0]['file_path'], '/books/test_sample.txt')
self.assertIn('snippet', data[0])
# Check if the Elasticsearch search method was called with the correct parameters
mock_es.search.assert_called_once()
@patch('app.os.listdir')
@patch('app.os.path.isfile')
def test_list_files_api(self, mock_isfile, mock_listdir):
# Mock the os.listdir function
mock_listdir.return_value = ['test_sample.txt', 'another_file.txt']
# Mock the os.path.isfile function to always return True
mock_isfile.return_value = True
# Test the API endpoint
response = self.client.get('/files', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data
data = json.loads(response.data)
self.assertEqual(len(data), 2)
self.assertEqual(data[0]['name'], 'test_sample.txt')
self.assertEqual(data[1]['name'], 'another_file.txt')
# Check if the os.listdir function was called with the correct parameters
mock_listdir.assert_called_once_with('/books')
@patch('app.open')
@patch('app.os.path.isfile')
@patch('app.os.path.abspath')
def test_get_file_api(self, mock_abspath, mock_isfile, mock_open):
# Mock the necessary functions
mock_isfile.return_value = True
mock_abspath.side_effect = lambda x: x # Return the input unchanged
# Mock the open function
mock_file = MagicMock()
mock_file.__enter__.return_value.read.return_value = "This is a test sample file."
mock_open.return_value = mock_file
# Test the API endpoint
response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'})
# Check if the response is successful
self.assertEqual(response.status_code, 200)
# Check if the response contains the expected data
self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.")
# Check if the open function was called with the correct parameters
mock_open.assert_called_once()
if __name__ == '__main__':
unittest.main()

View file

@ -1,114 +1,114 @@
import os
import pytest
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock
from ebooklib import epub
def create_test_epub():
"""Create a simple test EPUB file"""
# Create a simple EPUB file
book = epub.EpubBook()
# Set metadata
book.set_identifier('test123456')
book.set_title('Test EPUB Book')
book.set_language('en')
book.add_author('Test Author')
# Add a chapter
c1 = epub.EpubHtml(title='Chapter 1', file_name='chap_01.xhtml', lang='en')
c1.content = '<html><body><h1>Chapter 1</h1><p>This is a test EPUB file.</p></body></html>'
book.add_item(c1)
# Add navigation
book.toc = [c1]
book.spine = ['nav', c1]
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
# Create temp directory
temp_dir = tempfile.mkdtemp()
epub_path = os.path.join(temp_dir, 'test.epub')
# Write the EPUB file
epub.write_epub(epub_path, book)
return epub_path, temp_dir
@pytest.fixture
def client():
"""Create a test client for the Flask app"""
app.config['TESTING'] = True
with app.test_client() as client:
yield client
@pytest.fixture
def test_epub():
"""Create a test EPUB file and clean up after the test"""
epub_path, temp_dir = create_test_epub()
# Mock the books directory
original_join = os.path.join
def mock_join(path, *paths):
if path == "/books" and paths and paths[0] == "test.epub":
return epub_path
return original_join(path, *paths)
def mock_abspath(path):
if path == os.path.join("/books", "test.epub"):
return "/books/test.epub"
elif path == epub_path:
return "/books/test.epub"
return path
with patch('os.path.join', side_effect=mock_join):
with patch('os.path.isfile', return_value=True):
with patch('os.path.abspath', side_effect=mock_abspath):
yield epub_path
# Clean up
shutil.rmtree(temp_dir)
def test_epub_viewer_page(client, test_epub):
"""Test that the EPUB viewer page loads correctly"""
response = client.get('/file/test.epub')
assert response.status_code == 200
assert b'<!DOCTYPE html>' in response.data
assert b'<title>test.epub</title>' in response.data
assert b'<div id="viewer"></div>' in response.data
assert b'<script src="https://cdn.jsdelivr.net/npm/epubjs' in response.data
def test_epub_file_endpoint(client, test_epub):
"""Test that the EPUB file is served with correct headers"""
response = client.get('/epub/test.epub')
assert response.status_code == 200
assert response.headers['Content-Type'] == 'application/epub+zip'
assert response.headers['Access-Control-Allow-Origin'] == '*'
# Check that the response contains EPUB data (at least the magic number)
assert response.data.startswith(b'PK')
def test_epub_viewer_integration(client, test_epub):
"""Test the integration between the viewer and the EPUB file"""
# This test would ideally use Selenium or Playwright to test the actual rendering
# Since we can't run a browser in this environment, we'll check for the correct setup
# First, check that the viewer page loads
viewer_response = client.get('/file/test.epub')
assert viewer_response.status_code == 200
# Check that the JavaScript is correctly set up to load the EPUB
assert b'/epub/test.epub' in viewer_response.data
# Check that the EPUB file is accessible
epub_response = client.get('/epub/test.epub')
assert epub_response.status_code == 200
assert epub_response.headers['Content-Type'] == 'application/epub+zip'
if __name__ == '__main__':
import os
import pytest
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock
from ebooklib import epub
def create_test_epub():
"""Create a simple test EPUB file"""
# Create a simple EPUB file
book = epub.EpubBook()
# Set metadata
book.set_identifier('test123456')
book.set_title('Test EPUB Book')
book.set_language('en')
book.add_author('Test Author')
# Add a chapter
c1 = epub.EpubHtml(title='Chapter 1', file_name='chap_01.xhtml', lang='en')
c1.content = '<html><body><h1>Chapter 1</h1><p>This is a test EPUB file.</p></body></html>'
book.add_item(c1)
# Add navigation
book.toc = [c1]
book.spine = ['nav', c1]
book.add_item(epub.EpubNcx())
book.add_item(epub.EpubNav())
# Create temp directory
temp_dir = tempfile.mkdtemp()
epub_path = os.path.join(temp_dir, 'test.epub')
# Write the EPUB file
epub.write_epub(epub_path, book)
return epub_path, temp_dir
@pytest.fixture
def client():
"""Create a test client for the Flask app"""
app.config['TESTING'] = True
with app.test_client() as client:
yield client
@pytest.fixture
def test_epub():
"""Create a test EPUB file and clean up after the test"""
epub_path, temp_dir = create_test_epub()
# Mock the books directory
original_join = os.path.join
def mock_join(path, *paths):
if path == "/books" and paths and paths[0] == "test.epub":
return epub_path
return original_join(path, *paths)
def mock_abspath(path):
if path == os.path.join("/books", "test.epub"):
return "/books/test.epub"
elif path == epub_path:
return "/books/test.epub"
return path
with patch('os.path.join', side_effect=mock_join):
with patch('os.path.isfile', return_value=True):
with patch('os.path.abspath', side_effect=mock_abspath):
yield epub_path
# Clean up
shutil.rmtree(temp_dir)
def test_epub_viewer_page(client, test_epub):
"""Test that the EPUB viewer page loads correctly"""
response = client.get('/file/test.epub')
assert response.status_code == 200
assert b'<!DOCTYPE html>' in response.data
assert b'<title>test.epub</title>' in response.data
assert b'<div id="viewer"></div>' in response.data
assert b'<script src="https://cdn.jsdelivr.net/npm/epubjs' in response.data
def test_epub_file_endpoint(client, test_epub):
"""Test that the EPUB file is served with correct headers"""
response = client.get('/epub/test.epub')
assert response.status_code == 200
assert response.headers['Content-Type'] == 'application/epub+zip'
assert response.headers['Access-Control-Allow-Origin'] == '*'
# Check that the response contains EPUB data (at least the magic number)
assert response.data.startswith(b'PK')
def test_epub_viewer_integration(client, test_epub):
"""Test the integration between the viewer and the EPUB file"""
# This test would ideally use Selenium or Playwright to test the actual rendering
# Since we can't run a browser in this environment, we'll check for the correct setup
# First, check that the viewer page loads
viewer_response = client.get('/file/test.epub')
assert viewer_response.status_code == 200
# Check that the JavaScript is correctly set up to load the EPUB
assert b'/epub/test.epub' in viewer_response.data
# Check that the EPUB file is accessible
epub_response = client.get('/epub/test.epub')
assert epub_response.status_code == 200
assert epub_response.headers['Content-Type'] == 'application/epub+zip'
if __name__ == '__main__':
pytest.main(['-xvs', __file__])