Update application
parent 6a46a20c9e
commit 891787be0f
25 changed files with 2474 additions and 2474 deletions
.env
.github/workflows
.gitignore
Dockerfile
ai.md
docker-compose.yml
plugin
readme.md
roocodetest.code-workspace
scripts
src
tests/unit
118 .env
@@ -1,60 +1,60 @@
# Application Configuration
# ========================

# Base URL for the application (required, string)
# Format: http://hostname:port or https://hostname:port
BASE_URL=http://localhost:8000

# CPU Limit for container (optional, float)
# Number of CPU cores to allocate (e.g., 0.5, 1, 2)
# Default: 2 (will be used if not specified)
CPU_LIMIT=2

# Snippet character limit (optional, integer)
# Maximum length for text snippets in characters
# Default: 100
SNIPPET_CHAR_LIMIT=100

# Debug mode (optional, boolean)
# Enable debug output when set to True
# Default: False
DEBUG=False

# Application port (optional, integer)
# Port the application listens on
# Default: 5000
PORT=5000


# Elasticsearch Configuration
# ==========================

# Elasticsearch host (required, string)
# Hostname or IP of Elasticsearch service
ELASTICSEARCH_HOST=elasticsearch

# Elasticsearch username (sensitive, required, string)
# Admin username for Elasticsearch
ELASTICSEARCH_USERNAME=admin

# Elasticsearch password (sensitive, required, string)
# Admin password for Elasticsearch
ELASTICSEARCH_PASSWORD=password


# File Storage Configuration
# =========================

# SMB share path (optional, string)
# Local path where books are mounted
# Default: ./smb_share
SMB_SHARE_PATH=./smb_share

# Admin Credentials
# ================

# Admin username for API management (required, string)
ADMIN_USER=admin

# Admin password for API management (required, string)
ADMIN_PASSWORD=securepassword123
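For reference, a minimal sketch (not part of this commit) of how an application could consume these variables with the defaults documented above; the `load_config` helper itself is hypothetical, only the key names and defaults come from the .env file:

```python
import os

def load_config():
    """Hypothetical helper mirroring the documented .env keys and defaults."""
    return {
        "base_url": os.environ.get("BASE_URL", "http://localhost:8000"),
        "cpu_limit": float(os.environ.get("CPU_LIMIT", "2")),
        "snippet_char_limit": int(os.environ.get("SNIPPET_CHAR_LIMIT", "100")),
        "debug": os.environ.get("DEBUG", "False").lower() == "true",
        "port": int(os.environ.get("PORT", "5000")),
        "smb_share_path": os.environ.get("SMB_SHARE_PATH", "./smb_share"),
    }
```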
76 .github/workflows/test-epub-viewer.yml (vendored)
@@ -1,39 +1,39 @@
name: Test EPUB Viewer

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v3

    - name: Set up Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.10'

    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pytest
        pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

    - name: Run tests
      run: |
        cd api
        python -m pytest test_epub_viewer.py -v

    - name: Run integration tests with Playwright
      run: |
        pip install playwright pytest-playwright
        playwright install
        cd api
        python -m pytest test_epub_viewer_integration.py -v
      if: false # Disabled until we create the integration tests with Playwright
58 .gitignore (vendored)
@@ -1,30 +1,30 @@
# Ignore all EPUB files in smb_share
smb_share/*.epub

# Ignore sample text file
smb_share/sample.txt

# Python cache
__pycache__/
*.py[cod]
*$py.class

# Virtual environment
venv/

# IDE specific files
.vscode/
.idea/

# Logs and databases
*.log
*.sqlite

# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
56 Dockerfile
@@ -1,29 +1,29 @@
FROM python:3.9-alpine

WORKDIR /app

# Install dependencies
RUN pip install flask elasticsearch ebooklib beautifulsoup4 PyPDF2 pytz

# Create books directory with proper permissions
RUN mkdir -p /books && chmod 777 /books

# Copy the API code and static files
COPY src/api/app.py .
COPY src/api/static /app/static
COPY src/api/templates /app/templates

# Expose the API port
EXPOSE 5000

# Copy the indexing script
COPY src/core/index.py .

# Copy the test file
COPY tests/unit/test_app.py .

# Add a dummy file to invalidate cache
ADD dummy.txt .

# Command to run the API
CMD ["python", "app.py"]
64 ai.md
@@ -1,33 +1,33 @@
# Final Deployment Status

## Configuration Summary:
1. **Container Auto-Restart**:
   - Both services configured with `restart: unless-stopped`
   - Containers will automatically restart on failures

2. **Resource Limits**:
   - CPU: `${CPU_LIMIT}` cores
   - Memory: 2GB limit

3. **Dependencies**:
   - pytz installed in container (version 2025.2)
   - All required Python packages verified
   - Dockerfile updated to include pytz for future builds

4. **Known Issues**:
   - Docker Compose v1.25.0 limitations:
     - Doesn't respect `container_name` directives
     - Shows harmless `deploy` key warnings
   - Solution: Upgrade to Docker Compose v2.x

## Verification:
- All services running
- CORS headers properly configured
- pytz module successfully imported (version 2025.2)
- API endpoints functional

## System Status: OPERATIONAL
- API: Running on port 8000
- Elasticsearch: Running on port 9200
- Auto-restart configured
- All features functional
docker-compose.yml
@@ -1,39 +1,39 @@
version: '3.7'
services:
  booksearch_app:
    build: .
    container_name: booksearch_app
    ports:
      - "8000:5000"
    environment:
      - ELASTICSEARCH_HOST=booksearch_elastic
      - BASE_URL=${BASE_URL}
      - CPU_LIMIT=${CPU_LIMIT}
      - SNIPPET_CHAR_LIMIT=${SNIPPET_CHAR_LIMIT}
    volumes:
      - ./smb_share:/books
    depends_on:
      - booksearch_elastic
    restart: unless-stopped
    deploy:
      resources:
        limits:
          cpus: ${CPU_LIMIT}
          memory: 2G

  booksearch_elastic:
    container_name: booksearch_elastic
    image: bitnami/elasticsearch:latest
    ports:
      - "9200:9200"
      - "9300:9300"
    environment:
      - discovery.type=single-node
      - ELASTICSEARCH_USERNAME=admin
      - ELASTICSEARCH_PASSWORD=password
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:9200"]
      interval: 30s
      timeout: 10s
      retries: 5
@@ -1,109 +1,109 @@
function search_books(params, userSettings) {
  const query = params.query;
  const apiUrl = (userSettings.apiUrl || 'http://localhost:8000').replace(/\/$/, '');
  const useProxy = userSettings.useProxy || false;
  const proxyUrl = userSettings.proxyUrl || 'https://cors-anywhere.herokuapp.com/';

  // Debugging headers - WARNING: Only for development/testing
  const debugHeaders = userSettings.debugHeaders || {};

  if (!query) {
    throw new Error('Search query is required');
  }

  // Prepare the target URL
  const targetUrl = `${apiUrl}/search?query=${encodeURIComponent(query)}`;
  const requestUrl = useProxy ? `${proxyUrl}${targetUrl}` : targetUrl;

  // Add timeout handling
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), 10000);

  // Prepare headers
  const headers = {
    'Accept': 'application/json',
    ...(useProxy ? { 'X-Requested-With': 'XMLHttpRequest' } : {}),
    ...debugHeaders // Add debug headers if provided
  };

  return fetch(requestUrl, {
    method: 'GET',
    headers: headers,
    signal: controller.signal
  })
  .then(async response => {
    clearTimeout(timeoutId);

    if (!response.ok) {
      const errorBody = await response.text().catch(() => '');
      throw new Error(`API request failed with status ${response.status}. Response: ${errorBody}`);
    }

    const contentType = response.headers.get('content-type');
    if (!contentType || !contentType.includes('application/json')) {
      throw new Error(`Invalid content type: ${contentType}`);
    }

    return response.json();
  })
  .then(results => {
    if (!Array.isArray(results)) {
      throw new Error(`Invalid response format. Expected array, got ${typeof results}`);
    }

    if (results.length === 0) {
      return 'No books found matching your search';
    }

    // Format results with book paths and snippets
    return results.map(result => {
      if (!result.file_path || !result.snippet) {
        throw new Error('Invalid result format - missing required fields');
      }

      // Create properly encoded URL
      let formattedUrl = '';
      if (result.raw_url) {
        try {
          // Split URL into parts and encode components separately
          const url = new URL(result.raw_url);
          const pathParts = url.pathname.split('/').map(part =>
            encodeURIComponent(part).replace(/'/g, "%27")
          );
          const search = url.search ? '?' + encodeURIComponent(url.search.slice(1)) : '';
          formattedUrl = `${url.origin}${pathParts.join('/')}${search}`;
        } catch (e) {
          formattedUrl = result.raw_url; // Fallback to original if URL parsing fails
        }
      }

      return `Book: ${result.file_path}\n` +
             `Snippet: ${result.snippet}\n` +
             (formattedUrl ? `URL: ${formattedUrl}\n` : '');
    }).join('\n\n');
  })
  .catch(error => {
    clearTimeout(timeoutId);
    let errorMessage = `Error searching books: ${error.message}`;

    if (error.name === 'AbortError') {
      errorMessage += '\n\nDiagnostics: Request timed out. Check if:';
      errorMessage += `\n- The API is running at ${apiUrl}`;
      errorMessage += '\n- The server is accessible from your network';
      if (!useProxy) {
        errorMessage += '\n- Try enabling proxy in plugin settings';
      }
    } else if (error.message.includes('Failed to fetch') || error.message.includes('CORS')) {
      errorMessage += '\n\nDiagnostics: Network request failed. Check if:';
      errorMessage += `\n- The API URL (${apiUrl}) is correct`;
      errorMessage += '\n- CORS is properly configured on the server';
      errorMessage += '\n- The server is running and accessible';
      if (!useProxy) {
        errorMessage += '\n- Try enabling proxy in plugin settings to bypass CORS';
      }
      errorMessage += '\n- For debugging, you can add CORS headers in plugin settings';
    }

    return errorMessage;
  });
}
@@ -1,16 +1,16 @@
{
  "name": "search_books",
  "description": "Search for books by content using the book search API.",
  "parameters": {
    "type": "object",
    "properties": {
      "query": {
        "type": "string",
        "description": "The search query to find matching book content"
      }
    },
    "required": [
      "query"
    ]
  }
}
@@ -1,45 +1,45 @@
[
  {
    "name": "cors_allow_origin",
    "label": "CORS Allowed Origin",
    "required": false,
    "default": "*",
    "description": "Value for Access-Control-Allow-Origin header, typically '*' for public APIs"
  },
  {
    "name": "cors_allow_methods",
    "label": "CORS Allowed Methods",
    "required": false,
    "default": "GET, POST, PUT",
    "description": "Comma-separated HTTP methods for Access-Control-Allow-Methods header"
  },
  {
    "name": "cors_allow_headers",
    "label": "CORS Allowed Headers",
    "required": false,
    "default": "Content-Type",
    "description": "Comma-separated headers for Access-Control-Allow-Headers"
  },
  {
    "name": "proxyUrl",
    "label": "Proxy Server URL",
    "required": false,
    "default": "",
    "description": "URL of the proxy server to use for external requests"
  },
  {
    "name": "bookSearchAPIKey",
    "label": "Search Engine API Key",
    "type": "password",
    "default": "",
    "required": false,
    "description": "API key to use when making requests (not yet used)"
  },
  {
    "name": "apiUrl",
    "label": "API Base URL",
    "required": false,
    "default": "http://localhost:8000",
    "description": "Base URL for the API endpoints"
  }
]
480 readme.md
@@ -1,241 +1,241 @@

# What it IS?

## TypeMind Plugin: EPUB/PDF/TXT Search Integration

A plugin for [TypeMind](https://docs.typingmind.com/plugins/build-a-typingmind-plugin) that mimics the **WebSearch** feature but focuses on retrieving books/documents. Users can query, e.g., *"Find me books about Hecate"*, and the plugin returns **clickable links** to relevant files (EPUB, PDF, TXT).

### Features
- **File Formats**: Supports EPUB, PDF, and TXT (assumed compatibility).
- **Requirement**: Users must provide their own files for indexing.

### Technical Context
- **Language**: Python.
- **Skill Level**:
  - My Python knowledge is **extremely rusty** (last project: a not-too-simple game bot years ago).
  - Self-assessment: **Python novice**.
- **Tools Used**:
  - **Sonnet 3.7** and **DeepSeek-V3-0324** (for AI/ML integration).
  - **RooCode**

### Purpose
1. **Experiment**: Test RooCode's capabilities and identify practical applications.
2. **Non-Production**: **⚠️ Do NOT deploy this in production** (even if "fixed" by someone).

---

### Key Notes
- Humor/self-deprecation preserved (e.g., "extremely rusty," "novice").
- Technical terms standardized (Sonnet 3.7, DeepSeek-V3-0324).
- Critical warnings emphasized (**bold + emoji** for production risk).


# Application Deployment Guide (Ubuntu LTS)

## Prerequisites

### System Requirements
- Ubuntu 22.04 LTS (64-bit)
- Minimum 2 CPU cores, 4GB RAM
- 20GB free disk space
- Open ports: 8000 (app), 9200 (Elasticsearch)

### Required Software
```bash
# Update package lists
sudo apt update

# Install Docker and Docker Compose
sudo apt install -y docker.io docker-compose
sudo systemctl enable --now docker

# Add current user to docker group (logout required)
sudo usermod -aG docker $USER
```

## Environment Configuration

1. Clone the repository:
```bash
git clone https://github.com/intari/roocodetests_1.git
cd roocodetests_1
```

2. Configure environment variables:
```bash
# Copy example .env file
cp .env.example .env

# Edit configuration (nano/vim)
nano .env
```
Key variables to configure:
- `BASE_URL`: Public URL of your application
- `ELASTICSEARCH_PASSWORD`: Secure password for Elasticsearch
- `CPU_LIMIT`: CPU cores to allocate (default: 2)

## Application Deployment

1. Start all services:
```bash
docker-compose up -d
```

2. Verify services are running:
```bash
docker-compose ps
```

3. Check application logs:
```bash
docker-compose logs -f api
```

4. Access the application:
- Web interface: http://your-server-ip:8000
- Elasticsearch: http://your-server-ip:9200

## Maintenance

### Restart & rebuild
```bash
docker-compose down && docker-compose up -d --build
```

### Logs
```bash
docker logs booksearch_app -f
```

### Log Rotation
Configure Docker log rotation in `/etc/docker/daemon.json`:
```json
{
  "log-driver": "json-file",
  "log-opts": {
    "max-size": "10m",
    "max-file": "3"
  }
}
```
Then restart Docker:
```bash
sudo systemctl restart docker
```

### Backups
1. Create backup script (`/usr/local/bin/backup-app.sh`):
```bash
#!/bin/bash
BACKUP_DIR=/var/backups/app
mkdir -p $BACKUP_DIR
docker-compose exec -T elasticsearch curl -X POST "localhost:9200/_snapshot/backup_repo/_all" -H "Content-Type: application/json"
docker-compose exec -T elasticsearch curl -X GET "localhost:9200/_snapshot/backup_repo/snapshot_$(date +%Y-%m-%d)?pretty"
```

2. Make executable and schedule daily cron job:
```bash
sudo chmod +x /usr/local/bin/backup-app.sh
sudo crontab -e
# Add: 0 3 * * * /usr/local/bin/backup-app.sh
```

### Updates
1. Pull latest changes:
```bash
git pull origin main
```

2. Rebuild containers:
```bash
docker-compose up -d --build
```

## Troubleshooting

### Common Issues

**Application not starting:**
```bash
# Check container status
docker ps -a

# View logs
docker-compose logs api
```

**Elasticsearch health issues:**
```bash
# Check cluster health
curl -X GET "localhost:9200/_cluster/health?pretty"

# Check node stats
curl -X GET "localhost:9200/_nodes/stats?pretty"
```

**Port conflicts:**
```bash
# Check used ports
sudo netstat -tulnp

# Change ports in docker-compose.yml if needed
```

### Debugging
1. Access running container shell:
```bash
docker-compose exec api bash
```

2. Check resource usage:
```bash
docker stats
```

## Check Request via JSON
```bash
curl -H "Accept: application/json" -X GET "https://booksearch.yourdomain.com/search?query=android"

# Simple search
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search?query=android"

# Search with format parameter
curl "https://booksearch.yourdomain.com/search?query=android&format=json"

# Error case (missing query)
curl -H "Accept: application/json" "https://booksearch.yourdomain.com/search"
```

## API Endpoints

### Search API
```
GET /search?query={query}[&format=json]
```

### Reset Elasticsearch Index
```
POST /reset_index
Headers:
- Authorization: Basic base64(username:password)
```

Example:
```bash
curl -X POST -u admin:securepassword123 https://booksearch.yourdomain.com/reset_index
```

## References
- [Ubuntu Docker Installation](https://docs.docker.com/engine/install/ubuntu/)
- [Docker Compose Reference](https://docs.docker.com/compose/reference/)
- [Elasticsearch Documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/index.html)


## Plugin-alt
Alternative configuration for the plugin: call the search endpoint directly.

Method: GET
https://booksearch.yourdomain.com/search?query={prompt}&format=json

Request headers:
{
  "Accept": "application/json"
}
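For quick local testing of this JSON mode, a minimal Python sketch that issues the same request and prints the hits; it assumes the response shape produced by src/api/app.py in this commit (an object whose `results` list carries `file_path`, `snippet`, and `raw_url`), and the placeholder hostname above:

```python
import requests

# Query the search API the same way the Plugin-alt configuration does.
resp = requests.get(
    "https://booksearch.yourdomain.com/search",
    params={"query": "android", "format": "json"},
    headers={"Accept": "application/json"},
    timeout=10,
)
resp.raise_for_status()
data = resp.json()
for item in data.get("results", []):
    print(f"Book: {item['file_path']}")
    print(f"Snippet: {item['snippet']}")
    if item.get("raw_url"):
        print(f"URL: {item['raw_url']}")
    print()
```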
roocodetest.code-workspace
@@ -1,8 +1,8 @@
{
  "folders": [
    {
      "path": "."
    }
  ],
  "settings": {}
}
@@ -1,37 +1,37 @@
@echo off
echo Setting up test environment...

echo Checking Python version...
python --version
if errorlevel 1 (
    echo Python not found. Please install Python 3.10+ first.
    pause
    exit /b 1
)

echo Installing Python dependencies...
python -m pip install --upgrade pip --user
if errorlevel 1 (
    echo Failed to upgrade pip
    pause
    exit /b 1
)

pip install -r requirements.txt --user
if errorlevel 1 (
    echo Failed to install dependencies
    pause
    exit /b 1
)

echo Running EPUB viewer tests...
cd api
python -m pytest test_epub_viewer.py -v
if errorlevel 1 (
    echo Some tests failed
    pause
    exit /b 1
)

echo All tests completed successfully!
pause
@@ -1,29 +1,29 @@
#!/bin/bash

echo "Setting up test environment..."

echo "Checking Python version..."
python3 --version || {
    echo "Python 3 not found. Please install Python 3.10+ first."
    exit 1
}

echo "Installing Python dependencies..."
python3 -m pip install --upgrade pip --user || {
    echo "Failed to upgrade pip"
    exit 1
}

pip3 install -r requirements.txt --user || {
    echo "Failed to install dependencies"
    exit 1
}

echo "Running EPUB viewer tests..."
cd api
python3 -m pytest test_epub_viewer.py -v || {
    echo "Some tests failed"
    exit 1
}

echo "All tests completed successfully!"
810 src/api/app.py
|
@ -1,406 +1,406 @@
|
||||||
from flask import Flask, request, jsonify, render_template, send_from_directory
|
from flask import Flask, request, jsonify, render_template, send_from_directory
|
||||||
from urllib.parse import unquote
|
from urllib.parse import unquote
|
||||||
from elasticsearch import Elasticsearch
|
from elasticsearch import Elasticsearch
|
||||||
import os
|
import os
|
||||||
import ebooklib
|
import ebooklib
|
||||||
from ebooklib import epub
|
from ebooklib import epub
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
import PyPDF2
|
import PyPDF2
|
||||||
import time
|
import time
|
||||||
import logging
|
import logging
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
sys.path.append(str(Path(__file__).parent.parent))
|
sys.path.append(str(Path(__file__).parent.parent))
|
||||||
from index import index_files, get_progress
|
from index import index_files, get_progress
|
||||||
from io import StringIO
|
from io import StringIO
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
app = Flask(__name__, static_folder='static')
|
app = Flask(__name__, static_folder='static')
|
||||||
|
|
||||||
@app.after_request
|
@app.after_request
|
||||||
def add_cors_headers(response):
|
def add_cors_headers(response):
|
||||||
response.headers['Access-Control-Allow-Origin'] = '*'
|
response.headers['Access-Control-Allow-Origin'] = '*'
|
||||||
response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT'
|
response.headers['Access-Control-Allow-Methods'] = 'GET, POST, PUT'
|
||||||
response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
|
response.headers['Access-Control-Allow-Headers'] = 'Content-Type'
|
||||||
return response
|
return response
|
||||||
|
|
||||||
# Elasticsearch Configuration
|
# Elasticsearch Configuration
|
||||||
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
|
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
|
||||||
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
|
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
|
||||||
INDEX_NAME = "book_index"
|
INDEX_NAME = "book_index"
|
||||||
|
|
||||||
# Wait for Elasticsearch to be available
|
# Wait for Elasticsearch to be available
|
||||||
es = None
|
es = None
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
|
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
|
||||||
if es.ping():
|
if es.ping():
|
||||||
print("Connected to Elasticsearch")
|
print("Connected to Elasticsearch")
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
print("Elasticsearch not available, retrying...")
|
print("Elasticsearch not available, retrying...")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"Error connecting to Elasticsearch: {e}")
|
print(f"Error connecting to Elasticsearch: {e}")
|
||||||
time.sleep(5)
|
time.sleep(5)
|
||||||
|
|
||||||
def extract_text_from_epub(epub_path):
|
def extract_text_from_epub(epub_path):
|
||||||
try:
|
try:
|
||||||
book = epub.read_epub(epub_path)
|
book = epub.read_epub(epub_path)
|
||||||
text = ''
|
text = ''
|
||||||
for item in book.get_items():
|
for item in book.get_items():
|
||||||
if item.media_type == 'application/xhtml+xml':
|
if item.media_type == 'application/xhtml+xml':
|
||||||
content = item.get_content()
|
content = item.get_content()
|
||||||
if content:
|
if content:
|
||||||
soup = BeautifulSoup(content, 'html.parser')
|
soup = BeautifulSoup(content, 'html.parser')
|
||||||
text += soup.get_text()
|
text += soup.get_text()
|
||||||
return text
|
return text
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Error processing EPUB {epub_path}: {str(e)}")
|
logging.error(f"Error processing EPUB {epub_path}: {str(e)}")
|
||||||
return f"Error extracting text: {str(e)}"
|
return f"Error extracting text: {str(e)}"
|
||||||
|
|
||||||
def extract_text_from_pdf(pdf_path):
|
def extract_text_from_pdf(pdf_path):
|
||||||
text = ''
|
text = ''
|
||||||
with open(pdf_path, 'rb') as pdf_file:
|
with open(pdf_path, 'rb') as pdf_file:
|
||||||
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
||||||
for page_num in range(len(pdf_reader.pages)):
|
for page_num in range(len(pdf_reader.pages)):
|
||||||
page = pdf_reader.pages[page_num]
|
page = pdf_reader.pages[page_num]
|
||||||
text += page.extract_text()
|
text += page.extract_text()
|
||||||
return text
|
return text
|
||||||
|
|
||||||
@app.route('/', methods=['GET'])
|
@app.route('/', methods=['GET'])
|
||||||
def home():
|
def home():
|
||||||
return render_template('search.html')
|
return render_template('search.html')
|
||||||
|
|
||||||
@app.route('/search', methods=['GET'])
|
@app.route('/search', methods=['GET'])
|
||||||
def search():
|
def search():
|
||||||
query = request.args.get('query')
|
query = request.args.get('query')
|
||||||
if not query:
|
if not query:
|
||||||
if request.headers.get('Accept') == 'application/json':
|
if request.headers.get('Accept') == 'application/json':
|
||||||
return jsonify({"error": "Query parameter is required"}), 400
|
return jsonify({"error": "Query parameter is required"}), 400
|
||||||
return render_template('search.html', query='')
|
return render_template('search.html', query='')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
results = es.search(index=INDEX_NAME, query={'match': {'content': query}})
|
results = es.search(index=INDEX_NAME, query={'match': {'content': query}})
|
||||||
hits = results['hits']['hits']
|
hits = results['hits']['hits']
|
||||||
|
|
||||||
search_results = []
|
search_results = []
|
||||||
for hit in hits:
|
for hit in hits:
|
||||||
file_path = hit['_source']['file_path']
|
file_path = hit['_source']['file_path']
|
||||||
content = hit['_source']['content']
|
content = hit['_source']['content']
|
||||||
|
|
||||||
# Highlight snippet (simple version)
|
# Highlight snippet (simple version)
|
||||||
snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100))
|
snippet_char_limit = int(os.environ.get("SNIPPET_CHAR_LIMIT", 100))
|
||||||
index = content.lower().find(query.lower())
|
index = content.lower().find(query.lower())
|
||||||
if index != -1:
|
if index != -1:
|
||||||
start = max(0, index - snippet_char_limit)
|
start = max(0, index - snippet_char_limit)
|
||||||
end = min(len(content), index + snippet_char_limit + len(query))
|
end = min(len(content), index + snippet_char_limit + len(query))
|
||||||
snippet = content[start:end]
|
snippet = content[start:end]
|
||||||
else:
|
else:
|
||||||
snippet = "No snippet found"
|
snippet = "No snippet found"
|
||||||
|
|
||||||
# Get base URL from environment
|
# Get base URL from environment
|
||||||
base_url = os.environ.get("BASE_URL", "http://localhost:8000")
|
base_url = os.environ.get("BASE_URL", "http://localhost:8000")
|
||||||
# Construct URLs
|
# Construct URLs
|
||||||
# Remove "/books/" from path start if it's here
|
# Remove "/books/" from path start if it's here
|
||||||
if file_path.startswith("/books/"):
|
if file_path.startswith("/books/"):
|
||||||
file_path = file_path[len("/books/"):]
|
file_path = file_path[len("/books/"):]
|
||||||
|
|
||||||
url = f"{base_url}/{file_path}"
|
url = f"{base_url}/{file_path}"
|
||||||
raw_url = f"{base_url}/file/{file_path}?format=html"
|
raw_url = f"{base_url}/file/{file_path}?format=html"
|
||||||
|
|
||||||
search_results.append({
|
search_results.append({
|
||||||
"file_path": file_path,
|
"file_path": file_path,
|
||||||
"url": url,
|
"url": url,
|
||||||
"raw_url": raw_url,
|
"raw_url": raw_url,
|
||||||
"snippet": snippet,
|
"snippet": snippet,
|
||||||
"score": hit['_score']
|
"score": hit['_score']
|
||||||
})
|
})
|
||||||
|
|
||||||
# If it's an API request or format=json is specified
|
# If it's an API request or format=json is specified
|
||||||
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
|
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
|
||||||
response = jsonify({
|
response = jsonify({
|
||||||
"query": query,
|
"query": query,
|
||||||
"results": search_results,
|
"results": search_results,
|
||||||
"total": len(search_results),
|
"total": len(search_results),
|
||||||
"took": results['took']
|
"took": results['took']
|
||||||
})
|
})
|
||||||
response.headers['Content-Type'] = 'application/json'
|
response.headers['Content-Type'] = 'application/json'
|
||||||
return response
|
return response
|
||||||
|
|
||||||
# Otherwise, render the HTML template
|
# Otherwise, render the HTML template
|
||||||
return render_template('search.html', results=search_results, query=query)
|
return render_template('search.html', results=search_results, query=query)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
|
if request.headers.get('Accept') == 'application/json' or request.args.get('format') == 'json':
|
||||||
response = jsonify({
|
response = jsonify({
|
||||||
"error": str(e),
|
"error": str(e),
|
||||||
"query": query
|
"query": query
|
||||||
})
|
})
|
||||||
response.headers['Content-Type'] = 'application/json'
|
response.headers['Content-Type'] = 'application/json'
|
||||||
return response, 500
|
return response, 500
|
||||||
return render_template('search.html', error=str(e), query=query)
|
return render_template('search.html', error=str(e), query=query)
|
||||||
|
|
||||||
@app.route('/files', methods=['GET'])
|
@app.route('/files', methods=['GET'])
|
||||||
def list_files():
|
def list_files():
|
||||||
books_dir = "/books"
|
books_dir = "/books"
|
||||||
files = []
|
files = []
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Check if indexing is in progress
|
# Check if indexing is in progress
|
||||||
indexing_in_progress = get_progress() is not None
|
indexing_in_progress = get_progress() is not None
|
||||||
|
|
||||||
for filename in os.listdir(books_dir):
|
for filename in os.listdir(books_dir):
|
||||||
file_path = os.path.join(books_dir, filename)
|
file_path = os.path.join(books_dir, filename)
|
||||||
if os.path.isfile(file_path):
|
if os.path.isfile(file_path):
|
||||||
file_size = os.path.getsize(file_path)
|
file_size = os.path.getsize(file_path)
|
||||||
# Extract book title from filename if possible
|
# Extract book title from filename if possible
|
||||||
title = filename
|
title = filename
|
||||||
if ' - ' in filename: # Common pattern in filenames
|
if ' - ' in filename: # Common pattern in filenames
|
||||||
title_parts = filename.split(' - ')
|
title_parts = filename.split(' - ')
|
||||||
if len(title_parts) > 1:
|
if len(title_parts) > 1:
|
||||||
title = ' - '.join(title_parts[:-1]) # Take all but last part
|
title = ' - '.join(title_parts[:-1]) # Take all but last part
|
||||||
|
|
||||||
files.append({
|
files.append({
|
||||||
'name': filename,
|
'name': filename,
|
||||||
'title': title,
|
'title': title,
|
||||||
'path': filename,
|
'path': filename,
|
||||||
'size': file_size,
|
'size': file_size,
|
||||||
'size_mb': round(file_size / (1024 * 1024), 2)
|
'size_mb': round(file_size / (1024 * 1024), 2)
|
||||||
})
|
})
|
||||||
|
|
||||||
# Calculate totals
|
# Calculate totals
|
||||||
total_files = len(files)
|
total_files = len(files)
|
||||||
total_size = sum(f['size'] for f in files)
|
total_size = sum(f['size'] for f in files)
|
||||||
total_size_mb = round(total_size / (1024 * 1024), 2)
|
total_size_mb = round(total_size / (1024 * 1024), 2)
|
||||||
|
|
||||||
# If it's an API request, return JSON
|
# If it's an API request, return JSON
|
||||||
if request.headers.get('Accept') == 'application/json':
|
if request.headers.get('Accept') == 'application/json':
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'files': files,
|
'files': files,
|
||||||
'total_files': total_files,
|
'total_files': total_files,
|
||||||
'total_size': total_size,
|
'total_size': total_size,
|
||||||
'total_size_mb': total_size_mb,
|
'total_size_mb': total_size_mb,
|
||||||
'indexing_in_progress': indexing_in_progress
|
'indexing_in_progress': indexing_in_progress
|
||||||
})
|
})
|
||||||
|
|
||||||
# Otherwise, render the HTML template
|
# Otherwise, render the HTML template
|
||||||
return render_template('files.html',
|
return render_template('files.html',
|
||||||
files=files,
|
files=files,
|
||||||
total_files=total_files,
|
total_files=total_files,
|
||||||
total_size=total_size,
|
total_size=total_size,
|
||||||
total_size_mb=total_size_mb,
|
total_size_mb=total_size_mb,
|
||||||
indexing_in_progress=indexing_in_progress)
|
indexing_in_progress=indexing_in_progress)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if request.headers.get('Accept') == 'application/json':
|
if request.headers.get('Accept') == 'application/json':
|
||||||
return jsonify({"error": str(e)}), 500
|
return jsonify({"error": str(e)}), 500
|
||||||
return render_template('files.html', error=str(e))
|
return render_template('files.html', error=str(e))
|
||||||
|
|
||||||
@app.route('/file/<path:file_path>', methods=['GET'])
def get_file(file_path):
    # Ensure the file path is within the /books directory
    books_dir = "/books"

    # Decode URL-encoded path and normalize
    decoded_path = unquote(file_path)
    # Remove any leading slashes or duplicate 'books/' segments
    decoded_path = decoded_path.lstrip('/')
    if decoded_path.startswith('books/'):
        decoded_path = decoded_path[6:]

    # Join paths safely
    full_path = os.path.normpath(os.path.join(books_dir, decoded_path))

    # Validate the path is within the books directory
    if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
        return jsonify({"error": "Access denied: File path outside of books directory"}), 403

    try:
        # Handle EPUB files
        if file_path.lower().endswith('.epub'):
            if request.args.get('format') == 'html':
                # Convert EPUB to HTML
                try:
                    book = epub.read_epub(full_path)
                    html_content = []
                    for item in book.get_items():
                        if item.get_type() == ebooklib.ITEM_DOCUMENT:
                            content = item.get_content()
                            if content:
                                soup = BeautifulSoup(content, 'html.parser')
                                # Preserve basic formatting tags
                                for tag in soup.find_all():
                                    if tag.name not in ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'br', 'div', 'span', 'strong', 'em', 'b', 'i', 'ul', 'ol', 'li']:
                                        tag.unwrap()
                                html_content.append(str(soup))
                except Exception as e:
                    logging.error(f"Error processing EPUB {full_path}: {str(e)}")
                    return jsonify({"error": f"Failed to process EPUB: {str(e)}"}), 500
                return render_template('text_file.html',
                                       file_path=file_path,
                                       content='<hr>'.join(html_content),
                                       is_html=True)
            else:
                # Render the viewer template
                return render_template('epub_viewer.html', file_path=file_path)

        # Handle regular text files
        with open(full_path, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        # If it's an API request or the Accept header doesn't include HTML, return plain text
        if request.headers.get('Accept') == 'application/json' or 'text/html' not in request.headers.get('Accept', ''):
            return content, 200, {'Content-Type': 'text/plain; charset=utf-8'}

        # Otherwise, render a simple HTML page with the content
        return render_template('text_file.html', file_path=file_path, content=content)
    except Exception as e:
        return jsonify({"error": str(e)}), 404

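# A stricter containment check is possible with pathlib: resolving the candidate
# path and requiring it to be relative to the books directory rejects prefix
# look-alikes (e.g. "/books_private") and symlink escapes, which a plain
# startswith() comparison can miss. This helper is only a sketch with a
# hypothetical name; it is not called by the routes above.
def _is_within_books_dir(candidate_path, books_dir="/books"):
    from pathlib import Path
    try:
        base = Path(books_dir).resolve()
        target = Path(candidate_path).resolve()
        # relative_to() raises ValueError when target lies outside base
        target.relative_to(base)
        return True
    except ValueError:
        return False
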
@app.route('/epub/<path:file_path>', methods=['GET'])
def get_epub_file(file_path):
    """Serve the raw EPUB file with proper headers"""
    books_dir = "/books"
    full_path = os.path.join(books_dir, file_path)

    # Validate the path is within the books directory
    if not os.path.abspath(full_path).startswith(os.path.abspath(books_dir)):
        return jsonify({"error": "Access denied: File path outside of books directory"}), 403

    try:
        # Serve the raw EPUB file with proper headers
        response = send_from_directory(
            books_dir,
            file_path,
            as_attachment=True,
            mimetype='application/epub+zip'
        )
        response.headers['Access-Control-Allow-Origin'] = '*'
        response.headers['Access-Control-Allow-Methods'] = 'GET'
        response.headers['Content-Disposition'] = f'attachment; filename="{os.path.basename(file_path)}"'
        return response
    except Exception as e:
        return jsonify({"error": str(e)}), 404

@app.route('/index_books', methods=['GET'])
def index_books():
    logging.info("Indexing books endpoint called")

    # Get CPU configuration
    cpu_limit = os.environ.get("CPU_LIMIT")
    available_cpus = multiprocessing.cpu_count()
    used_cpus = float(cpu_limit) if cpu_limit else max(1, available_cpus - 1)

    # Capture stdout to a string
    old_stdout = sys.stdout
    sys.stdout = captured_output = StringIO()

    try:
        # Start indexing in a separate thread
        from threading import Thread
        index_thread = Thread(target=index_files, args=("/books",))
        index_thread.start()

        # If it's an API request, return immediately
        if request.headers.get('Accept') == 'application/json':
            return jsonify({"message": "Indexing started in background"})

        # Otherwise, render the progress page with CPU info
        return render_template('indexing.html',
                               available_cpus=available_cpus,
                               used_cpus=used_cpus)

    except Exception as e:
        logging.error(f"Indexing failed: {e}")
        sys.stdout = old_stdout

        if request.headers.get('Accept') == 'application/json':
            return jsonify({"error": str(e)}), 500

        # Create a simple HTML response for errors
        return render_template('indexing_error.html', error=str(e))
    finally:
        sys.stdout = old_stdout

@app.route('/indexing_progress', methods=['GET'])
def get_indexing_progress():
    progress = get_progress()
    if progress is None:
        return jsonify({"status": "not_running"})

    # Format time for display
    from datetime import datetime
    import pytz

    # Get the browser timezone from the X-Timezone header, falling back to UTC
    browser_tz = request.headers.get('X-Timezone', 'UTC')
    try:
        tz = pytz.timezone(browser_tz)
    except pytz.UnknownTimeZoneError:
        tz = pytz.UTC

    elapsed_min = int(progress['elapsed_time'] // 60)
    elapsed_sec = int(progress['elapsed_time'] % 60)

    if progress['estimated_remaining'] > 0:
        remaining_min = int(progress['estimated_remaining'] // 60)
        remaining_sec = int(progress['estimated_remaining'] % 60)
        completion_time = datetime.fromtimestamp(progress['estimated_completion'], tz).strftime('%H:%M:%S (%Z)')
    else:
        remaining_min = 0
        remaining_sec = 0
        completion_time = "N/A"

    return jsonify({
        "status": "running",
        "total_files": progress['total_files'],
        "processed_files": progress['processed_files'],
        "percentage": round(progress['percentage'], 1),
        "current_file": progress['current_file'],
        "elapsed_time": f"{elapsed_min}m {elapsed_sec}s",
        "estimated_remaining": f"{remaining_min}m {remaining_sec}s",
        "estimated_completion": completion_time,
        "errors": progress['errors']
    })

@app.route('/abort_indexing', methods=['POST'])
def abort_indexing():
    # In a real implementation, we would set a flag to stop the indexing
    # For now, we'll just return a message
    return jsonify({"status": "abort_requested", "message": "Indexing will stop after current file"})

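# The abort endpoint above only reports that an abort was requested. One common
# way to actually stop a background indexing loop is a shared threading.Event
# that the worker checks between files. The sketch below uses hypothetical
# names (abort_event, iter_book_files, index_single_file) and is not wired into
# index_files() or the route above.
#
#     from threading import Event
#
#     abort_event = Event()
#
#     def index_files_with_abort(directory):
#         for path in iter_book_files(directory):      # hypothetical helper
#             if abort_event.is_set():
#                 logging.info("Indexing aborted before %s", path)
#                 break
#             index_single_file(path)                   # hypothetical helper
#
#     # abort_indexing() would call abort_event.set();
#     # index_books() would call abort_event.clear() before starting the thread.
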
@app.route('/reset_index', methods=['POST'])
def reset_index():
    """Reset the Elasticsearch index by deleting and recreating it"""
    try:
        # Check for basic auth
        auth = request.authorization
        if not auth or auth.username != os.environ.get("ADMIN_USER") or auth.password != os.environ.get("ADMIN_PASSWORD"):
            return jsonify({"error": "Authentication required"}), 401

        # Delete existing index if it exists
        if es.indices.exists(index=INDEX_NAME):
            es.indices.delete(index=INDEX_NAME)

        # Create new index with mapping
        es.indices.create(index=INDEX_NAME, body={
            "settings": {
                "number_of_shards": 1,
                "number_of_replicas": 0
            },
            "mappings": {
                "properties": {
                    "file_path": {"type": "keyword"},
                    "content": {"type": "text"}
                }
            }
        })

        return jsonify({"status": "success", "message": "Index reset successfully"})
    except Exception as e:
        return jsonify({"error": str(e)}), 500

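# Usage sketch for the admin-only reset endpoint above: it expects an HTTP
# basic auth header matching the ADMIN_USER / ADMIN_PASSWORD environment
# variables. The URL and "admin"/"secret" credentials below are placeholders,
# not values taken from this repository. The header is built explicitly because
# the route returns 401 without a WWW-Authenticate challenge.
#
#     import base64
#     import urllib.request
#
#     credentials = base64.b64encode(b"admin:secret").decode()   # placeholder creds
#     req = urllib.request.Request(
#         "http://localhost:8000/reset_index",
#         method="POST",
#         headers={"Authorization": "Basic " + credentials},
#     )
#     with urllib.request.urlopen(req) as resp:
#         print(resp.read().decode())
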
if __name__ == '__main__':
    logging.basicConfig(level=logging.DEBUG)
    logging.info("Starting the API - inside main block")
    app.run(debug=True, host='0.0.0.0')

@@ -1,66 +1,66 @@
#viewerContainer {
    position: absolute;
    top: 50px;
    left: 0;
    right: 0;
    bottom: 0;
    overflow: auto;
}

#viewer {
    width: 100%;
    height: 90vh;
    margin: 0 auto;
}

.controls {
    text-align: center;
    padding: 10px;
}

#prev, #next {
    padding: 10px 20px;
    margin: 10px;
    background: #007bff;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
}

.error {
    color: red;
    padding: 20px;
    text-align: center;
}

header {
    text-align: center;
    padding: 20px 0;
}

.nav {
    background-color: #f8f9fa;
    padding: 10px;
}

.nav ul {
    list-style: none;
    display: flex;
    justify-content: center;
    gap: 20px;
    padding: 0;
    margin: 0;
}

.nav a {
    text-decoration: none;
    color: #007bff;
}

footer {
    text-align: center;
    padding: 20px;
    margin-top: 20px;
    background-color: #f8f9fa;
}

@@ -1,236 +1,236 @@
body {
    font-family: 'Arial', sans-serif;
    line-height: 1.6;
    margin: 0;
    padding: 0;
    background-color: #f4f4f4;
    color: #333;
}

.container {
    width: 80%;
    margin: auto;
    overflow: hidden;
    padding: 20px;
}

header {
    background: #35424a;
    color: white;
    padding: 20px;
    text-align: center;
    border-bottom: 4px solid #1abc9c;
}

header h1 {
    margin: 0;
}

.search-container {
    margin: 30px 0;
    text-align: center;
}

.search-box {
    width: 70%;
    padding: 12px;
    border: 1px solid #ddd;
    border-radius: 4px;
    font-size: 16px;
}

.search-button {
    padding: 12px 24px;
    background: #1abc9c;
    color: white;
    border: none;
    border-radius: 4px;
    cursor: pointer;
    font-size: 16px;
}

.search-button:hover {
    background: #16a085;
}

.results {
    margin-top: 30px;
}

.result-item {
    background: white;
    padding: 15px;
    margin-bottom: 15px;
    border-radius: 5px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

.result-item h3 {
    margin-top: 0;
    color: #1abc9c;
}

.result-item p {
    margin-bottom: 10px;
}

.result-item a {
    color: #3498db;
    text-decoration: none;
}

.result-item a:hover {
    text-decoration: underline;
}

.file-list {
    list-style: none;
    padding: 0;
}

.file-list li {
    background: white;
    padding: 15px;
    margin-bottom: 10px;
    border-radius: 5px;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}

.file-list a {
    color: #3498db;
    text-decoration: none;
    font-weight: bold;
}

.file-list a:hover {
    text-decoration: underline;
}

.nav {
    background: #35424a;
    color: white;
    padding: 10px 0;
}

.nav ul {
    padding: 0;
    list-style: none;
    text-align: center;
}

.nav li {
    display: inline;
    margin: 0 15px;
}

.nav a {
    color: white;
    text-decoration: none;
}

.nav a:hover {
    color: #1abc9c;
}

footer {
    background: #35424a;
    color: white;
    text-align: center;
    padding: 20px;
    margin-top: 40px;
}

/* Indexing page styles */
.progress-container {
    margin: 20px 0;
    padding: 20px;
    background: #f5f5f5;
    border-radius: 5px;
}

.progress-bar {
    height: 20px;
    background: #e0e0e0;
    border-radius: 10px;
    margin: 10px 0;
    overflow: hidden;
}

.progress-fill {
    height: 100%;
    background: #4CAF50;
    width: 0%;
    transition: width 0.3s;
}

.progress-stats {
    display: flex;
    justify-content: space-between;
    margin-bottom: 10px;
}

.progress-details {
    margin-top: 20px;
}

.current-file {
    font-weight: bold;
    margin: 10px 0;
    word-break: break-all;
}

.time-stats {
    display: grid;
    grid-template-columns: repeat(2, 1fr);
    gap: 10px;
    margin-top: 15px;
}

.time-stat {
    background: #e9e9e9;
    padding: 10px;
    border-radius: 5px;
}

.abort-button {
    background: #f44336;
    color: white;
    border: none;
    padding: 10px 20px;
    border-radius: 5px;
    cursor: pointer;
    margin-top: 20px;
}

.abort-button:hover {
    background: #d32f2f;
}

.error-list {
    margin-top: 10px;
}

.error-item {
    padding: 10px;
    margin-bottom: 5px;
    background: #ffebee;
    border-left: 3px solid #f44336;
}

.file-actions {
    margin-top: 10px;
}

.file-action {
    color: #3498db;
    text-decoration: none;
}

.file-action:hover {
    text-decoration: underline;
}

.action-separator {
    margin: 0 5px;
    color: #999;
}

@@ -1,262 +1,262 @@
<!DOCTYPE html>
<html>
<head>
    <title>{{ file_path }}</title>
    <link rel="stylesheet" href="/static/css/style.css">
    <link rel="stylesheet" href="/static/css/epub_viewer.css">
    <script src="https://cdn.jsdelivr.net/npm/epubjs@0.3.93/dist/epub.min.js"></script>
    <script src="https://cdn.jsdelivr.net/npm/jszip@3.10.1/dist/jszip.min.js"></script>
</head>
<body>
    <header>
        <h1>EPUB: {{ file_path }}</h1>
    </header>

    <nav class="nav">
        <ul>
            <li><a href="/">Home</a></li>
            <li><a href="/files">File List</a></li>
            <li><a href="/index_books">Re-Index Books</a></li>
        </ul>
    </nav>

    <div class="container">
        <div id="viewer"></div>
        <div class="controls">
            <button id="prev">Previous</button>
            <button id="next">Next</button>
        </div>
    </div>

    <script>
        // Debug logging function
        function debug(message, obj) {
            console.log("EPUB DEBUG: " + message, obj || '');
            // Add to page for visibility
            const debugDiv = document.getElementById('debug-output') ||
                (function() {
                    const div = document.createElement('div');
                    div.id = 'debug-output';
                    div.style.position = 'fixed';
                    div.style.bottom = '10px';
                    div.style.right = '10px';
                    div.style.backgroundColor = 'rgba(0,0,0,0.7)';
                    div.style.color = 'white';
                    div.style.padding = '10px';
                    div.style.maxHeight = '200px';
                    div.style.overflow = 'auto';
                    div.style.zIndex = '9999';
                    document.body.appendChild(div);
                    return div;
                })();

            const logEntry = document.createElement('div');
            logEntry.textContent = message + (obj ? ': ' + JSON.stringify(obj) : '');
            debugDiv.appendChild(logEntry);
        }

        // Global variables
        var book = null;
        var rendition = null;

        function handlePrev() {
            debug("Previous button clicked");
            if (!rendition) {
                debug("ERROR: rendition not available for prev");
                return;
            }
            try {
                rendition.prev().then(() => {
                    debug("Navigation to previous page successful");
                }).catch(err => {
                    debug("Navigation to previous page failed", err.message);
                });
            } catch (err) {
                debug("Error in prev navigation", err.message);
            }
        }

        function handleNext() {
            debug("Next button clicked");
            if (!rendition) {
                debug("ERROR: rendition not available for next");
                return;
            }
            try {
                rendition.next().then(() => {
                    debug("Navigation to next page successful");
                }).catch(err => {
                    debug("Navigation to next page failed", err.message);
                });
            } catch (err) {
                debug("Error in next navigation", err.message);
            }
        }

        function handleKeydown(e) {
            if (!rendition) {
                debug("ERROR: rendition not available for keydown");
                return;
            }
            if (e.keyCode === 37) {
                debug("Left arrow key pressed");
                rendition.prev();
            }
            if (e.keyCode === 39) {
                debug("Right arrow key pressed");
                rendition.next();
            }
        }

        function initializeEPUB() {
            debug("Initializing EPUB viewer");

            try {
                // Use dedicated endpoint for EPUB files
                const fileUrl = "/epub/" + encodeURIComponent("{{ file_path }}");
                debug("Loading EPUB from URL", fileUrl);
                // TEST_EPUB_URL: /epub/{{ file_path }}

                // Create book object
                window.book = book = ePub(fileUrl);
                debug("Book object created successfully");
                console.log("Book object details:", book);

                if (!book) {
                    throw new Error("Failed to initialize EPUB reader");
                }

                // Set up error handler
                book.on('error', function(err) {
                    debug("EPUB error event", err);
                    document.getElementById("viewer").innerHTML =
                        '<div class="error">Error loading EPUB: ' + err.message + '</div>';
                });

                // Set up ready handler
                book.on('ready', function() {
                    debug("Book ready event fired");
                });

                // Create rendition
                debug("Creating rendition");
                window.rendition = rendition = book.renderTo("viewer", {
                    width: "100%",
                    height: "100%",
                    spread: "none",
                    manager: "continuous",
                    style: `
                        body {
                            margin: 0;
                            padding: 20px;
                            background-color: white;
                            color: black;
                            font-size: 1.2em;
                            line-height: 1.5;
                        }
                        img {
                            max-width: 100%;
                        }
                    `
                });

                // Hide iframe initially to prevent flash of unstyled content
                const viewer = document.getElementById("viewer");
                if (viewer) {
                    viewer.style.visibility = "hidden";
                }

                debug("Displaying rendition");
                rendition.display()
                    .then(() => {
                        debug("Rendition displayed successfully");

                        // Set up resize handler
                        const resizeHandler = function() {
                            try {
                                if (rendition) {
                                    rendition.resize();
                                }
                            } catch (err) {
                                console.error("Resize error:", err);
                            }
                        };
                        window.addEventListener('resize', resizeHandler);

                        // Show content and initialize navigation
                        setTimeout(() => {
                            try {
                                if (rendition) {
                                    rendition.resize();
                                    const viewer = document.getElementById('viewer');
                                    if (viewer) {
                                        viewer.style.visibility = 'visible';
                                    }
                                    // Initialize navigation
                                    rendition.start();
                                }
                            } catch (err) {
                                debug("Content display error", err.message);
                            }
                        }, 100);

                        return rendition;
                    })
                    .catch(err => {
                        debug("Rendition error", err);
                        document.getElementById("viewer").innerHTML =
                            '<div class="error">Error displaying EPUB: ' + err.message + '</div>';
                    });

                // Set up event listeners
                debug("Setting up event listeners");
                try {
                    document.getElementById("prev").addEventListener("click", handlePrev);
                    document.getElementById("next").addEventListener("click", handleNext);
                    document.addEventListener("keydown", handleKeydown);

                    // Add loading indicator
                    const loadingIndicator = document.createElement('div');
                    loadingIndicator.id = 'loading-indicator';
                    loadingIndicator.style.position = 'fixed';
                    loadingIndicator.style.top = '50%';
                    loadingIndicator.style.left = '50%';
                    loadingIndicator.style.transform = 'translate(-50%, -50%)';
                    loadingIndicator.style.backgroundColor = 'rgba(0,0,0,0.7)';
                    loadingIndicator.style.color = 'white';
                    loadingIndicator.style.padding = '20px';
                    loadingIndicator.style.borderRadius = '5px';
                    loadingIndicator.style.zIndex = '1000';
                    loadingIndicator.textContent = 'Loading EPUB...';
                    document.body.appendChild(loadingIndicator);

                    // Remove indicator when loaded
                    book.on('ready', function() {
                        const indicator = document.getElementById('loading-indicator');
                        if (indicator) {
                            indicator.remove();
                        }
                    });

                } catch (err) {
                    debug("Error setting up event listeners", err.message);
                    console.error("Event listener setup error:", err);
                }

            } catch (err) {
                debug("EPUB initialization error", err);
                document.getElementById("viewer").innerHTML =
                    '<div class="error">Failed to load EPUB: ' + err.message + '</div>';
            }
        }

        // Initialize when DOM is loaded
        debug("Setting up DOMContentLoaded listener");
        document.addEventListener('DOMContentLoaded', initializeEPUB);
    </script>

    <footer>
        <p>© 2025 Book Search Engine</p>
    </footer>
</body>
</html>

@@ -1,141 +1,141 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Book Files</title>
    <link rel="stylesheet" href="/static/css/style.css">
    <style>
        .file-list {
            list-style: none;
            padding: 0;
        }
        .file-item {
            display: flex;
            justify-content: space-between;
            padding: 8px 0;
            border-bottom: 1px solid #eee;
        }
        .file-name {
            flex: 1;
            word-break: break-all;
        }
        .file-size {
            color: #666;
            min-width: 80px;
            text-align: right;
        }
        .book-title {
            font-weight: bold;
            color: #333;
        }
        .file-name-muted {
            color: #999;
            font-size: 0.9em;
            margin-left: 8px;
        }
        .summary {
            background: #f5f5f5;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
        }
        .summary-item {
            display: flex;
            justify-content: space-between;
            margin-bottom: 5px;
        }
        .summary-label {
            font-weight: bold;
        }
        .indexing-status {
            background: #fff8e1;
            padding: 15px;
            border-radius: 5px;
            margin-bottom: 20px;
            border-left: 4px solid #ffc107;
        }
        .indexing-link {
            color: #2196f3;
            text-decoration: none;
        }
        .indexing-link:hover {
            text-decoration: underline;
        }
        .plain-view-link {
            font-size: 0.8em;
            color: #666;
            text-decoration: none;
            margin-left: 8px;
        }
        .plain-view-link:hover {
            text-decoration: underline;
            color: #2196f3;
        }
    </style>
</head>
<body>
    <header>
        <h1>Book Files</h1>
    </header>

    <nav class="nav">
        <ul>
            <li><a href="/">Home</a></li>
            <li><a href="/files">File List</a></li>
            <li><a href="/index_books">Re-Index Books</a></li>
        </ul>
    </nav>

    <div class="container">
        <div class="summary">
            <div class="summary-item">
                <span class="summary-label">Total Files:</span>
                <span>{{ total_files }}</span>
            </div>
            <div class="summary-item">
                <span class="summary-label">Total Size:</span>
                <span>{{ total_size_mb }} MB</span>
            </div>
        </div>

        {% if indexing_in_progress %}
        <div class="indexing-status">
            Indexing is currently in progress.
            <a href="/index_books" class="indexing-link">View re-indexing progress</a>
        </div>
        {% endif %}

        <h2>Available Files</h2>

        {% if files %}
        <ul class="file-list">
            {% for file in files %}
            <li class="file-item">
                <span class="file-name">
                    <a href="/file/{{ file.path }}">
                        {% if file.title != file.name %}
                        <span class="book-title">{{ file.title }}</span>
                        <span class="file-name-muted">{{ file.name }}</span>
                        {% else %}
                        {{ file.name }}
                        {% endif %}
                    </a>
                    {% if file.path.endswith('.epub') %}
                    <br><a href="/file/{{ file.path }}?format=html" class="plain-view-link">(View as HTML)</a>
                    {% endif %}
                </span>
                <span class="file-size">{{ file.size_mb }} MB</span>
            </li>
            {% endfor %}
        </ul>
        {% else %}
        <p>No files available. Please add files to the books directory.</p>
        {% endif %}
    </div>

    <footer>
        <p>© 2025 Intari</p>
    </footer>
</body>
</html>

@@ -1,163 +1,163 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Indexing Books</title>
    <link rel="stylesheet" href="/static/css/style.css">
    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
</head>
<body>
    <header>
        <h1>Indexing Books</h1>
    </header>

    <nav class="nav">
        <ul>
            <li><a href="/">Home</a></li>
            <li><a href="/files">File List</a></li>
            <li><a href="/index_books">Re-Index Books</a></li>
        </ul>
    </nav>

    <div class="container">
        <div class="progress-container">
            <h2>Indexing Progress</h2>
            <div class="progress-stats">
                <span id="processed-files">0</span> of <span id="total-files">0</span> files processed
                <span id="percentage">0%</span>
            </div>
            <div class="progress-bar">
                <div class="progress-fill" id="progress-fill"></div>
            </div>

            <div class="current-file" id="current-file">
                Current file: Starting indexing...
            </div>

            <div class="time-stats">
                <div class="time-stat">
                    <div>CPU cores:</div>
                    <div>{{ used_cpus }} of {{ available_cpus }}</div>
                </div>
                <div class="time-stat">
                    <div>Time elapsed:</div>
                    <div id="elapsed-time">0m 0s</div>
                </div>
                <div class="time-stat">
                    <div>Estimated remaining:</div>
                    <div id="estimated-remaining">Calculating...</div>
                </div>
                <div class="time-stat">
                    <div>Estimated completion:</div>
                    <div id="estimated-completion">Calculating...</div>
                </div>
                <div class="time-stat">
                    <div>Files per minute:</div>
                    <div id="files-per-minute">0</div>
                </div>
            </div>

            <button class="abort-button" id="abort-button">Abort Indexing</button>
        </div>

        <div class="progress-details">
            <h3>Recent Errors</h3>
            <div id="error-list" class="error-list">
                No errors yet
            </div>
        </div>
    </div>

    <footer>
        <p>© 2025 Intari</p>
    </footer>

    <script>
        const progressFill = document.getElementById('progress-fill');
        const processedFiles = document.getElementById('processed-files');
        const totalFiles = document.getElementById('total-files');
        const percentage = document.getElementById('percentage');
        const currentFile = document.getElementById('current-file');
        const elapsedTime = document.getElementById('elapsed-time');
        const estimatedRemaining = document.getElementById('estimated-remaining');
        const estimatedCompletion = document.getElementById('estimated-completion');
        const filesPerMinute = document.getElementById('files-per-minute');
        const errorList = document.getElementById('error-list');
        const abortButton = document.getElementById('abort-button');

        let updateInterval;
        let speedChart;

        // Update progress every second
        function updateProgress() {
            // Get browser timezone
            const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;

            fetch('/indexing_progress', {
                headers: {
                    'X-Timezone': timezone
                }
            })
            .then(response => response.json())
            .then(data => {
                if (data.status === 'not_running') {
                    // Indexing completed
                    clearInterval(updateInterval);
                    window.location.href = '/files';
                    return;
                }

                // Update progress bar
                progressFill.style.width = `${data.percentage}%`;
                processedFiles.textContent = data.processed_files;
                totalFiles.textContent = data.total_files;
                percentage.textContent = `${data.percentage.toFixed(1)}%`;

                // Update current file
                currentFile.textContent = `Current file: ${data.current_file || 'Processing...'}`;

                // Update time stats
                elapsedTime.textContent = data.elapsed_time;
                estimatedRemaining.textContent = data.estimated_remaining;
                estimatedCompletion.textContent = data.estimated_completion;

                // Calculate files per minute
                if (data.elapsed_time) {
                    const [min, sec] = data.elapsed_time.split(/[ms]/).filter(Boolean).map(Number);
                    const totalSeconds = min * 60 + sec;
                    if (totalSeconds > 0) {
                        const fpm = (data.processed_files / totalSeconds * 60).toFixed(1);
                        filesPerMinute.textContent = fpm;
                    }
                }

                // Update errors
                if (data.errors && data.errors.length > 0) {
                    errorList.innerHTML = data.errors.map(err =>
                        `<div class="error-item">${err}</div>`
                    ).join('');
                }
            })
            .catch(error => {
                console.error('Error fetching progress:', error);
            });
        }

        // Start updating progress
        updateInterval = setInterval(updateProgress, 1000);
        updateProgress();

        // Handle abort button
        abortButton.addEventListener('click', () => {
            if (confirm('Are you sure you want to abort indexing?')) {
                fetch('/abort_indexing', { method: 'POST' })
                .then(response => response.json())
                .then(data => {
                    alert(data.message);
                });
            }
        });
    </script>
</body>
</html>

@@ -1,56 +1,56 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Book Search</title>
    <link rel="stylesheet" href="/static/css/style.css">
</head>
<body>
    <header>
        <h1>Book Search Engine</h1>
    </header>

    <nav class="nav">
        <ul>
            <li><a href="/">Home</a></li>
            <li><a href="/files">File List</a></li>
            <li><a href="/index_books">Re-Index Books</a></li>
        </ul>
    </nav>

    <div class="container">
        <div class="search-container">
            <form action="/search" method="GET">
                <input type="text" name="query" placeholder="Search for content..." class="search-box" value="{{ query }}">
                <button type="submit" class="search-button">Search</button>
            </form>
        </div>

        {% if results %}
        <div class="results">
            <h2>Search Results</h2>
            {% for result in results %}
            <div class="result-item">
                <h3>{{ result.file_path.split('/')[-1] }}</h3>
                <p>{{ result.snippet }}</p>
                <div class="file-actions">
                    <a href="/file/{{ result.file_path.replace('/books/', '') }}" class="file-action">View Full File</a>
                    <span class="action-separator">|</span>
                    <a href="/file/{{ result.file_path.replace('/books/', '') }}?format=html" class="file-action">View as HTML</a>
                </div>
            </div>
            {% endfor %}
        </div>
        {% elif query %}
        <div class="results">
            <p>No results found for "{{ query }}"</p>
        </div>
        {% endif %}
    </div>

    <footer>
        <p>© 2025 Intari</p>
    </footer>
</body>
</html>

@@ -1,64 +1,64 @@
<!DOCTYPE html>
<html>
<head>
    <title>{{ file_path }}</title>
    <link rel="stylesheet" href="/static/css/style.css">
    <style>
        pre {
            background-color: white;
            padding: 20px;
            border-radius: 5px;
            white-space: pre-wrap;
            word-wrap: break-word;
        }
        .html-content {
            background-color: white;
            padding: 20px;
            border-radius: 5px;
        }
        .html-content hr {
            margin: 30px 0;
            border: 0;
            border-top: 1px solid #eee;
        }
        .html-content h1,
        .html-content h2,
        .html-content h3,
        .html-content h4,
        .html-content h5,
        .html-content h6 {
            margin: 1em 0 0.5em 0;
            line-height: 1.2;
        }
        .html-content p {
            margin: 0 0 1em 0;
            line-height: 1.5;
        }
    </style>
</head>
<body>
    <header>
        <h1>File: {{ file_path }}</h1>
    </header>

    <nav class="nav">
        <ul>
            <li><a href="/">Home</a></li>
            <li><a href="/files">File List</a></li>
            <li><a href="/index_books">Re-Index Books</a></li>
        </ul>
    </nav>

    <div class="container">
        {% if is_html %}
        <div class="html-content">{{ content|safe }}</div>
        {% else %}
        <pre>{{ content }}</pre>
        {% endif %}
    </div>

    <footer>
        <p>© 2025 Book Search Engine</p>
    </footer>
</body>
</html>
@@ -1,142 +1,142 @@
from elasticsearch import Elasticsearch
import os
import ebooklib
from ebooklib import epub
from bs4 import BeautifulSoup
import PyPDF2
import time
from threading import Lock

# Elasticsearch Configuration
ELASTICSEARCH_HOST = os.environ.get("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(os.environ.get("ELASTICSEARCH_PORT", 9200))
es = Elasticsearch([{'host': ELASTICSEARCH_HOST, 'port': ELASTICSEARCH_PORT, 'scheme': 'http'}])
INDEX_NAME = "book_index"

# Global variables for progress tracking
indexing_progress = {
    'total_files': 0,
    'processed_files': 0,
    'start_time': None,
    'is_running': False,
    'current_file': '',
    'errors': []
}
progress_lock = Lock()

def create_index():
    if not es.indices.exists(index=INDEX_NAME):
        es.indices.create(index=INDEX_NAME)

def extract_text_from_epub(epub_path):
    book = epub.read_epub(epub_path)
    text = ''
    for item in book.get_items():
        if item.media_type == 'application/xhtml+xml':
            soup = BeautifulSoup(item.get_content(), 'html.parser')
            text += soup.get_text()
    return text

def extract_text_from_pdf(pdf_path):
    text = ''
    with open(pdf_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        for page_num in range(len(pdf_reader.pages)):
            page = pdf_reader.pages[page_num]
            text += page.extract_text()
    return text

def get_progress():
    with progress_lock:
        if not indexing_progress['is_running']:
            return None

        progress = indexing_progress.copy()
        if progress['total_files'] > 0:
            progress['percentage'] = (progress['processed_files'] / progress['total_files']) * 100
        else:
            progress['percentage'] = 0

        elapsed = time.time() - progress['start_time']
        progress['elapsed_time'] = elapsed
        if progress['processed_files'] > 0:
            time_per_file = elapsed / progress['processed_files']
            remaining_files = progress['total_files'] - progress['processed_files']
            progress['estimated_remaining'] = time_per_file * remaining_files
            progress['estimated_completion'] = time.time() + progress['estimated_remaining']
        else:
            progress['estimated_remaining'] = 0
            progress['estimated_completion'] = 0

        return progress

def index_files(directory):
    global indexing_progress

    with progress_lock:
        indexing_progress = {
            'total_files': 0,
            'processed_files': 0,
            'start_time': time.time(),
            'is_running': True,
            'current_file': '',
            'errors': []
        }

    try:
        create_index()

        # First count all files
        total_files = 0
        for root, _, files in os.walk(directory):
            for file in files:
                if file.endswith(('.epub', '.pdf', '.txt')):
                    total_files += 1

        with progress_lock:
            indexing_progress['total_files'] = total_files

        # Now process files
        for root, _, files in os.walk(directory):
            for file in files:
                file_path = os.path.join(root, file)

                with progress_lock:
                    indexing_progress['current_file'] = file_path

                try:
                    encoded_file_path = file_path.encode('utf-8').decode('utf-8')
                    if file_path.endswith(".epub"):
                        text = extract_text_from_epub(file_path)
                    elif file_path.endswith(".pdf"):
                        text = extract_text_from_pdf(file_path)
                    elif file_path.endswith(".txt"):
                        with open(encoded_file_path, 'r', encoding='utf-8', errors='ignore') as f:
                            text = f.read()
                    else:
                        print(f"Skipping unsupported file type: {file_path}")
                        continue

                    doc = {
                        'file_path': file_path,
                        'content': text
                    }
                    es.index(index=INDEX_NAME, document=doc)
                    print(f"Indexed: {file_path}")

                    with progress_lock:
                        indexing_progress['processed_files'] += 1

                except Exception as e:
                    error_msg = f"Error indexing {file_path}: {type(e)}, {e}"
                    print(error_msg)
                    with progress_lock:
                        indexing_progress['errors'].append(error_msg)

    finally:
        with progress_lock:
            indexing_progress['is_running'] = False

if __name__ == '__main__':
    BOOKS_DIR = "/books"  # This should match the volume mount in docker-compose.yml
    index_files(BOOKS_DIR)
@@ -1,7 +1,7 @@
flask==3.0.2
ebooklib==0.18
beautifulsoup4==4.12.3
pytest==8.3.2
PyPDF2==3.0.1
pytz==2024.1
elasticsearch>=8.0.0
@@ -1,127 +1,127 @@
import unittest
import json
import os
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock

class BookSearchAPITest(unittest.TestCase):
    def setUp(self):
        app.config['TESTING'] = True
        self.client = app.test_client()

        # Create a temporary directory for test books
        self.test_books_dir = tempfile.mkdtemp()

        # Create a sample test file
        self.sample_file_path = os.path.join(self.test_books_dir, 'test_sample.txt')
        with open(self.sample_file_path, 'w', encoding='utf-8') as f:
            f.write("This is a test sample file for testing the book search API.")

    def tearDown(self):
        # Remove the temporary directory
        shutil.rmtree(self.test_books_dir)

    @patch('app.es')
    @patch('app.index_files')
    def test_index_books_api(self, mock_index_files, mock_es):
        # Mock the index_files function
        mock_index_files.return_value = None

        # Test the API endpoint
        response = self.client.get('/index_books', headers={'Accept': 'application/json'})

        # Check if the response is successful
        self.assertEqual(response.status_code, 200)

        # Check if the response contains the expected message
        data = json.loads(response.data)
        self.assertIn('message', data)
        self.assertEqual(data['message'], 'Indexing completed')

        # Check if the index_files function was called
        mock_index_files.assert_called_once_with('/books')

    @patch('app.es')
    def test_search_api(self, mock_es):
        # Mock the Elasticsearch search method
        mock_search_result = {
            'hits': {
                'hits': [
                    {
                        '_source': {
                            'file_path': '/books/test_sample.txt',
                            'content': 'This is a test sample file for testing the book search API.'
                        }
                    }
                ]
            }
        }
        mock_es.search.return_value = mock_search_result

        # Test the API endpoint
        response = self.client.get('/search?query=test', headers={'Accept': 'application/json'})

        # Check if the response is successful
        self.assertEqual(response.status_code, 200)

        # Check if the response contains the expected data
        data = json.loads(response.data)
        self.assertEqual(len(data), 1)
        self.assertEqual(data[0]['file_path'], '/books/test_sample.txt')
        self.assertIn('snippet', data[0])

        # Check if the Elasticsearch search method was called with the correct parameters
        mock_es.search.assert_called_once()

    @patch('app.os.listdir')
    @patch('app.os.path.isfile')
    def test_list_files_api(self, mock_isfile, mock_listdir):
        # Mock the os.listdir function
        mock_listdir.return_value = ['test_sample.txt', 'another_file.txt']
        # Mock the os.path.isfile function to always return True
        mock_isfile.return_value = True

        # Test the API endpoint
        response = self.client.get('/files', headers={'Accept': 'application/json'})

        # Check if the response is successful
        self.assertEqual(response.status_code, 200)

        # Check if the response contains the expected data
        data = json.loads(response.data)
        self.assertEqual(len(data), 2)
        self.assertEqual(data[0]['name'], 'test_sample.txt')
        self.assertEqual(data[1]['name'], 'another_file.txt')

        # Check if the os.listdir function was called with the correct parameters
        mock_listdir.assert_called_once_with('/books')

    @patch('app.open')
    @patch('app.os.path.isfile')
    @patch('app.os.path.abspath')
    def test_get_file_api(self, mock_abspath, mock_isfile, mock_open):
        # Mock the necessary functions
        mock_isfile.return_value = True
        mock_abspath.side_effect = lambda x: x  # Return the input unchanged

        # Mock the open function
        mock_file = MagicMock()
        mock_file.__enter__.return_value.read.return_value = "This is a test sample file."
        mock_open.return_value = mock_file

        # Test the API endpoint
        response = self.client.get('/file/test_sample.txt', headers={'Accept': 'application/json'})

        # Check if the response is successful
        self.assertEqual(response.status_code, 200)

        # Check if the response contains the expected data
        self.assertEqual(response.data.decode('utf-8'), "This is a test sample file.")

        # Check if the open function was called with the correct parameters
        mock_open.assert_called_once()

if __name__ == '__main__':
    unittest.main()
@@ -1,114 +1,114 @@
import os
import pytest
import tempfile
import shutil
from app import app
from unittest.mock import patch, MagicMock
from ebooklib import epub

def create_test_epub():
    """Create a simple test EPUB file"""
    # Create a simple EPUB file
    book = epub.EpubBook()

    # Set metadata
    book.set_identifier('test123456')
    book.set_title('Test EPUB Book')
    book.set_language('en')
    book.add_author('Test Author')

    # Add a chapter
    c1 = epub.EpubHtml(title='Chapter 1', file_name='chap_01.xhtml', lang='en')
    c1.content = '<html><body><h1>Chapter 1</h1><p>This is a test EPUB file.</p></body></html>'
    book.add_item(c1)

    # Add navigation
    book.toc = [c1]
    book.spine = ['nav', c1]
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    # Create temp directory
    temp_dir = tempfile.mkdtemp()
    epub_path = os.path.join(temp_dir, 'test.epub')

    # Write the EPUB file
    epub.write_epub(epub_path, book)

    return epub_path, temp_dir

@pytest.fixture
def client():
    """Create a test client for the Flask app"""
    app.config['TESTING'] = True
    with app.test_client() as client:
        yield client

@pytest.fixture
def test_epub():
    """Create a test EPUB file and clean up after the test"""
    epub_path, temp_dir = create_test_epub()

    # Mock the books directory
    original_join = os.path.join

    def mock_join(path, *paths):
        if path == "/books" and paths and paths[0] == "test.epub":
            return epub_path
        return original_join(path, *paths)

    def mock_abspath(path):
        if path == os.path.join("/books", "test.epub"):
            return "/books/test.epub"
        elif path == epub_path:
            return "/books/test.epub"
        return path

    with patch('os.path.join', side_effect=mock_join):
        with patch('os.path.isfile', return_value=True):
            with patch('os.path.abspath', side_effect=mock_abspath):
                yield epub_path

    # Clean up
    shutil.rmtree(temp_dir)

def test_epub_viewer_page(client, test_epub):
    """Test that the EPUB viewer page loads correctly"""
    response = client.get('/file/test.epub')

    assert response.status_code == 200
    assert b'<!DOCTYPE html>' in response.data
    assert b'<title>test.epub</title>' in response.data
    assert b'<div id="viewer"></div>' in response.data
    assert b'<script src="https://cdn.jsdelivr.net/npm/epubjs' in response.data

def test_epub_file_endpoint(client, test_epub):
    """Test that the EPUB file is served with correct headers"""
    response = client.get('/epub/test.epub')

    assert response.status_code == 200
    assert response.headers['Content-Type'] == 'application/epub+zip'
    assert response.headers['Access-Control-Allow-Origin'] == '*'

    # Check that the response contains EPUB data (at least the magic number)
    assert response.data.startswith(b'PK')

def test_epub_viewer_integration(client, test_epub):
    """Test the integration between the viewer and the EPUB file"""
    # This test would ideally use Selenium or Playwright to test the actual rendering
    # Since we can't run a browser in this environment, we'll check for the correct setup

    # First, check that the viewer page loads
    viewer_response = client.get('/file/test.epub')
    assert viewer_response.status_code == 200

    # Check that the JavaScript is correctly set up to load the EPUB
    assert b'/epub/test.epub' in viewer_response.data

    # Check that the EPUB file is accessible
    epub_response = client.get('/epub/test.epub')
    assert epub_response.status_code == 200
    assert epub_response.headers['Content-Type'] == 'application/epub+zip'

if __name__ == '__main__':
    pytest.main(['-xvs', __file__])