Initial test version

This commit is contained in:
Dmitriy Kazimirov 2025-03-29 23:46:15 +06:00
commit 158ab589a5
10 changed files with 205 additions and 0 deletions

View file

@ -0,0 +1,8 @@
{
"name": "Search Dev",
"dockerComposeFile": "docker-compose.yml",
"service": "backend",
"workspaceFolder": "/app",
"extensions": ["ms-python.python"],
"remoteUser": "root"
}

5
.env Normal file
View file

@ -0,0 +1,5 @@
STORAGE_TYPE=smb # "smb" или "local"
LOCAL_STORAGE_PATH=./local_files
SMB_STORAGE_PATH=//IP_ИЛИ_ХОСТ_СМБ/ПУТЬ_К_ШАРЕ
SMB_USER=ТВОЙ_ЛОГИН
SMB_PASSWORD=ТВОЙ_ПАРОЛЬ

10
backend/Dockerfile Normal file
View file

@ -0,0 +1,10 @@
# Backend image: installs the Python dependencies and starts the API with uvicorn.
FROM python:3.11
WORKDIR /app
# requirements.txt is copied on its own before the rest of the sources, so the
# pip layer below can be reused by Docker's build cache when only code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy the remaining build context into /app.
COPY . .
# Serve the `app` object from module `app` on all interfaces, port 8000.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]

24
backend/app.py Normal file
View file

@ -0,0 +1,24 @@
import os

import requests
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse, JSONResponse
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Directory the /files endpoint serves from.
FILES_DIR = os.getenv("LOCAL_STORAGE_PATH", "/mnt/storage")

# Base URL of the Meilisearch instance. Overridable through SEARCH_ENGINE_URL so
# the API can run outside the compose network; the default is unchanged. A
# trailing slash is stripped because request paths are appended with "/".
SEARCH_ENGINE = os.getenv("SEARCH_ENGINE_URL", "http://meilisearch:7700").rstrip("/")

app = FastAPI()
@app.get("/search")
def search(q: str):
    """Run a full-text query against the ``documents`` index.

    Args:
        q: Query string, forwarded to the search engine as the ``q`` parameter.

    Returns:
        ``{"results": [...]}`` where the list is the ``hits`` array of the
        search engine response (empty when there are no hits or the index
        does not exist yet).

    Raises:
        HTTPException: 502 when the search engine cannot be reached, times
            out, answers with an error status, or returns a non-JSON body.
    """
    try:
        response = requests.get(
            f"{SEARCH_ENGINE}/indexes/documents/search",
            params={"q": q},
            # Without a timeout a hung search engine blocks this worker forever.
            timeout=10,
        )
        if response.status_code == 404:
            # The index is only created by the first indexing run; until then
            # keep answering with an empty result list, as before.
            return {"results": []}
        response.raise_for_status()
        results = response.json()
    except (requests.RequestException, ValueError) as exc:
        raise HTTPException(
            status_code=502, detail="Search engine is unavailable"
        ) from exc
    return {"results": results.get("hits", [])}
@app.get("/files/{filename}")
def get_file(filename: str):
    """Send a stored file to the client as a download.

    Args:
        filename: Name of a file located directly inside ``FILES_DIR``.

    Returns:
        A ``FileResponse`` for an existing regular file inside ``FILES_DIR``;
        otherwise a 404 JSON response with an ``error`` message.
    """
    base_dir = os.path.realpath(FILES_DIR)
    file_path = os.path.realpath(os.path.join(base_dir, filename))

    # Resolve symlinks and ".." first, then require the result to stay inside
    # the storage directory so a crafted name cannot reach other files.
    is_inside = file_path.startswith(os.path.join(base_dir, ""))

    # isfile (not exists): a directory such as ".." exists but cannot be sent.
    if is_inside and os.path.isfile(file_path):
        return FileResponse(file_path, filename=filename)

    # Same body shape as before, but with a real 404 status instead of 200.
    return JSONResponse(status_code=404, content={"error": "Файл не найден"})

47
backend/indexer.py Normal file
View file

@ -0,0 +1,47 @@
import os
import requests
from pdfminer.high_level import extract_text
from ebooklib import epub
from bs4 import BeautifulSoup
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Directory scanned for files to index.
FILES_DIR = os.getenv("LOCAL_STORAGE_PATH", "/mnt/storage")

# Base URL of the Meilisearch instance. Overridable through SEARCH_ENGINE_URL so
# the indexer can run outside the compose network; the default is unchanged. A
# trailing slash is stripped because request paths are appended with "/".
SEARCH_ENGINE = os.getenv("SEARCH_ENGINE_URL", "http://meilisearch:7700").rstrip("/")

# Name of the index that receives the documents.
INDEX_NAME = "documents"
def extract_text_from_pdf(pdf_path):
    """Return the text that pdfminer's ``extract_text`` yields for *pdf_path*."""
    pdf_text = extract_text(pdf_path)
    return pdf_text
def extract_text_from_epub(epub_path):
    """Return the plain text of every document item in an EPUB file.

    Args:
        epub_path: Path of the ``.epub`` file to read.

    Returns:
        The text of all document items with markup removed, joined with
        newlines in the order the book lists them.
    """
    # Only the ``epub`` submodule is imported at file level; the item-type
    # constants live on the package itself.
    import ebooklib

    book = epub.read_epub(epub_path)
    # ITEM_DOCUMENT replaces the former magic number 9.
    chapters = [
        BeautifulSoup(item.content, "html.parser").get_text()
        for item in book.get_items_of_type(ebooklib.ITEM_DOCUMENT)
    ]
    return "\n".join(chapters)
def _document_id(filename):
    """Return a stable id for *filename* made only of hexadecimal characters."""
    import hashlib

    return hashlib.sha256(filename.encode("utf-8")).hexdigest()


def _read_document_text(file_path):
    """Return the text of a .txt/.pdf/.epub file, or None for any other type."""
    lowered = file_path.lower()  # match ".PDF" as well as ".pdf"
    if lowered.endswith(".txt"):
        # Undecodable bytes are replaced instead of aborting the whole run.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            return f.read()
    if lowered.endswith(".pdf"):
        return extract_text_from_pdf(file_path)
    if lowered.endswith(".epub"):
        return extract_text_from_epub(file_path)
    return None


def index_files():
    """Extract text from the supported files in ``FILES_DIR`` and index it.

    Each document is sent as ``{"id", "filename", "content"}``. The id is a
    hash of the file name because Meilisearch only accepts ids made of
    letters, digits, hyphens and underscores, so a raw name such as
    ``book.pdf`` is rejected. The original name is kept in ``filename``.

    Directories and unsupported extensions are skipped. A file whose text
    cannot be extracted is reported and skipped so that one broken file does
    not prevent the others from being indexed.

    Raises:
        requests.RequestException: If the search engine cannot be reached,
            times out, or answers the upload with an error status.
    """
    docs = []
    for filename in os.listdir(FILES_DIR):
        file_path = os.path.join(FILES_DIR, filename)
        if not os.path.isfile(file_path):
            continue
        try:
            content = _read_document_text(file_path)
        except Exception as exc:  # the parsers raise many unrelated types
            print(f"Не удалось обработать {filename}: {exc}")
            continue
        if content is None:
            continue
        docs.append(
            {
                "id": _document_id(filename),
                "filename": filename,
                "content": content,
            }
        )

    if docs:
        response = requests.post(
            f"{SEARCH_ENGINE}/indexes/{INDEX_NAME}/documents",
            json=docs,
            timeout=60,
        )
        # Do not report success when the upload itself was refused.
        response.raise_for_status()
        print(f"Индексировано {len(docs)} файлов!")
if __name__ == "__main__":
index_files()

7
backend/requirements.txt Normal file
View file

@ -0,0 +1,7 @@
fastapi
uvicorn
requests
pdfminer.six
ebooklib
beautifulsoup4
python-dotenv

58
docker-compose.yml Normal file
View file

@ -0,0 +1,58 @@
version: "3.8"

services:
  # Search engine; the index data lives in the named volume meili_data.
  meilisearch:
    image: getmeili/meilisearch:latest
    container_name: meilisearch
    ports:
      - "7700:7700"
    environment:
      - MEILI_NO_ANALYTICS=true
    restart: unless-stopped
    volumes:
      - meili_data:/meili_data

  # API built from ./backend, published on port 8000.
  backend:
    build: ./backend
    container_name: backend
    ports:
      - "8000:8000"
    env_file: .env
    environment:
      # .env holds the HOST path of the storage directory. Inside the container
      # the files are mounted at /mnt/storage, so override the variable here
      # (`environment` takes precedence over `env_file`).
      - LOCAL_STORAGE_PATH=/mnt/storage
    volumes:
      - ${LOCAL_STORAGE_PATH}:/mnt/storage
    depends_on:
      - meilisearch
    # The former deploy.resources.reservations.devices entry
    # (driver: cifs, capabilities: [gpu]) was removed: device reservations
    # select hardware drivers, "cifs" is not one, and the container could
    # not be started with it.

  # Static UI built from ./frontend, published on port 8080.
  frontend:
    build: ./frontend
    container_name: frontend
    ports:
      - "8080:80"
    depends_on:
      - backend

  # Public entry point on port 80.
  nginx:
    image: nginx:latest
    container_name: nginx
    ports:
      - "80:80"
    volumes:
      - ${LOCAL_STORAGE_PATH}:/usr/share/nginx/html/files
      # Without this mount the stock nginx configuration is used and
      # nginx/default.conf (including the /search proxy) has no effect.
      - ./nginx/default.conf:/etc/nginx/conf.d/default.conf:ro
    depends_on:
      - backend
      - frontend

volumes:
  meili_data:
  # CIFS share definition; not referenced by any service above.
  smb_share:
    driver: local
    driver_opts:
      type: cifs
      o: username=${SMB_USER},password=${SMB_PASSWORD},vers=3.0
      device: ${SMB_STORAGE_PATH}

2
frontend/Dockerfile Normal file
View file

@ -0,0 +1,2 @@
# Frontend image: stock nginx with the search page copied into its html root.
FROM nginx:latest
COPY index.html /usr/share/nginx/html/index.html

27
frontend/index.html Normal file
View file

@ -0,0 +1,27 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<title>Поиск</title>
</head>
<body>
<input id="query" type="text" placeholder="Введите запрос">
<button onclick="search()">Искать</button>
<ul id="results"></ul>
<script>
// Query the /search endpoint with the text from #query and render the hits
// into #results as "<link to file>: <first 200 characters of content>...".
async function search() {
    const query = document.getElementById("query").value;
    const list = document.getElementById("results");

    // Encode the query so characters such as "&", "#" or "+" reach the server intact.
    const res = await fetch(`/search?q=${encodeURIComponent(query)}`);
    list.innerHTML = "";
    if (!res.ok) {
        console.error(`Search request failed with HTTP ${res.status}`);
        return;
    }

    const data = await res.json();
    (data.results || []).forEach(doc => {
        // Prefer an explicit file name field when the document has one.
        const name = doc.filename || doc.id;

        // Build the nodes with textContent / text nodes instead of innerHTML:
        // names and content come from indexed files and must not be parsed as HTML.
        const link = document.createElement("a");
        link.href = `/files/${encodeURIComponent(name)}`;
        link.textContent = name;

        // A hit without content must not break rendering of the other hits.
        const snippet = String(doc.content || "").substring(0, 200);

        const item = document.createElement("li");
        item.append(link, `: ${snippet}...`);
        list.appendChild(item);
    });
}
</script>
</body>
</html>

17
nginx/default.conf Normal file
View file

@ -0,0 +1,17 @@
# Entry point on port 80: UI from the frontend container, static files from
# the html root, search requests forwarded to the backend API.
server {
    listen 80;

    # The search page is built into the separate `frontend` image. The html
    # root of a stock nginx image only holds nginx's default page, so forward
    # instead of serving from the local root.
    location / {
        proxy_pass http://frontend:80;
        proxy_set_header Host $host;
    }

    # With `root`, /files/<name> maps to /usr/share/nginx/html/files/<name>.
    location /files/ {
        root /usr/share/nginx/html;
        autoindex on;
    }

    location /search {
        proxy_pass http://backend:8000;
        # Pass the original host and client address on to the API.
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
    }
}