Mirror of https://github.com/intari/search2_chatgpt.git, synced 2025-04-25 16:09:11 +00:00

Commit 158ab589a5: Initial test version

10 changed files with 205 additions and 0 deletions
.devcontainer/devcontainer.json (Normal file, 8 additions)

@@ -0,0 +1,8 @@
{
    "name": "Search Dev",
    "dockerComposeFile": "docker-compose.yml",
    "service": "backend",
    "workspaceFolder": "/app",
    "extensions": ["ms-python.python"],
    "remoteUser": "root"
}
.env (Normal file, 5 additions)

@@ -0,0 +1,5 @@
STORAGE_TYPE=smb # "smb" or "local"
LOCAL_STORAGE_PATH=./local_files
SMB_STORAGE_PATH=//SMB_IP_OR_HOSTNAME/SHARE_PATH
SMB_USER=YOUR_LOGIN
SMB_PASSWORD=YOUR_PASSWORD
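
LOCAL_STORAGE_PATH is read by the backend through python-dotenv, while the SMB variables are consumed by docker-compose.yml further below. A minimal sketch of how the settings are loaded, assuming the .env file sits in the working directory; only the variable names above come from the commit, the rest is illustrative.

import os
from dotenv import load_dotenv

# Load .env from the current directory and read the storage settings.
load_dotenv()

storage_type = os.getenv("STORAGE_TYPE", "local")              # "smb" or "local"
local_path = os.getenv("LOCAL_STORAGE_PATH", "./local_files")
smb_path = os.getenv("SMB_STORAGE_PATH", "")

print(f"storage={storage_type}, path={local_path if storage_type == 'local' else smb_path}")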
backend/Dockerfile (Normal file, 10 additions)

@@ -0,0 +1,10 @@
FROM python:3.11

WORKDIR /app

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "8000"]
backend/app.py (Normal file, 24 additions)

@@ -0,0 +1,24 @@
from fastapi import FastAPI
from fastapi.responses import FileResponse
import requests
import os
from dotenv import load_dotenv

load_dotenv()
FILES_DIR = os.getenv("LOCAL_STORAGE_PATH", "/mnt/storage")
SEARCH_ENGINE = "http://meilisearch:7700"

app = FastAPI()

@app.get("/search")
def search(q: str):
    response = requests.get(f"{SEARCH_ENGINE}/indexes/documents/search", params={"q": q})
    results = response.json()
    return {"results": results.get("hits", [])}

@app.get("/files/{filename}")
def get_file(filename: str):
    file_path = os.path.join(FILES_DIR, filename)
    if os.path.exists(file_path):
        return FileResponse(file_path, filename=filename)
    return {"error": "File not found"}
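
A minimal client sketch for the two endpoints above, assuming the backend is reachable on localhost:8000 as published in docker-compose.yml further below; the query text is hypothetical.

import requests

BACKEND = "http://localhost:8000"  # assumed host/port from the compose port mapping

# Query /search and list matching document ids (ids are filenames, see indexer.py).
resp = requests.get(f"{BACKEND}/search", params={"q": "example query"})
resp.raise_for_status()
hits = resp.json().get("results", [])
for hit in hits:
    print(hit.get("id"))

# Fetch the first matching file through /files/{filename}.
if hits:
    filename = hits[0]["id"]
    content = requests.get(f"{BACKEND}/files/{filename}").content
    with open(filename, "wb") as f:
        f.write(content)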
backend/indexer.py (Normal file, 47 additions)

@@ -0,0 +1,47 @@
import os
import requests
from pdfminer.high_level import extract_text
from ebooklib import epub
from bs4 import BeautifulSoup
from dotenv import load_dotenv

load_dotenv()
FILES_DIR = os.getenv("LOCAL_STORAGE_PATH", "/mnt/storage")
SEARCH_ENGINE = "http://meilisearch:7700"
INDEX_NAME = "documents"

def extract_text_from_pdf(pdf_path):
    return extract_text(pdf_path)

def extract_text_from_epub(epub_path):
    book = epub.read_epub(epub_path)
    text = []
    for item in book.get_items():
        if item.get_type() == 9:
            soup = BeautifulSoup(item.content, "html.parser")
            text.append(soup.get_text())
    return "\n".join(text)

def index_files():
    docs = []
    for filename in os.listdir(FILES_DIR):
        file_path = os.path.join(FILES_DIR, filename)

        if filename.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
        elif filename.endswith(".pdf"):
            content = extract_text_from_pdf(file_path)
        elif filename.endswith(".epub"):
            content = extract_text_from_epub(file_path)
        else:
            continue

        docs.append({"id": filename, "content": content})

    if docs:
        requests.post(f"{SEARCH_ENGINE}/indexes/{INDEX_NAME}/documents", json=docs)
        print(f"Indexed {len(docs)} files!")

if __name__ == "__main__":
    index_files()
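
Two practical caveats for index_files(), sketched below rather than changed in the commit: Meilisearch generally rejects document ids containing characters other than alphanumerics, hyphens, and underscores (so a raw filename like book.pdf may not be accepted), and a large library can exceed the engine's default payload limit in a single POST. The helpers safe_id and post_in_batches are hypothetical names, not part of this repository.

import re
import requests

SEARCH_ENGINE = "http://meilisearch:7700"
INDEX_NAME = "documents"

def safe_id(filename):
    # Replace characters Meilisearch does not accept in ids; keep the original
    # filename in a separate field so /files/{filename} links still resolve.
    return re.sub(r"[^a-zA-Z0-9_-]", "_", filename)

def post_in_batches(docs, batch_size=100):
    # Upload documents in smaller chunks instead of one large request.
    for start in range(0, len(docs), batch_size):
        batch = docs[start:start + batch_size]
        resp = requests.post(f"{SEARCH_ENGINE}/indexes/{INDEX_NAME}/documents", json=batch)
        resp.raise_for_status()

# Example document shape: {"id": safe_id(name), "filename": name, "content": text}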
backend/requirements.txt (Normal file, 7 additions)

@@ -0,0 +1,7 @@
fastapi
uvicorn
requests
pdfminer.six
ebooklib
beautifulsoup4
python-dotenv
docker-compose.yml (Normal file, 58 additions)

@@ -0,0 +1,58 @@
version: "3.8"

services:
  meilisearch:
    image: getmeili/meilisearch:latest
    container_name: meilisearch
    ports:
      - "7700:7700"
    environment:
      - MEILI_NO_ANALYTICS=true
    restart: unless-stopped
    volumes:
      - meili_data:/meili_data

  backend:
    build: ./backend
    container_name: backend
    ports:
      - "8000:8000"
    env_file: .env
    volumes:
      - ${LOCAL_STORAGE_PATH}:/mnt/storage
    depends_on:
      - meilisearch
    deploy:
      resources:
        reservations:
          devices:
            - driver: cifs
              count: 1
              capabilities: [gpu]

  frontend:
    build: ./frontend
    container_name: frontend
    ports:
      - "8080:80"
    depends_on:
      - backend

  nginx:
    image: nginx:latest
    container_name: nginx
    ports:
      - "80:80"
    volumes:
      - ${LOCAL_STORAGE_PATH}:/usr/share/nginx/html/files
    depends_on:
      - backend
      - frontend

volumes:
  meili_data:
  smb_share:
    driver_opts:
      type: cifs
      o: username=${SMB_USER},password=${SMB_PASSWORD},vers=3.0
      device: ${SMB_STORAGE_PATH}
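
The backend only declares depends_on: meilisearch, which waits for the container to start, not for the search engine to be ready, so a first index run can race ahead of it. A minimal wait-loop sketch, assuming Meilisearch's /health endpoint and the service name from this compose file; wait_for_meilisearch is a hypothetical helper, not part of the commit.

import time
import requests

def wait_for_meilisearch(url="http://meilisearch:7700", timeout=60):
    # Poll /health until Meilisearch reports "available" or the timeout expires.
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            if requests.get(f"{url}/health", timeout=2).json().get("status") == "available":
                return True
        except requests.RequestException:
            pass
        time.sleep(1)
    return False

if __name__ == "__main__":
    if wait_for_meilisearch():
        import indexer
        indexer.index_files()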
frontend/Dockerfile (Normal file, 2 additions)

@@ -0,0 +1,2 @@
FROM nginx:latest
COPY index.html /usr/share/nginx/html/index.html
frontend/index.html (Normal file, 27 additions)

@@ -0,0 +1,27 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Search</title>
</head>
<body>
    <input id="query" type="text" placeholder="Enter a query">
    <button onclick="search()">Search</button>
    <ul id="results"></ul>

    <script>
        async function search() {
            let query = document.getElementById("query").value;
            let res = await fetch(`/search?q=` + query);
            let data = await res.json();
            let list = document.getElementById("results");
            list.innerHTML = "";
            data.results.forEach(doc => {
                let item = document.createElement("li");
                item.innerHTML = `<a href="/files/${doc.id}">${doc.id}</a>: ${doc.content.substring(0, 200)}...`;
                list.appendChild(item);
            });
        }
    </script>
</body>
</html>
nginx/default.conf (Normal file, 17 additions)

@@ -0,0 +1,17 @@
server {
    listen 80;

    location / {
        root /usr/share/nginx/html;
        index index.html;
    }

    location /files/ {
        root /usr/share/nginx/html;
        autoindex on;
    }

    location /search {
        proxy_pass http://backend:8000;
    }
}
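
With this configuration, / is served from the container's html root, /files/ exposes the mounted storage with autoindex, and /search is proxied to the backend. A small smoke-test sketch, assuming the compose stack is up and nginx is reachable on localhost port 80; the query text is hypothetical.

import requests

# Search through the nginx proxy (which forwards /search to backend:8000).
resp = requests.get("http://localhost/search", params={"q": "test"})
resp.raise_for_status()

for hit in resp.json().get("results", []):
    # Matching files are also browsable via nginx's autoindex under /files/.
    print(f"http://localhost/files/{hit['id']}")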