mirror of
https://github.com/cyclotruc/gitingest.git
synced 2026-04-28 10:29:30 +00:00
fix: improved server side cleanup after ingest (#477)
This commit is contained in:
parent
d1f8a80826
commit
2df0eb4398
4 changed files with 21 additions and 126 deletions
|
|
@ -19,7 +19,7 @@ from gitingest.utils.logging_config import get_logger
|
|||
from server.metrics_server import start_metrics_server
|
||||
from server.routers import dynamic, index, ingest
|
||||
from server.server_config import templates
|
||||
from server.server_utils import lifespan, limiter, rate_limit_exception_handler
|
||||
from server.server_utils import limiter, rate_limit_exception_handler
|
||||
|
||||
# Load environment variables from .env file
|
||||
load_dotenv()
|
||||
|
|
@ -55,8 +55,8 @@ if os.getenv("GITINGEST_SENTRY_ENABLED") is not None:
|
|||
environment=sentry_environment,
|
||||
)
|
||||
|
||||
# Initialize the FastAPI application with lifespan
|
||||
app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None)
|
||||
# Initialize the FastAPI application
|
||||
app = FastAPI(docs_url=None, redoc_url=None)
|
||||
app.state.limiter = limiter
|
||||
|
||||
# Register the custom exception handler for rate limits
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, cast
|
||||
|
||||
|
|
@ -31,6 +32,17 @@ if TYPE_CHECKING:
|
|||
from gitingest.schemas.ingestion import IngestionQuery
|
||||
|
||||
|
||||
def _cleanup_repository(clone_config: CloneConfig) -> None:
    """Delete the local clone referenced by ``clone_config``.

    Cleanup is best-effort: filesystem errors are logged and never propagated to the caller.

    Parameters
    ----------
    clone_config : CloneConfig
        The clone configuration whose ``local_path`` is removed from disk.

    """
    local_path = Path(clone_config.local_path)

    try:
        # Try first instead of checking ``exists()``: the path may disappear between a check and the removal
        shutil.rmtree(local_path)
    except FileNotFoundError:
        # Nothing (left) to delete; as before, a missing path is not reported
        return
    except OSError:  # PermissionError is a subclass of OSError, so one handler covers both
        logger.exception("Could not delete repository", extra={"local_path": str(clone_config.local_path)})
        return

    logger.info("Successfully cleaned up repository", extra={"local_path": str(local_path)})
|
||||
|
||||
|
||||
async def _check_s3_cache(
|
||||
query: IngestionQuery,
|
||||
input_text: str,
|
||||
|
|
@ -292,6 +304,8 @@ async def process_query(
|
|||
_store_digest_content(query, clone_config, digest_content, summary, tree, content)
|
||||
except Exception as exc:
|
||||
_print_error(query.url, exc, max_file_size, pattern_type, pattern)
|
||||
# Clean up repository even if processing failed
|
||||
_cleanup_repository(clone_config)
|
||||
return IngestErrorResponse(error=str(exc))
|
||||
|
||||
if len(content) > MAX_DISPLAY_SIZE:
|
||||
|
|
@ -310,6 +324,9 @@ async def process_query(
|
|||
|
||||
digest_url = _generate_digest_url(query)
|
||||
|
||||
# Clean up the repository after successful processing
|
||||
_cleanup_repository(clone_config)
|
||||
|
||||
return IngestSuccessResponse(
|
||||
repo_url=input_text,
|
||||
short_repo_url=short_repo_url,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ from pathlib import Path
|
|||
from fastapi.templating import Jinja2Templates
|
||||
|
||||
MAX_DISPLAY_SIZE: int = 300_000
|
||||
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)
|
||||
|
||||
# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
|
||||
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 mb
|
||||
|
|
|
|||
|
|
@ -1,21 +1,12 @@
|
|||
"""Utility functions for the server."""
|
||||
|
||||
import asyncio
|
||||
import shutil
|
||||
import time
|
||||
from contextlib import asynccontextmanager, suppress
|
||||
from pathlib import Path
|
||||
from typing import AsyncGenerator
|
||||
|
||||
from fastapi import FastAPI, Request
|
||||
from fastapi import Request
|
||||
from fastapi.responses import Response
|
||||
from slowapi import Limiter, _rate_limit_exceeded_handler
|
||||
from slowapi.errors import RateLimitExceeded
|
||||
from slowapi.util import get_remote_address
|
||||
|
||||
from gitingest.config import TMP_BASE_PATH
|
||||
from gitingest.utils.logging_config import get_logger
|
||||
from server.server_config import DELETE_REPO_AFTER
|
||||
|
||||
# Initialize logger for this module
|
||||
logger = get_logger(__name__)
|
||||
|
|
@ -52,118 +43,6 @@ async def rate_limit_exception_handler(request: Request, exc: Exception) -> Resp
|
|||
raise exc
|
||||
|
||||
|
||||
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
    """Manage startup & graceful-shutdown tasks for the FastAPI app.

    On entry a background task running ``_remove_old_repositories`` is started. On exit the task is cancelled and
    awaited, including when an exception is thrown into the context manager while the app is running.

    Returns
    -------
    AsyncGenerator[None, None]
        Yields control back to the FastAPI application while the background task runs.

    """
    task = asyncio.create_task(_remove_old_repositories())

    try:
        yield  # app runs while the background task is alive
    finally:
        # Without ``finally`` an exception raised at the ``yield`` would skip the cancellation below
        task.cancel()  # ask the worker to stop
        with suppress(asyncio.CancelledError):
            await task  # swallow the cancellation signal
|
||||
|
||||
|
||||
async def _remove_old_repositories(
    base_path: Path = TMP_BASE_PATH,
    scan_interval: int = 60,
    delete_after: int = DELETE_REPO_AFTER,
) -> None:
    """Periodically delete old repositories/directories.

    Every ``scan_interval`` seconds the coroutine scans ``base_path`` and passes each entry whose ``st_ctime`` lies
    more than ``delete_after`` seconds in the past to ``_process_folder``. Filesystem errors are logged; an error on
    one entry does not prevent the remaining entries of the same scan from being processed. The loop only ends
    when the coroutine is cancelled.

    Parameters
    ----------
    base_path : Path
        The path to the base directory where repositories are stored (default: ``TMP_BASE_PATH``).
    scan_interval : int
        The number of seconds between scans (default: 60).
    delete_after : int
        The number of seconds after which a repository is considered old and will be deleted
        (default: ``DELETE_REPO_AFTER``).

    """
    while True:
        if base_path.exists():
            now = time.time()

            try:
                # Snapshot the listing up front: entries are removed while it is being walked
                folders = list(base_path.iterdir())
            except OSError:
                logger.exception("Error in repository cleanup", extra={"base_path": str(base_path)})
                folders = []

            for folder in folders:
                try:
                    if now - folder.stat().st_ctime <= delete_after:  # Not old enough
                        continue

                    await _process_folder(folder)
                except OSError:
                    # Keep scanning: one unreadable or vanished entry must not block the rest
                    logger.exception("Error in repository cleanup", extra={"base_path": str(base_path)})

        await asyncio.sleep(scan_interval)
|
||||
|
||||
|
||||
async def _process_folder(folder: Path) -> None:
    """Append the repo URL (if discoverable) to ``history.txt`` and delete ``folder``.

    The URL is derived from the stem of the first ``.txt`` file found directly inside ``folder``, assuming the
    filename format "owner-repository.txt". When no such file exists, or its name contains no "-", nothing is
    recorded, but ``folder`` is still deleted. Filesystem errors from either step are logged, not raised.

    Parameters
    ----------
    folder : Path
        The path to the folder to be processed.

    """
    history_file = Path("history.txt")
    loop = asyncio.get_running_loop()

    # A missing digest file only means there is nothing to record; it must not prevent the deletion below
    first_txt_file = next(folder.glob("*.txt"), None)

    # Append owner/repo to history.txt
    if first_txt_file is not None:
        try:
            owner, separator, repo = first_txt_file.stem.partition("-")  # "owner-repo"
            if separator:
                repo_url = f"{owner}/{repo}"
                await loop.run_in_executor(None, _append_line, history_file, repo_url)
        except OSError:  # PermissionError is a subclass of OSError
            logger.exception("Error logging repository URL", extra={"folder": str(folder)})

    # Delete the cloned repo (in a worker thread so the event loop is not blocked)
    try:
        await loop.run_in_executor(None, shutil.rmtree, folder)
    except PermissionError:
        logger.exception("No permission to delete folder", extra={"folder": str(folder)})
    except OSError:
        logger.exception("Could not delete folder", extra={"folder": str(folder)})
|
||||
|
||||
|
||||
def _append_line(path: Path, line: str) -> None:
    """Write ``line`` followed by a newline to the end of the file at ``path``.

    The file is opened in append mode with UTF-8 encoding.

    Parameters
    ----------
    path : Path
        The file that receives the new line.
    line : str
        The text to append; the trailing newline is added by this function.

    """
    record = f"{line}\n"
    with path.open(mode="a", encoding="utf-8") as handle:
        handle.write(record)
|
||||
|
||||
|
||||
## Color printing utility
|
||||
class Colors:
|
||||
"""ANSI color codes."""
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue