SurfSense/surfsense_backend/tests/integration/document_upload/test_upload_limits.py
DESKTOP-RTLN3BA\$punk 62e698d8aa refactor: streamline document upload limits and enhance handling of mentioned documents
- Updated maximum file size limit to 500 MB per file.
- Removed restrictions on the number of files per upload and total upload size.
- Enhanced handling of user-mentioning documents in the knowledge base search middleware.
- Improved document reading and processing logic to accommodate new features and optimizations.
2026-04-02 19:39:10 -07:00

93 lines
2.9 KiB
Python

"""
Integration tests for backend file upload limit enforcement.
These tests verify that the API rejects uploads that exceed:
- Max per-file size (500 MB)
No file count or total size limits are enforced — the frontend batches
uploads in groups of 5 and there is no cap on how many files a user can
upload in a single session.
Prerequisites:
- PostgreSQL + pgvector
"""
from __future__ import annotations
import io
import httpx
import pytest
pytestmark = pytest.mark.integration
# ---------------------------------------------------------------------------
# Test: Per-file size limit (500 MB)
# ---------------------------------------------------------------------------
class TestPerFileSizeLimit:
"""A single file exceeding 500 MB should be rejected."""
async def test_oversized_file_returns_413(
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
):
oversized = io.BytesIO(b"\x00" * (500 * 1024 * 1024 + 1))
resp = await client.post(
"/api/v1/documents/fileupload",
headers=headers,
files=[("files", ("big.pdf", oversized, "application/pdf"))],
data={"search_space_id": str(search_space_id)},
)
assert resp.status_code == 413
assert "per-file limit" in resp.json()["detail"].lower()
async def test_file_at_limit_accepted(
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
at_limit = io.BytesIO(b"\x00" * (500 * 1024 * 1024))
resp = await client.post(
"/api/v1/documents/fileupload",
headers=headers,
files=[("files", ("exact500mb.txt", at_limit, "text/plain"))],
data={"search_space_id": str(search_space_id)},
)
assert resp.status_code == 200
cleanup_doc_ids.extend(resp.json().get("document_ids", []))
# ---------------------------------------------------------------------------
# Test: Multiple files accepted without count limit
# ---------------------------------------------------------------------------
class TestNoFileCountLimit:
"""Many files in a single request should be accepted."""
async def test_many_files_accepted(
self,
client: httpx.AsyncClient,
headers: dict[str, str],
search_space_id: int,
cleanup_doc_ids: list[int],
):
files = [
("files", (f"file_{i}.txt", io.BytesIO(b"test content"), "text/plain"))
for i in range(20)
]
resp = await client.post(
"/api/v1/documents/fileupload",
headers=headers,
files=files,
data={"search_space_id": str(search_space_id)},
)
assert resp.status_code == 200
cleanup_doc_ids.extend(resp.json().get("document_ids", []))