mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-04 11:39:19 +00:00
Refactor: Cleanup DOCLING PR
This commit is contained in:
parent
641f784f77
commit
621590c049
5 changed files with 2331 additions and 2326 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -1,5 +1,3 @@
|
|||
.flashrank_cache*
|
||||
podcasts/
|
||||
reports/
|
||||
SURFSENSE_CRITICAL_FIXES_REPORT.md
|
||||
.env
|
||||
|
|
|
@ -11,10 +11,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
wget \
|
||||
unzip \
|
||||
gnupg2 \
|
||||
tesseract-ocr \
|
||||
tesseract-ocr-eng \
|
||||
libtesseract-dev \
|
||||
libleptonica-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
# Update certificates and install SSL tools
|
||||
|
@ -60,9 +56,6 @@ COPY . .
|
|||
ENV PYTHONPATH=/app
|
||||
ENV UVICORN_LOOP=asyncio
|
||||
|
||||
# Set Tesseract data path
|
||||
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata/
|
||||
|
||||
# Run
|
||||
EXPOSE 8000
|
||||
CMD ["python", "main.py"]
|
|
@ -30,7 +30,6 @@ dependencies = [
|
|||
"slack-sdk>=3.34.0",
|
||||
"static-ffmpeg>=2.13",
|
||||
"tavily-python>=0.3.2",
|
||||
"tesserocr>=2.8.0",
|
||||
"unstructured-client>=0.30.0",
|
||||
"unstructured[all-docs]>=0.16.25",
|
||||
"uvicorn[standard]>=0.34.0",
|
||||
|
|
4480
surfsense_backend/uv.lock
generated
4480
surfsense_backend/uv.lock
generated
File diff suppressed because it is too large
Load diff
|
@ -53,8 +53,11 @@ export default function FileUploader() {
|
|||
};
|
||||
|
||||
// Conditionally set accepted file types based on ETL service
|
||||
const acceptedFileTypes = process.env.NEXT_PUBLIC_ETL_SERVICE === 'LLAMACLOUD'
|
||||
? {
|
||||
const getAcceptedFileTypes = () => {
|
||||
const etlService = process.env.NEXT_PUBLIC_ETL_SERVICE;
|
||||
|
||||
if (etlService === 'LLAMACLOUD') {
|
||||
return {
|
||||
// LlamaCloud supported file types
|
||||
'application/pdf': ['.pdf'],
|
||||
'application/msword': ['.doc'],
|
||||
|
@ -100,8 +103,16 @@ export default function FileUploader() {
|
|||
'text/x-spreadsheet': ['.dif', '.sylk', '.slk', '.prn', '.et', '.uos1', '.uos2', '.wk1', '.wk2', '.wk3', '.wk4', '.wks', '.wq1', '.wq2', '.wb1', '.wb2', '.wb3', '.qpw', '.xlr', '.eth'],
|
||||
// Audio files (always supported)
|
||||
...audioFileTypes,
|
||||
}
|
||||
: {
|
||||
};
|
||||
} else if (etlService === 'DOCLING') {
|
||||
return {
|
||||
// Docling supported file types (currently only PDF)
|
||||
'application/pdf': ['.pdf'],
|
||||
// Audio files (always supported)
|
||||
...audioFileTypes,
|
||||
};
|
||||
} else {
|
||||
return {
|
||||
// Unstructured supported file types
|
||||
'image/bmp': ['.bmp'],
|
||||
'text/csv': ['.csv'],
|
||||
|
@ -130,6 +141,10 @@ export default function FileUploader() {
|
|||
// Audio files (always supported)
|
||||
...audioFileTypes,
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
const acceptedFileTypes = getAcceptedFileTypes();
|
||||
|
||||
const supportedExtensions = Array.from(new Set(Object.values(acceptedFileTypes).flat())).sort()
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue