mirror of
https://github.com/MODSetter/SurfSense.git
synced 2025-09-04 11:39:19 +00:00
feat: Removed Hard Dependency on Unstructured.io
- Added Llamaparse Support :)
This commit is contained in:
parent
5737ea80c0
commit
73751c0eb1
11 changed files with 402 additions and 84 deletions
36
README.md
36
README.md
|
@ -31,7 +31,7 @@ https://github.com/user-attachments/assets/bf64a6ca-934b-47ac-9e1b-edac5fe972ec
|
||||||
### 💡 **Idea**:
|
### 💡 **Idea**:
|
||||||
Have your own highly customizable private NotebookLM and Perplexity integrated with external sources.
|
Have your own highly customizable private NotebookLM and Perplexity integrated with external sources.
|
||||||
### 📁 **Multiple File Format Uploading Support**
|
### 📁 **Multiple File Format Uploading Support**
|
||||||
Save content from your own personal files *(Documents, images, videos and supports **34 file extensions**)* to your own personal knowledge base .
|
Save content from your own personal files *(documents, images, and videos — **50+ file extensions** supported)* to your own personal knowledge base.
|
||||||
### 🔍 **Powerful Search**
|
### 🔍 **Powerful Search**
|
||||||
Quickly research or find anything in your saved content .
|
Quickly research or find anything in your saved content .
|
||||||
### 💬 **Chat with your Saved Content**
|
### 💬 **Chat with your Saved Content**
|
||||||
|
@ -66,35 +66,33 @@ Open source and easy to deploy locally.
|
||||||
|
|
||||||
### 📄 **Supported File Extensions**
|
### 📄 **Supported File Extensions**
|
||||||
|
|
||||||
#### Document
|
> **Note**: File format support depends on your ETL service configuration. LlamaCloud supports 100+ formats, while Unstructured supports 34+ core formats.
|
||||||
|
|
||||||
`.doc`, `.docx`, `.odt`, `.rtf`, `.pdf`, `.xml`
|
#### Documents & Text
|
||||||
|
**LlamaCloud**: `.pdf`, `.doc`, `.docx`, `.docm`, `.dot`, `.dotm`, `.rtf`, `.txt`, `.xml`, `.epub`, `.odt`, `.wpd`, `.pages`, `.key`, `.numbers`, `.602`, `.abw`, `.cgm`, `.cwk`, `.hwp`, `.lwp`, `.mw`, `.mcw`, `.pbd`, `.sda`, `.sdd`, `.sdp`, `.sdw`, `.sgl`, `.sti`, `.sxi`, `.sxw`, `.stw`, `.sxg`, `.uof`, `.uop`, `.uot`, `.vor`, `.wps`, `.zabw`
|
||||||
|
|
||||||
#### Text & Markup
|
**Unstructured**: `.doc`, `.docx`, `.odt`, `.rtf`, `.pdf`, `.xml`, `.txt`, `.md`, `.markdown`, `.rst`, `.html`, `.org`, `.epub`
|
||||||
|
|
||||||
`.txt`, `.md`, `.markdown`, `.rst`, `.html`, `.org`
|
#### Presentations
|
||||||
|
**LlamaCloud**: `.ppt`, `.pptx`, `.pptm`, `.pot`, `.potm`, `.potx`, `.odp`, `.key`
|
||||||
|
|
||||||
#### Spreadsheets & Tables
|
**Unstructured**: `.ppt`, `.pptx`
|
||||||
|
|
||||||
`.xls`, `.xlsx`, `.csv`, `.tsv`
|
#### Spreadsheets & Data
|
||||||
|
**LlamaCloud**: `.xlsx`, `.xls`, `.xlsm`, `.xlsb`, `.xlw`, `.csv`, `.tsv`, `.ods`, `.fods`, `.numbers`, `.dbf`, `.123`, `.dif`, `.sylk`, `.slk`, `.prn`, `.et`, `.uos1`, `.uos2`, `.wk1`, `.wk2`, `.wk3`, `.wk4`, `.wks`, `.wq1`, `.wq2`, `.wb1`, `.wb2`, `.wb3`, `.qpw`, `.xlr`, `.eth`
|
||||||
|
|
||||||
#### Audio & Video
|
**Unstructured**: `.xls`, `.xlsx`, `.csv`, `.tsv`
|
||||||
|
|
||||||
`.mp3`, `.mpga`, `.m4a`, `.wav`, `.mp4`, `.mpeg`, `.webm`
|
|
||||||
|
|
||||||
#### Images
|
#### Images
|
||||||
|
**LlamaCloud**: `.jpg`, `.jpeg`, `.png`, `.gif`, `.bmp`, `.svg`, `.tiff`, `.webp`, `.html`, `.htm`, `.web`
|
||||||
|
|
||||||
`.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`, `.heic`
|
**Unstructured**: `.jpg`, `.jpeg`, `.png`, `.bmp`, `.tiff`, `.heic`
|
||||||
|
|
||||||
#### Email & eBooks
|
|
||||||
|
|
||||||
`.eml`, `.msg`, `.epub`
|
|
||||||
|
|
||||||
#### PowerPoint Presentations & Other
|
|
||||||
|
|
||||||
`.ppt`, `.pptx`, `.p7s`
|
|
||||||
|
|
||||||
|
#### Audio & Video *(Always Supported)*
|
||||||
|
`.mp3`, `.mpga`, `.m4a`, `.wav`, `.mp4`, `.mpeg`, `.webm`
|
||||||
|
|
||||||
|
#### Email & Communication
|
||||||
|
**Unstructured**: `.eml`, `.msg`, `.p7s`
|
||||||
|
|
||||||
### 🔖 Cross Browser Extension
|
### 🔖 Cross Browser Extension
|
||||||
- The SurfSense extension can be used to save any webpage you like.
|
- The SurfSense extension can be used to save any webpage you like.
|
||||||
|
|
|
@ -30,9 +30,13 @@ STT_SERVICE="openai/whisper-1"
|
||||||
OPENAI_API_KEY="sk-proj-iA"
|
OPENAI_API_KEY="sk-proj-iA"
|
||||||
GEMINI_API_KEY="AIzaSyB6-1641124124124124124124124124124"
|
GEMINI_API_KEY="AIzaSyB6-1641124124124124124124124124124"
|
||||||
|
|
||||||
UNSTRUCTURED_API_KEY="Tpu3P0U8iy"
|
|
||||||
FIRECRAWL_API_KEY="fcr-01J0000000000000000000000"
|
FIRECRAWL_API_KEY="fcr-01J0000000000000000000000"
|
||||||
|
|
||||||
|
#File Parser Service
|
||||||
|
ETL_SERVICE="UNSTRUCTURED" or "LLAMACLOUD"
|
||||||
|
UNSTRUCTURED_API_KEY="Tpu3P0U8iy"
|
||||||
|
LLAMA_CLOUD_API_KEY="llx-nnn"
|
||||||
|
|
||||||
#OPTIONAL: Add these for LangSmith Observability
|
#OPTIONAL: Add these for LangSmith Observability
|
||||||
LANGSMITH_TRACING=true
|
LANGSMITH_TRACING=true
|
||||||
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
|
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
|
||||||
|
|
|
@ -96,9 +96,18 @@ class Config:
|
||||||
# OAuth JWT
|
# OAuth JWT
|
||||||
SECRET_KEY = os.getenv("SECRET_KEY")
|
SECRET_KEY = os.getenv("SECRET_KEY")
|
||||||
|
|
||||||
|
# ETL Service
|
||||||
|
ETL_SERVICE = os.getenv("ETL_SERVICE")
|
||||||
|
|
||||||
|
if ETL_SERVICE == "UNSTRUCTURED":
|
||||||
# Unstructured API Key
|
# Unstructured API Key
|
||||||
UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")
|
UNSTRUCTURED_API_KEY = os.getenv("UNSTRUCTURED_API_KEY")
|
||||||
|
|
||||||
|
elif ETL_SERVICE == "LLAMACLOUD":
|
||||||
|
# LlamaCloud API Key
|
||||||
|
LLAMA_CLOUD_API_KEY = os.getenv("LLAMA_CLOUD_API_KEY")
|
||||||
|
|
||||||
|
|
||||||
# Firecrawl API Key
|
# Firecrawl API Key
|
||||||
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", None)
|
FIRECRAWL_API_KEY = os.getenv("FIRECRAWL_API_KEY", None)
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ from app.db import get_async_session, User, SearchSpace, Document, DocumentType
|
||||||
from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
|
from app.schemas import DocumentsCreate, DocumentUpdate, DocumentRead
|
||||||
from app.users import current_active_user
|
from app.users import current_active_user
|
||||||
from app.utils.check_ownership import check_ownership
|
from app.utils.check_ownership import check_ownership
|
||||||
from app.tasks.background_tasks import add_received_markdown_file_document, add_extension_received_document, add_received_file_document, add_crawled_url_document, add_youtube_video_document
|
from app.tasks.background_tasks import add_received_markdown_file_document, add_extension_received_document, add_received_file_document_using_unstructured, add_crawled_url_document, add_youtube_video_document, add_received_file_document_using_llamacloud
|
||||||
from app.config import config as app_config
|
from app.config import config as app_config
|
||||||
# Force asyncio to use standard event loop before unstructured imports
|
# Force asyncio to use standard event loop before unstructured imports
|
||||||
import asyncio
|
import asyncio
|
||||||
|
@ -102,7 +102,6 @@ async def create_documents(
|
||||||
with open(temp_path, "wb") as f:
|
with open(temp_path, "wb") as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|
||||||
# Process in background to avoid uvloop conflicts
|
|
||||||
fastapi_background_tasks.add_task(
|
fastapi_background_tasks.add_task(
|
||||||
process_file_in_background_with_new_session,
|
process_file_in_background_with_new_session,
|
||||||
temp_path,
|
temp_path,
|
||||||
|
@ -191,7 +190,7 @@ async def process_file_in_background(
|
||||||
search_space_id
|
search_space_id
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
# Use synchronous unstructured API to avoid event loop issues
|
if app_config.ETL_SERVICE == "UNSTRUCTURED":
|
||||||
from langchain_unstructured import UnstructuredLoader
|
from langchain_unstructured import UnstructuredLoader
|
||||||
|
|
||||||
# Process the file
|
# Process the file
|
||||||
|
@ -215,12 +214,50 @@ async def process_file_in_background(
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Pass the documents to the existing background task
|
# Pass the documents to the existing background task
|
||||||
await add_received_file_document(
|
await add_received_file_document_using_unstructured(
|
||||||
session,
|
session,
|
||||||
filename,
|
filename,
|
||||||
docs,
|
docs,
|
||||||
search_space_id
|
search_space_id
|
||||||
)
|
)
|
||||||
|
elif app_config.ETL_SERVICE == "LLAMACLOUD":
|
||||||
|
from llama_cloud_services import LlamaParse
|
||||||
|
from llama_cloud_services.parse.utils import ResultType
|
||||||
|
|
||||||
|
|
||||||
|
# Create LlamaParse parser instance
|
||||||
|
parser = LlamaParse(
|
||||||
|
api_key=app_config.LLAMA_CLOUD_API_KEY,
|
||||||
|
num_workers=1, # Use single worker for file processing
|
||||||
|
verbose=True,
|
||||||
|
language="en",
|
||||||
|
result_type=ResultType.MD
|
||||||
|
)
|
||||||
|
|
||||||
|
# Parse the file asynchronously
|
||||||
|
result = await parser.aparse(file_path)
|
||||||
|
|
||||||
|
# Clean up the temp file
|
||||||
|
import os
|
||||||
|
try:
|
||||||
|
os.unlink(file_path)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Get markdown documents from the result
|
||||||
|
markdown_documents = await result.aget_markdown_documents(split_by_page=False)
|
||||||
|
|
||||||
|
for doc in markdown_documents:
|
||||||
|
# Extract text content from the markdown documents
|
||||||
|
markdown_content = doc.text
|
||||||
|
|
||||||
|
# Process the documents using our LlamaCloud background task
|
||||||
|
await add_received_file_document_using_llamacloud(
|
||||||
|
session,
|
||||||
|
filename,
|
||||||
|
llamacloud_markdown_document=markdown_content,
|
||||||
|
search_space_id=search_space_id
|
||||||
|
)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import logging
|
import logging
|
||||||
logging.error(f"Error processing file in background: {str(e)}")
|
logging.error(f"Error processing file in background: {str(e)}")
|
||||||
|
@ -442,3 +479,5 @@ async def process_youtube_video_with_new_session(
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
import logging
|
import logging
|
||||||
logging.error(f"Error processing YouTube video: {str(e)}")
|
logging.error(f"Error processing YouTube video: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -289,7 +289,7 @@ async def add_received_markdown_file_document(
|
||||||
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
async def add_received_file_document(
|
async def add_received_file_document_using_unstructured(
|
||||||
session: AsyncSession,
|
session: AsyncSession,
|
||||||
file_name: str,
|
file_name: str,
|
||||||
unstructured_processed_elements: List[LangChainDocument],
|
unstructured_processed_elements: List[LangChainDocument],
|
||||||
|
@ -357,6 +357,83 @@ async def add_received_file_document(
|
||||||
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
raise RuntimeError(f"Failed to process file document: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
async def add_received_file_document_using_llamacloud(
|
||||||
|
session: AsyncSession,
|
||||||
|
file_name: str,
|
||||||
|
llamacloud_markdown_document: str,
|
||||||
|
search_space_id: int,
|
||||||
|
) -> Optional[Document]:
|
||||||
|
"""
|
||||||
|
Process and store document content parsed by LlamaCloud.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
session: Database session
|
||||||
|
file_name: Name of the processed file
|
||||||
|
llamacloud_markdown_document: Markdown content produced by LlamaCloud parsing
|
||||||
|
search_space_id: ID of the search space
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Document object if successful, None if failed
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use the LlamaCloud markdown output directly as the document text
|
||||||
|
file_in_markdown = llamacloud_markdown_document
|
||||||
|
|
||||||
|
content_hash = generate_content_hash(file_in_markdown)
|
||||||
|
|
||||||
|
# Check if document with this content hash already exists
|
||||||
|
existing_doc_result = await session.execute(
|
||||||
|
select(Document).where(Document.content_hash == content_hash)
|
||||||
|
)
|
||||||
|
existing_document = existing_doc_result.scalars().first()
|
||||||
|
|
||||||
|
if existing_document:
|
||||||
|
logging.info(f"Document with content hash {content_hash} already exists. Skipping processing.")
|
||||||
|
return existing_document
|
||||||
|
|
||||||
|
# Generate summary
|
||||||
|
summary_chain = SUMMARY_PROMPT_TEMPLATE | config.long_context_llm_instance
|
||||||
|
summary_result = await summary_chain.ainvoke({"document": file_in_markdown})
|
||||||
|
summary_content = summary_result.content
|
||||||
|
summary_embedding = config.embedding_model_instance.embed(summary_content)
|
||||||
|
|
||||||
|
# Process chunks
|
||||||
|
chunks = [
|
||||||
|
Chunk(
|
||||||
|
content=chunk.text,
|
||||||
|
embedding=config.embedding_model_instance.embed(chunk.text),
|
||||||
|
)
|
||||||
|
for chunk in config.chunker_instance.chunk(file_in_markdown)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Create and store document
|
||||||
|
document = Document(
|
||||||
|
search_space_id=search_space_id,
|
||||||
|
title=file_name,
|
||||||
|
document_type=DocumentType.FILE,
|
||||||
|
document_metadata={
|
||||||
|
"FILE_NAME": file_name,
|
||||||
|
"ETL_SERVICE": "LLAMACLOUD",
|
||||||
|
},
|
||||||
|
content=summary_content,
|
||||||
|
embedding=summary_embedding,
|
||||||
|
chunks=chunks,
|
||||||
|
content_hash=content_hash,
|
||||||
|
)
|
||||||
|
|
||||||
|
session.add(document)
|
||||||
|
await session.commit()
|
||||||
|
await session.refresh(document)
|
||||||
|
|
||||||
|
return document
|
||||||
|
except SQLAlchemyError as db_error:
|
||||||
|
await session.rollback()
|
||||||
|
raise db_error
|
||||||
|
except Exception as e:
|
||||||
|
await session.rollback()
|
||||||
|
raise RuntimeError(f"Failed to process file document using LlamaCloud: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
async def add_youtube_video_document(
|
async def add_youtube_video_document(
|
||||||
session: AsyncSession, url: str, search_space_id: int
|
session: AsyncSession, url: str, search_space_id: int
|
||||||
):
|
):
|
||||||
|
|
|
@ -17,6 +17,7 @@ dependencies = [
|
||||||
"langgraph>=0.3.29",
|
"langgraph>=0.3.29",
|
||||||
"linkup-sdk>=0.2.4",
|
"linkup-sdk>=0.2.4",
|
||||||
"litellm>=1.61.4",
|
"litellm>=1.61.4",
|
||||||
|
"llama-cloud-services>=0.6.25",
|
||||||
"markdownify>=0.14.1",
|
"markdownify>=0.14.1",
|
||||||
"notion-client>=2.3.0",
|
"notion-client>=2.3.0",
|
||||||
"pgvector>=0.3.6",
|
"pgvector>=0.3.6",
|
||||||
|
|
128
surfsense_backend/uv.lock
generated
128
surfsense_backend/uv.lock
generated
|
@ -110,6 +110,18 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 },
|
{ url = "https://files.pythonhosted.org/packages/ec/6a/bc7e17a3e87a2985d3e8f4da4cd0f481060eb78fb08596c42be62c90a4d9/aiosignal-1.3.2-py2.py3-none-any.whl", hash = "sha256:45cde58e409a301715980c2b01d0c28bdde3770d8290b5eb2173759d9acb31a5", size = 7597 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aiosqlite"
|
||||||
|
version = "0.21.0"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "typing-extensions" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/13/7d/8bca2bf9a247c2c5dfeec1d7a5f40db6518f88d314b8bca9da29670d2671/aiosqlite-0.21.0.tar.gz", hash = "sha256:131bb8056daa3bc875608c631c678cda73922a2d4ba8aec373b19f18c17e7aa3", size = 13454 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/f5/10/6c25ed6de94c49f88a91fa5018cb4c0f3625f31d5be9f771ebe5cc7cd506/aiosqlite-0.21.0-py3-none-any.whl", hash = "sha256:2549cf4057f95f53dcba16f2b64e8e2791d7e1adedb13197dd8ed77bb226d7d0", size = 15792 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "alembic"
|
name = "alembic"
|
||||||
version = "1.15.2"
|
version = "1.15.2"
|
||||||
|
@ -228,6 +240,22 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148 },
|
{ url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "banks"
|
||||||
|
version = "2.1.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "deprecated" },
|
||||||
|
{ name = "griffe" },
|
||||||
|
{ name = "jinja2" },
|
||||||
|
{ name = "platformdirs" },
|
||||||
|
{ name = "pydantic" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/77/34/2b6697f02ffb68bee50e5fd37d6c64432244d3245603fd62950169dfed7e/banks-2.1.2.tar.gz", hash = "sha256:a0651db9d14b57fa2e115e78f68dbb1b36fe226ad6eef96192542908b1d20c1f", size = 173332 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/04/4a/7fdca29d1db62f5f5c3446bf8f668beacdb0b5a8aff4247574ddfddc6bcd/banks-2.1.2-py3-none-any.whl", hash = "sha256:7fba451069f6bea376483b8136a0f29cb1e6883133626d00e077e20a3d102c0e", size = 28064 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bcrypt"
|
name = "bcrypt"
|
||||||
version = "4.2.1"
|
version = "4.2.1"
|
||||||
|
@ -572,6 +600,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 },
|
{ url = "https://files.pythonhosted.org/packages/6e/c6/ac0b6c1e2d138f1002bcf799d330bd6d85084fece321e662a14223794041/Deprecated-1.2.18-py2.py3-none-any.whl", hash = "sha256:bd5011788200372a32418f888e326a09ff80d0214bd961147cfed01b5c018eec", size = 9998 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "dirtyjson"
|
||||||
|
version = "1.0.8"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/db/04/d24f6e645ad82ba0ef092fa17d9ef7a21953781663648a01c9371d9e8e98/dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd", size = 30782 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/68/69/1bcf70f81de1b4a9f21b3a62ec0c83bdff991c88d6cc2267d02408457e88/dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53", size = 25197 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "distro"
|
name = "distro"
|
||||||
version = "1.9.0"
|
version = "1.9.0"
|
||||||
|
@ -988,6 +1025,18 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112 },
|
{ url = "https://files.pythonhosted.org/packages/ac/38/08cc303ddddc4b3d7c628c3039a61a3aae36c241ed01393d00c2fd663473/greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6", size = 1142112 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "griffe"
|
||||||
|
version = "1.7.3"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "colorama" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/a9/3e/5aa9a61f7c3c47b0b52a1d930302992229d191bf4bc76447b324b731510a/griffe-1.7.3.tar.gz", hash = "sha256:52ee893c6a3a968b639ace8015bec9d36594961e156e23315c8e8e51401fa50b", size = 395137 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/58/c6/5c20af38c2a57c15d87f7f38bee77d63c1d2a3689f74fefaf35915dd12b2/griffe-1.7.3-py3-none-any.whl", hash = "sha256:c6b3ee30c2f0f17f30bcdef5068d6ab7a2a4f1b8bf1a3e74b56fffd21e1c5f75", size = 129303 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "grpcio"
|
name = "grpcio"
|
||||||
version = "1.71.0"
|
version = "1.71.0"
|
||||||
|
@ -1604,6 +1653,72 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/f9/c2/1b6c502909b7af9054736af61e27558a3341e8c1ba28e7f82473e6dd936f/litellm-1.61.4-py3-none-any.whl", hash = "sha256:e87e0d397a191795b4217f9299fc9b21eaacaab91409695f0a4780cceccda6e1", size = 6814517 },
|
{ url = "https://files.pythonhosted.org/packages/f9/c2/1b6c502909b7af9054736af61e27558a3341e8c1ba28e7f82473e6dd936f/litellm-1.61.4-py3-none-any.whl", hash = "sha256:e87e0d397a191795b4217f9299fc9b21eaacaab91409695f0a4780cceccda6e1", size = 6814517 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "llama-cloud"
|
||||||
|
version = "0.1.23"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "certifi" },
|
||||||
|
{ name = "httpx" },
|
||||||
|
{ name = "pydantic" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/5b/e4/d1a30167ed6690a408382be1cf7de220a506085f4371baaf067d65bad8fd/llama_cloud-0.1.23.tar.gz", hash = "sha256:3d84a24a860f046d39a106c06742ec0ea39a574ac42bbf91706fe025f44e233e", size = 101292 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/8a/15/3b56acef877dbc5d01d7e1a782c2cc50ef8a08d5773121c3bc20546de582/llama_cloud-0.1.23-py3-none-any.whl", hash = "sha256:ce95b0705d85c99b3b27b0af0d16a17d9a81b14c96bf13c1063a1bd13d8d0446", size = 267343 },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "llama-cloud-services"
|
||||||
|
version = "0.6.25"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "click" },
|
||||||
|
{ name = "llama-cloud" },
|
||||||
|
{ name = "llama-index-core" },
|
||||||
|
{ name = "platformdirs" },
|
||||||
|
{ name = "pydantic" },
|
||||||
|
{ name = "python-dotenv" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/79/c0/89f89dfc2c2b6c2d5c1c5fde9f445696eb12f9c2a4e17637ab0aaf7cc373/llama_cloud_services-0.6.25.tar.gz", hash = "sha256:3608004b0cf984640a3a36657b8b40394d7ce2c48e3eb9dd24fc654df7643595", size = 32303 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e6/f1/99b8ef4a636dafd5f1ae1e1b19eb9f793f51573d782919bf01d9b9f797f4/llama_cloud_services-0.6.25-py3-none-any.whl", hash = "sha256:aef0afbbf0d6dc485e6566af2daeeefa8caa7bc7f6511d860036bc0aac15361b", size = 37231 },
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "llama-index-core"
|
||||||
|
version = "0.12.39"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "aiohttp" },
|
||||||
|
{ name = "aiosqlite" },
|
||||||
|
{ name = "banks" },
|
||||||
|
{ name = "dataclasses-json" },
|
||||||
|
{ name = "deprecated" },
|
||||||
|
{ name = "dirtyjson" },
|
||||||
|
{ name = "filetype" },
|
||||||
|
{ name = "fsspec" },
|
||||||
|
{ name = "httpx" },
|
||||||
|
{ name = "nest-asyncio" },
|
||||||
|
{ name = "networkx" },
|
||||||
|
{ name = "nltk" },
|
||||||
|
{ name = "numpy" },
|
||||||
|
{ name = "pillow" },
|
||||||
|
{ name = "pydantic" },
|
||||||
|
{ name = "pyyaml" },
|
||||||
|
{ name = "requests" },
|
||||||
|
{ name = "sqlalchemy", extra = ["asyncio"] },
|
||||||
|
{ name = "tenacity" },
|
||||||
|
{ name = "tiktoken" },
|
||||||
|
{ name = "tqdm" },
|
||||||
|
{ name = "typing-extensions" },
|
||||||
|
{ name = "typing-inspect" },
|
||||||
|
{ name = "wrapt" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/f7/45/163806502804ff75ace474f868cc33158774c4eb31d565133f32932e930e/llama_index_core-0.12.39.tar.gz", hash = "sha256:0cca9de59953542a3c2f1db61327c5204e0b1e997f31f1200e49392b2879593a", size = 7292040 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/dd/a3/583d80764df75aefc9885f28dcc06a0e5aefc993fa5318186e70f2340d73/llama_index_core-0.12.39-py3-none-any.whl", hash = "sha256:c255ed87aa85e43893f2bb05870b61ce7701d7a6a931d174ba925def5856b4c2", size = 7664906 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lxml"
|
name = "lxml"
|
||||||
version = "5.3.1"
|
version = "5.3.1"
|
||||||
|
@ -2468,6 +2583,15 @@ wheels = [
|
||||||
{ url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 },
|
{ url = "https://files.pythonhosted.org/packages/cf/6c/41c21c6c8af92b9fea313aa47c75de49e2f9a467964ee33eb0135d47eb64/pillow-11.1.0-cp313-cp313t-win_arm64.whl", hash = "sha256:67cd427c68926108778a9005f2a04adbd5e67c442ed21d95389fe1d595458756", size = 2377651 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "platformdirs"
|
||||||
|
version = "4.3.8"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/fe/8b/3c73abc9c759ecd3f1f7ceff6685840859e8070c4d947c93fae71f6a0bf2/platformdirs-4.3.8.tar.gz", hash = "sha256:3d512d96e16bcb959a814c9f348431070822a6496326a4be0911c40b5a74c2bc", size = 21362 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/fe/39/979e8e21520d4e47a0bbe349e2713c0aac6f3d853d0e5b34d76206c439aa/platformdirs-4.3.8-py3-none-any.whl", hash = "sha256:ff7059bb7eb1179e2685604f4aaf157cfd9535242bd23742eadc3c13542139b4", size = 18567 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "playwright"
|
name = "playwright"
|
||||||
version = "1.50.0"
|
version = "1.50.0"
|
||||||
|
@ -3392,7 +3516,7 @@ wheels = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "surf-new-backend"
|
name = "surf-new-backend"
|
||||||
version = "0.0.6"
|
version = "0.0.7"
|
||||||
source = { virtual = "." }
|
source = { virtual = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "alembic" },
|
{ name = "alembic" },
|
||||||
|
@ -3407,6 +3531,7 @@ dependencies = [
|
||||||
{ name = "langgraph" },
|
{ name = "langgraph" },
|
||||||
{ name = "linkup-sdk" },
|
{ name = "linkup-sdk" },
|
||||||
{ name = "litellm" },
|
{ name = "litellm" },
|
||||||
|
{ name = "llama-cloud-services" },
|
||||||
{ name = "markdownify" },
|
{ name = "markdownify" },
|
||||||
{ name = "notion-client" },
|
{ name = "notion-client" },
|
||||||
{ name = "pgvector" },
|
{ name = "pgvector" },
|
||||||
|
@ -3438,6 +3563,7 @@ requires-dist = [
|
||||||
{ name = "langgraph", specifier = ">=0.3.29" },
|
{ name = "langgraph", specifier = ">=0.3.29" },
|
||||||
{ name = "linkup-sdk", specifier = ">=0.2.4" },
|
{ name = "linkup-sdk", specifier = ">=0.2.4" },
|
||||||
{ name = "litellm", specifier = ">=1.61.4" },
|
{ name = "litellm", specifier = ">=1.61.4" },
|
||||||
|
{ name = "llama-cloud-services", specifier = ">=0.6.25" },
|
||||||
{ name = "markdownify", specifier = ">=0.14.1" },
|
{ name = "markdownify", specifier = ">=0.14.1" },
|
||||||
{ name = "notion-client", specifier = ">=2.3.0" },
|
{ name = "notion-client", specifier = ">=2.3.0" },
|
||||||
{ name = "pgvector", specifier = ">=0.3.6" },
|
{ name = "pgvector", specifier = ">=0.3.6" },
|
||||||
|
|
|
@ -1,2 +1,3 @@
|
||||||
NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
|
NEXT_PUBLIC_FASTAPI_BACKEND_URL=http://localhost:8000
|
||||||
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL or GOOGLE
|
NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE=LOCAL or GOOGLE
|
||||||
|
NEXT_PUBLIC_ETL_SERVICE=UNSTRUCTURED or LLAMACLOUD
|
|
@ -42,7 +42,66 @@ export default function FileUploader() {
|
||||||
const router = useRouter();
|
const router = useRouter();
|
||||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||||
|
|
||||||
const acceptedFileTypes = {
|
// Audio files are always supported (using whisper)
|
||||||
|
const audioFileTypes = {
|
||||||
|
'audio/mpeg': ['.mp3', '.mpeg', '.mpga'],
|
||||||
|
'audio/mp4': ['.mp4', '.m4a'],
|
||||||
|
'audio/wav': ['.wav'],
|
||||||
|
'audio/webm': ['.webm'],
|
||||||
|
};
|
||||||
|
|
||||||
|
// Conditionally set accepted file types based on ETL service
|
||||||
|
const acceptedFileTypes = process.env.NEXT_PUBLIC_ETL_SERVICE === 'LLAMACLOUD'
|
||||||
|
? {
|
||||||
|
// LlamaCloud supported file types
|
||||||
|
'application/pdf': ['.pdf'],
|
||||||
|
'application/msword': ['.doc'],
|
||||||
|
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
|
||||||
|
'application/vnd.ms-word.document.macroEnabled.12': ['.docm'],
|
||||||
|
'application/msword-template': ['.dot'],
|
||||||
|
'application/vnd.ms-word.template.macroEnabled.12': ['.dotm'],
|
||||||
|
'application/vnd.ms-powerpoint': ['.ppt'],
|
||||||
|
'application/vnd.ms-powerpoint.template.macroEnabled.12': ['.pptm'],
|
||||||
|
'application/vnd.openxmlformats-officedocument.presentationml.presentation': ['.pptx'],
|
||||||
|
'application/vnd.ms-powerpoint.template': ['.pot'],
|
||||||
|
'application/vnd.openxmlformats-officedocument.presentationml.template': ['.potx'],
|
||||||
|
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
||||||
|
'application/vnd.ms-excel': ['.xls'],
|
||||||
|
'application/vnd.ms-excel.sheet.macroEnabled.12': ['.xlsm'],
|
||||||
|
'application/vnd.ms-excel.sheet.binary.macroEnabled.12': ['.xlsb'],
|
||||||
|
'application/vnd.ms-excel.workspace': ['.xlw'],
|
||||||
|
'application/rtf': ['.rtf'],
|
||||||
|
'application/xml': ['.xml'],
|
||||||
|
'application/epub+zip': ['.epub'],
|
||||||
|
'application/vnd.apple.keynote': ['.key'],
|
||||||
|
'application/vnd.apple.pages': ['.pages'],
|
||||||
|
'application/vnd.apple.numbers': ['.numbers'],
|
||||||
|
'application/vnd.wordperfect': ['.wpd'],
|
||||||
|
'application/vnd.oasis.opendocument.text': ['.odt'],
|
||||||
|
'application/vnd.oasis.opendocument.presentation': ['.odp'],
|
||||||
|
'application/vnd.oasis.opendocument.graphics': ['.odg'],
|
||||||
|
'application/vnd.oasis.opendocument.spreadsheet': ['.ods'],
|
||||||
|
'application/vnd.oasis.opendocument.formula': ['.fods'],
|
||||||
|
'text/plain': ['.txt'],
|
||||||
|
'text/csv': ['.csv'],
|
||||||
|
'text/tab-separated-values': ['.tsv'],
|
||||||
|
'text/html': ['.html', '.htm', '.web'],
|
||||||
|
'image/jpeg': ['.jpg', '.jpeg'],
|
||||||
|
'image/png': ['.png'],
|
||||||
|
'image/gif': ['.gif'],
|
||||||
|
'image/bmp': ['.bmp'],
|
||||||
|
'image/svg+xml': ['.svg'],
|
||||||
|
'image/tiff': ['.tiff'],
|
||||||
|
'image/webp': ['.webp'],
|
||||||
|
'application/dbase': ['.dbf'],
|
||||||
|
'application/vnd.lotus-1-2-3': ['.123'],
|
||||||
|
'text/x-web-markdown': ['.602', '.abw', '.cgm', '.cwk', '.hwp', '.lwp', '.mw', '.mcw', '.pbd', '.sda', '.sdd', '.sdp', '.sdw', '.sgl', '.sti', '.sxi', '.sxw', '.stw', '.sxg', '.uof', '.uop', '.uot', '.vor', '.wps', '.zabw'],
|
||||||
|
'text/x-spreadsheet': ['.dif', '.sylk', '.slk', '.prn', '.et', '.uos1', '.uos2', '.wk1', '.wk2', '.wk3', '.wk4', '.wks', '.wq1', '.wq2', '.wb1', '.wb2', '.wb3', '.qpw', '.xlr', '.eth'],
|
||||||
|
// Audio files (always supported)
|
||||||
|
...audioFileTypes,
|
||||||
|
}
|
||||||
|
: {
|
||||||
|
// Unstructured supported file types
|
||||||
'image/bmp': ['.bmp'],
|
'image/bmp': ['.bmp'],
|
||||||
'text/csv': ['.csv'],
|
'text/csv': ['.csv'],
|
||||||
'application/msword': ['.doc'],
|
'application/msword': ['.doc'],
|
||||||
|
@ -69,11 +128,9 @@ export default function FileUploader() {
|
||||||
'application/vnd.ms-excel': ['.xls'],
|
'application/vnd.ms-excel': ['.xls'],
|
||||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
|
||||||
'application/xml': ['.xml'],
|
'application/xml': ['.xml'],
|
||||||
'audio/mpeg': ['.mp3', '.mpeg', '.mpga'],
|
// Audio files (always supported)
|
||||||
'audio/mp4': ['.mp4', '.m4a'],
|
...audioFileTypes,
|
||||||
'audio/wav': ['.wav'],
|
};
|
||||||
'audio/webm': ['.webm'],
|
|
||||||
}
|
|
||||||
|
|
||||||
const supportedExtensions = Array.from(new Set(Object.values(acceptedFileTypes).flat())).sort()
|
const supportedExtensions = Array.from(new Set(Object.values(acceptedFileTypes).flat())).sort()
|
||||||
|
|
||||||
|
|
|
@ -90,7 +90,9 @@ Before you begin, ensure you have:
|
||||||
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
|
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
|
||||||
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
|
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
|
||||||
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
|
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
|
||||||
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing |
|
| ETL_SERVICE | Document parsing service: `UNSTRUCTURED` (supports 34+ formats) or `LLAMACLOUD` (supports 50+ formats including legacy document types) |
|
||||||
|
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing (required if ETL_SERVICE=UNSTRUCTURED) |
|
||||||
|
| LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) |
|
||||||
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
|
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
|
||||||
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
|
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
|
||||||
| STT_SERVICE | Speech-to-Text API provider for Podcasts (e.g., `openai/whisper-1`). See [supported providers](https://docs.litellm.ai/docs/audio_transcription#supported-providers) |
|
| STT_SERVICE | Speech-to-Text API provider for Podcasts (e.g., `openai/whisper-1`). See [supported providers](https://docs.litellm.ai/docs/audio_transcription#supported-providers) |
|
||||||
|
@ -136,6 +138,7 @@ For other LLM providers, refer to the [LiteLLM documentation](https://docs.litel
|
||||||
| ------------------------------- | ---------------------------------------------------------- |
|
| ------------------------------- | ---------------------------------------------------------- |
|
||||||
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | URL of the backend service (e.g., `http://localhost:8000`) |
|
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | URL of the backend service (e.g., `http://localhost:8000`) |
|
||||||
| NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE | Same value as set in backend AUTH_TYPE i.e `GOOGLE` for OAuth with Google, `LOCAL` for email/password authentication |
|
| NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE | Same value as set in backend AUTH_TYPE i.e `GOOGLE` for OAuth with Google, `LOCAL` for email/password authentication |
|
||||||
|
| NEXT_PUBLIC_ETL_SERVICE | Document parsing service (should match the backend `ETL_SERVICE` value): `UNSTRUCTURED` or `LLAMACLOUD`. Determines which file formats the upload interface accepts |
|
||||||
|
|
||||||
2. **Build and Start Containers**
|
2. **Build and Start Containers**
|
||||||
|
|
||||||
|
|
|
@ -61,7 +61,9 @@ Edit the `.env` file and set the following variables:
|
||||||
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
|
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
|
||||||
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
|
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
|
||||||
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
|
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
|
||||||
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing |
|
| ETL_SERVICE | Document parsing service: `UNSTRUCTURED` (supports 34+ formats) or `LLAMACLOUD` (supports 50+ formats including legacy document types) |
|
||||||
|
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing (required if ETL_SERVICE=UNSTRUCTURED) |
|
||||||
|
| LLAMA_CLOUD_API_KEY | API key for LlamaCloud service for document parsing (required if ETL_SERVICE=LLAMACLOUD) |
|
||||||
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
|
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
|
||||||
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
|
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
|
||||||
| STT_SERVICE | Speech-to-Text API provider for Podcasts (e.g., `openai/whisper-1`). See [supported providers](https://docs.litellm.ai/docs/audio_transcription#supported-providers) |
|
| STT_SERVICE | Speech-to-Text API provider for Podcasts (e.g., `openai/whisper-1`). See [supported providers](https://docs.litellm.ai/docs/audio_transcription#supported-providers) |
|
||||||
|
@ -182,6 +184,7 @@ Edit the `.env` file and set:
|
||||||
| ------------------------------- | ------------------------------------------- |
|
| ------------------------------- | ------------------------------------------- |
|
||||||
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | Backend URL (e.g., `http://localhost:8000`) |
|
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | Backend URL (e.g., `http://localhost:8000`) |
|
||||||
| NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE | Same value as set in backend AUTH_TYPE i.e `GOOGLE` for OAuth with Google, `LOCAL` for email/password authentication |
|
| NEXT_PUBLIC_FASTAPI_BACKEND_AUTH_TYPE | Same value as set in backend AUTH_TYPE i.e `GOOGLE` for OAuth with Google, `LOCAL` for email/password authentication |
|
||||||
|
| NEXT_PUBLIC_ETL_SERVICE | Document parsing service (should match the backend `ETL_SERVICE` value): `UNSTRUCTURED` or `LLAMACLOUD`. Determines which file formats the upload interface accepts |
|
||||||
|
|
||||||
### 2. Install Dependencies
|
### 2. Install Dependencies
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue