fix: Added API_BASE param for LiteLLM.

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-05-08 19:31:47 -07:00
parent cae5f835af
commit 4a2be4b98e
7 changed files with 151 additions and 73 deletions

View file

@@ -30,3 +30,8 @@ LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="lsv2_pt_....."
LANGSMITH_PROJECT="surfsense"
# OPTIONAL: LiteLLM API Base
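# When set, each value is passed to LiteLLM as api_base (illustrative example: "http://localhost:11434" for a local Ollama server)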
FAST_LLM_API_BASE=""
STRATEGIC_LLM_API_BASE=""
LONG_CONTEXT_LLM_API_BASE=""

View file

@@ -5,3 +5,4 @@ data/
__pycache__/
.flashrank_cache
surf_new_backend.egg-info/
podcasts/

View file

@@ -3,7 +3,7 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import List, Optional, Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.utils.streaming_service import StreamingService
@@ -21,7 +21,7 @@ class State:
# Streaming service
streaming_service: StreamingService
# Intermediate state - populated during workflow
# chat_history: Optional[List[Any]] = field(default=None)
# Using field to explicitly mark as part of state
answer_outline: Optional[Any] = field(default=None)

View file

@@ -46,14 +46,27 @@ class Config:
# LONG-CONTEXT LLM
LONG_CONTEXT_LLM = os.getenv("LONG_CONTEXT_LLM")
LONG_CONTEXT_LLM_API_BASE = os.getenv("LONG_CONTEXT_LLM_API_BASE")
if LONG_CONTEXT_LLM_API_BASE:
    long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM, api_base=LONG_CONTEXT_LLM_API_BASE)
else:
    long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM)
# FAST LLM
FAST_LLM = os.getenv("FAST_LLM")
FAST_LLM_API_BASE = os.getenv("FAST_LLM_API_BASE")
if FAST_LLM_API_BASE:
    fast_llm_instance = ChatLiteLLM(model=FAST_LLM, api_base=FAST_LLM_API_BASE)
else:
    fast_llm_instance = ChatLiteLLM(model=FAST_LLM)
# STRATEGIC LLM
STRATEGIC_LLM = os.getenv("STRATEGIC_LLM")
STRATEGIC_LLM_API_BASE = os.getenv("STRATEGIC_LLM_API_BASE")
if STRATEGIC_LLM_API_BASE:
    strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM, api_base=STRATEGIC_LLM_API_BASE)
else:
    strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM)
# Chonkie Configuration | Edit this to your needs
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
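All three tiers follow the same shape: read an optional `*_API_BASE` variable and pass `api_base` to `ChatLiteLLM` only when it is set. A minimal sketch of how the repetition could be collapsed into a helper (hypothetical `_make_llm`, not part of this commit; assumes `ChatLiteLLM` comes from `langchain_community.chat_models`):

```python
import os

from langchain_community.chat_models import ChatLiteLLM  # assumed import path


def _make_llm(model_env: str, api_base_env: str) -> ChatLiteLLM:
    """Build a ChatLiteLLM instance, passing api_base only when its env var is set."""
    model = os.getenv(model_env)
    api_base = os.getenv(api_base_env)
    if api_base:
        return ChatLiteLLM(model=model, api_base=api_base)
    return ChatLiteLLM(model=model)


fast_llm_instance = _make_llm("FAST_LLM", "FAST_LLM_API_BASE")
strategic_llm_instance = _make_llm("STRATEGIC_LLM", "STRATEGIC_LLM_API_BASE")
long_context_llm_instance = _make_llm("LONG_CONTEXT_LLM", "LONG_CONTEXT_LLM_API_BASE")
```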

View file

@@ -9,7 +9,7 @@ interface PageProps {
export default async function ChatsPage({ params }: PageProps) {
// Get search space ID from the route parameter
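// `params` may arrive as a Promise in newer Next.js versions; awaiting Promise.resolve(params) handles both the plain-object and Promise cases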
const { search_space_id: searchSpaceId } = await Promise.resolve(params);
return (
<Suspense fallback={<div className="flex items-center justify-center h-[60vh]">

View file

@@ -3,6 +3,7 @@ title: Docker Installation
description: Setting up SurfSense using Docker
full: true
---
## Known Limitations
⚠️ **Important Note:** Currently, the following features have limited functionality when running in Docker:
@@ -12,7 +13,6 @@ full: true
We're actively working to resolve these limitations in future releases.
# Docker Installation
This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment.
@@ -36,6 +36,7 @@ Before you begin, ensure you have:
Set up the necessary environment variables:
**Linux/macOS:**
```bash
# Copy example environment files
cp surfsense_backend/.env.example surfsense_backend/.env
@@ -43,12 +44,14 @@
```
**Windows (Command Prompt):**
```cmd
copy surfsense_backend\.env.example surfsense_backend\.env
copy surfsense_web\.env.example surfsense_web\.env
```
**Windows (PowerShell):**
```powershell
Copy-Item -Path surfsense_backend\.env.example -Destination surfsense_backend\.env
Copy-Item -Path surfsense_web\.env.example -Destination surfsense_web\.env
```
@@ -58,40 +61,57 @@ Before you begin, ensure you have:
**Backend Environment Variables:**
| ENV VARIABLE | DESCRIPTION |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID obtained from Google Cloud Console |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret obtained from Google Cloud Console |
| NEXT_FRONTEND_URL | URL where your frontend application is hosted (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing |
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
Include API keys for the LLM providers you're using. For example:
- `OPENAI_API_KEY`: If using OpenAI models
- `GEMINI_API_KEY`: If using Google Gemini models
**Optional LangSmith Observability:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| LANGSMITH_TRACING | Enable LangSmith tracing (e.g., `true`) |
| LANGSMITH_ENDPOINT | LangSmith API endpoint (e.g., `https://api.smith.langchain.com`) |
| LANGSMITH_API_KEY | Your LangSmith API key |
| LANGSMITH_PROJECT | LangSmith project name (e.g., `surfsense`) |
**Optional LiteLLM API Base URLs:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| FAST_LLM_API_BASE | Custom API base URL for the fast LLM |
| STRATEGIC_LLM_API_BASE | Custom API base URL for the strategic LLM |
| LONG_CONTEXT_LLM_API_BASE | Custom API base URL for the long context LLM |
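When these are unset, LiteLLM calls each provider's default endpoint. Set them to route requests through a self-hosted server instead, for example a local Ollama instance (illustrative values; adjust host and port to your setup):

```bash
FAST_LLM_API_BASE="http://localhost:11434"
STRATEGIC_LLM_API_BASE="http://localhost:11434"
LONG_CONTEXT_LLM_API_BASE="http://localhost:11434"
```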
For other LLM providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers).
**Frontend Environment Variables:**
| ENV VARIABLE | DESCRIPTION |
| ------------------------------- | ---------------------------------------------------------- |
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | URL of the backend service (e.g., `http://localhost:8000`) |
2. **Build and Start Containers**
Start the Docker containers:
**Linux/macOS/Windows:**
```bash
docker-compose up --build
```
@@ -99,6 +119,7 @@ Before you begin, ensure you have:
To run in detached mode (in the background):
**Linux/macOS/Windows:**
```bash
docker-compose up -d
```
@@ -108,6 +129,7 @@ Before you begin, ensure you have:
3. **Access the Applications**
Once the containers are running, you can access:
- Frontend: [http://localhost:3000](http://localhost:3000)
- Backend API: [http://localhost:8000](http://localhost:8000)
- API Documentation: [http://localhost:8000/docs](http://localhost:8000/docs)
@@ -119,6 +141,7 @@ Before you begin, ensure you have:
- **Stop containers:**
**Linux/macOS/Windows:**
```bash
docker-compose down
```
@@ -126,6 +149,7 @@ Before you begin, ensure you have:
- **View logs:**
**Linux/macOS/Windows:**
```bash
# All services
docker-compose logs -f
```
@@ -139,6 +163,7 @@ Before you begin, ensure you have:
- **Restart a specific service:**
**Linux/macOS/Windows:**
```bash
docker-compose restart backend
```
@@ -146,6 +171,7 @@ Before you begin, ensure you have:
- **Execute commands in a running container:**
**Linux/macOS/Windows:**
```bash
# Backend
docker-compose exec backend python -m pytest
```
@@ -163,7 +189,6 @@ Before you begin, ensure you have:
- For frontend dependency issues, check the `Dockerfile` in the frontend directory.
- **Windows-specific:** If you encounter line ending issues (CRLF vs LF), configure Git to handle line endings properly with `git config --global core.autocrlf true` before cloning the repository.
## Next Steps
Once your installation is complete, you can start using SurfSense! Navigate to the frontend URL and log in using your Google account.

View file

@@ -27,18 +27,21 @@ The backend is the core of SurfSense. Follow these steps to set it up:
First, create and configure your environment variables by copying the example file:
**Linux/macOS:**
```bash
cd surfsense_backend
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_backend
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_backend
Copy-Item -Path .env.example -Destination .env
```
@@ -46,33 +49,50 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file and set the following variables:
| ENV VARIABLE | DESCRIPTION |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret |
| NEXT_FRONTEND_URL | Frontend application URL (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed long-context LLM (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service |
| FIRECRAWL_API_KEY | API key for Firecrawl service (if using crawler) |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
**Important**: Since LLM calls are routed through LiteLLM, include API keys for the LLM providers you're using:
- For OpenAI models: `OPENAI_API_KEY`
- For Google Gemini models: `GEMINI_API_KEY`
- For other providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers)
**Optional LangSmith Observability:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| LANGSMITH_TRACING | Enable LangSmith tracing (e.g., `true`) |
| LANGSMITH_ENDPOINT | LangSmith API endpoint (e.g., `https://api.smith.langchain.com`) |
| LANGSMITH_API_KEY | Your LangSmith API key |
| LANGSMITH_PROJECT | LangSmith project name (e.g., `surfsense`) |
**Optional LiteLLM API Base URLs:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| FAST_LLM_API_BASE | Custom API base URL for the fast LLM |
| STRATEGIC_LLM_API_BASE | Custom API base URL for the strategic LLM |
| LONG_CONTEXT_LLM_API_BASE | Custom API base URL for the long context LLM |
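As in the Docker setup, these are optional; when set, the backend passes each value to LiteLLM as `api_base`. For example, if you serve your models through a local [LiteLLM proxy](https://docs.litellm.ai/docs/simple_proxy) (illustrative values; port 4000 is the proxy's default):

```bash
FAST_LLM_API_BASE="http://localhost:4000"
STRATEGIC_LLM_API_BASE="http://localhost:4000"
LONG_CONTEXT_LLM_API_BASE="http://localhost:4000"
```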
### 2. Install Dependencies
Install the backend dependencies using `uv`:
**Linux/macOS:**
```bash
# Install uv if you don't have it
curl -fsSL https://astral.sh/uv/install.sh | bash
@@ -82,6 +102,7 @@ uv sync
```
**Windows (PowerShell):**
```powershell
# Install uv if you don't have it
iwr -useb https://astral.sh/uv/install.ps1 | iex
@@ -91,6 +112,7 @@ uv sync
```
**Windows (Command Prompt):**
```cmd
# Install dependencies with uv (after installing uv)
uv sync
```
@@ -101,6 +123,7 @@ uv sync
Start the backend server:
**Linux/macOS/Windows:**
```bash
# Run without hot reloading
uv run main.py
```
@@ -118,18 +141,21 @@ If everything is set up correctly, you should see output indicating the server i
Set up the frontend environment:
**Linux/macOS:**
```bash
cd surfsense_web
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_web
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_web
Copy-Item -Path .env.example -Destination .env
```
@@ -137,8 +163,8 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file and set:
| ENV VARIABLE | DESCRIPTION |
| ------------------------------- | ------------------------------------------- |
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | Backend URL (e.g., `http://localhost:8000`) |
### 2. Install Dependencies
@@ -146,6 +172,7 @@ Edit the `.env` file and set:
Install the frontend dependencies:
**Linux/macOS:**
```bash
# Install pnpm if you don't have it
npm install -g pnpm
@@ -155,6 +182,7 @@ pnpm install
```
**Windows:**
```powershell
# Install pnpm if you don't have it
npm install -g pnpm
```
@@ -168,6 +196,7 @@ pnpm install
Start the Next.js development server:
**Linux/macOS/Windows:**
```bash
pnpm run dev
```
@@ -181,18 +210,21 @@ The SurfSense browser extension allows you to save any webpage, including those
### 1. Environment Configuration
**Linux/macOS:**
```bash
cd surfsense_browser_extension
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_browser_extension
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_browser_extension
Copy-Item -Path .env.example -Destination .env
```
@@ -200,8 +232,8 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file:
| ENV VARIABLE | DESCRIPTION |
| ------------------------- | ----------------------------------------------------- |
| PLASMO_PUBLIC_BACKEND_URL | SurfSense Backend URL (e.g., `http://127.0.0.1:8000`) |
### 2. Build the Extension
@@ -209,6 +241,7 @@ Edit the `.env` file:
Build the extension for your browser using the [Plasmo framework](https://docs.plasmo.com/framework/workflows/build#with-a-specific-target).
**Linux/macOS/Windows:**
```bash
# Install dependencies
pnpm install
```
@@ -253,6 +286,7 @@ Now that you have SurfSense running locally, you can explore its features:
- Explore the advanced RAG capabilities
For production deployments, consider setting up:
- A reverse proxy like Nginx
- SSL certificates for secure connections
- Proper database backups