fix: Added API_BASE param for LiteLLM.

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-05-08 19:31:47 -07:00
parent cae5f835af
commit 4a2be4b98e
7 changed files with 151 additions and 73 deletions

View file

@ -30,3 +30,8 @@ LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="lsv2_pt_....."
LANGSMITH_PROJECT="surfsense"
# OPTIONAL: LiteLLM API Base
FAST_LLM_API_BASE=""
STRATEGIC_LLM_API_BASE=""
LONG_CONTEXT_LLM_API_BASE=""

View file

@ -5,3 +5,4 @@ data/
__pycache__/
.flashrank_cache
surf_new_backend.egg-info/
podcasts/

View file

@ -3,7 +3,7 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional, Any
from typing import List, Optional, Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.utils.streaming_service import StreamingService
@ -21,7 +21,7 @@ class State:
# Streaming service
streaming_service: StreamingService
# Intermediate state - populated during workflow
# chat_history: Optional[List[Any]] = field(default=None)
# Using field to explicitly mark as part of state
answer_outline: Optional[Any] = field(default=None)

View file

@ -46,14 +46,27 @@ class Config:
# LONG-CONTEXT LLMS
LONG_CONTEXT_LLM = os.getenv("LONG_CONTEXT_LLM")
long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM)
FAST_LLM_API_BASE = os.getenv("FAST_LLM_API_BASE")
if FAST_LLM_API_BASE:
long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM, api_base=FAST_LLM_API_BASE)
else:
long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM)
# FAST & STRATEGIC LLM's
# FAST LLM
FAST_LLM = os.getenv("FAST_LLM")
STRATEGIC_LLM = os.getenv("STRATEGIC_LLM")
fast_llm_instance = ChatLiteLLM(model=FAST_LLM)
strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM)
FAST_LLM_API_BASE = os.getenv("FAST_LLM_API_BASE")
if FAST_LLM_API_BASE:
fast_llm_instance = ChatLiteLLM(model=FAST_LLM, api_base=FAST_LLM_API_BASE)
else:
fast_llm_instance = ChatLiteLLM(model=FAST_LLM)
# STRATEGIC LLM
STRATEGIC_LLM = os.getenv("STRATEGIC_LLM")
STRATEGIC_LLM_API_BASE = os.getenv("STRATEGIC_LLM_API_BASE")
if STRATEGIC_LLM_API_BASE:
strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM, api_base=STRATEGIC_LLM_API_BASE)
else:
strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM)
# Chonkie Configuration | Edit this to your needs
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")

View file

@ -9,7 +9,7 @@ interface PageProps {
export default async function ChatsPage({ params }: PageProps) {
// Get search space ID from the route parameter
const { search_space_id: searchSpaceId } = params;
const { search_space_id: searchSpaceId } = await Promise.resolve(params);
return (
<Suspense fallback={<div className="flex items-center justify-center h-[60vh]">

View file

@ -1,8 +1,9 @@
---
title: Docker Installation
description: Setting up SurfSense using Docker
description: Setting up SurfSense using Docker
full: true
---
## Known Limitations
⚠️ **Important Note:** Currently, the following features have limited functionality when running in Docker:
@ -12,8 +13,7 @@ full: true
We're actively working to resolve these limitations in future releases.
# Docker Installation
# Docker Installation
This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment.
@ -32,23 +32,26 @@ Before you begin, ensure you have:
## Installation Steps
1. **Configure Environment Variables**
Set up the necessary environment variables:
**Linux/macOS:**
```bash
# Copy example environment files
cp surfsense_backend/.env.example surfsense_backend/.env
cp surfsense_web/.env.example surfsense_web/.env
```
**Windows (Command Prompt):**
```cmd
copy surfsense_backend\.env.example surfsense_backend\.env
copy surfsense_web\.env.example surfsense_web\.env
```
**Windows (PowerShell):**
```powershell
Copy-Item -Path surfsense_backend\.env.example -Destination surfsense_backend\.env
Copy-Item -Path surfsense_web\.env.example -Destination surfsense_web\.env
@ -58,47 +61,65 @@ Before you begin, ensure you have:
**Backend Environment Variables:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID obtained from Google Cloud Console |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret obtained from Google Cloud Console |
| NEXT_FRONTEND_URL | URL where your frontend application is hosted (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing |
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
| ENV VARIABLE | DESCRIPTION |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID obtained from Google Cloud Console |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret obtained from Google Cloud Console |
| NEXT_FRONTEND_URL | URL where your frontend application is hosted (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing |
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
Include API keys for the LLM providers you're using. For example:
- `OPENAI_API_KEY`: If using OpenAI models
- `GEMINI_API_KEY`: If using Google Gemini models
For other LLM providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers).
Include API keys for the LLM providers you're using. For example:
**Frontend Environment Variables:**
- `OPENAI_API_KEY`: If using OpenAI models
- `GEMINI_API_KEY`: If using Google Gemini models
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | URL of the backend service (e.g., `http://localhost:8000`) |
**Optional LangSmith Observability:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| LANGSMITH_TRACING | Enable LangSmith tracing (e.g., `true`) |
| LANGSMITH_ENDPOINT | LangSmith API endpoint (e.g., `https://api.smith.langchain.com`) |
| LANGSMITH_API_KEY | Your LangSmith API key |
| LANGSMITH_PROJECT | LangSmith project name (e.g., `surfsense`) |
**Optional LiteLLM API Base URLs:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| FAST_LLM_API_BASE | Custom API base URL for the fast LLM |
| STRATEGIC_LLM_API_BASE | Custom API base URL for the strategic LLM |
| LONG_CONTEXT_LLM_API_BASE | Custom API base URL for the long context LLM |
For other LLM providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers).
**Frontend Environment Variables:**
| ENV VARIABLE | DESCRIPTION |
| ------------------------------- | ---------------------------------------------------------- |
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | URL of the backend service (e.g., `http://localhost:8000`) |
2. **Build and Start Containers**
Start the Docker containers:
**Linux/macOS/Windows:**
```bash
docker-compose up --build
```
To run in detached mode (in the background):
**Linux/macOS/Windows:**
```bash
docker-compose up -d
```
@ -106,8 +127,9 @@ Before you begin, ensure you have:
**Note for Windows users:** If you're using older Docker Desktop versions, you might need to use `docker compose` (with a space) instead of `docker-compose`.
3. **Access the Applications**
Once the containers are running, you can access:
- Frontend: [http://localhost:3000](http://localhost:3000)
- Backend API: [http://localhost:8000](http://localhost:8000)
- API Documentation: [http://localhost:8000/docs](http://localhost:8000/docs)
@ -117,19 +139,21 @@ Before you begin, ensure you have:
### Container Management
- **Stop containers:**
**Linux/macOS/Windows:**
```bash
docker-compose down
```
- **View logs:**
**Linux/macOS/Windows:**
```bash
# All services
docker-compose logs -f
# Specific service
docker-compose logs -f backend
docker-compose logs -f frontend
@ -137,19 +161,21 @@ Before you begin, ensure you have:
```
- **Restart a specific service:**
**Linux/macOS/Windows:**
```bash
docker-compose restart backend
```
- **Execute commands in a running container:**
**Linux/macOS/Windows:**
```bash
# Backend
docker-compose exec backend python -m pytest
# Frontend
docker-compose exec frontend pnpm lint
```
@ -163,7 +189,6 @@ Before you begin, ensure you have:
- For frontend dependency issues, check the `Dockerfile` in the frontend directory.
- **Windows-specific:** If you encounter line ending issues (CRLF vs LF), configure Git to handle line endings properly with `git config --global core.autocrlf true` before cloning the repository.
## Next Steps
Once your installation is complete, you can start using SurfSense! Navigate to the frontend URL and log in using your Google account.
Once your installation is complete, you can start using SurfSense! Navigate to the frontend URL and log in using your Google account.

View file

@ -27,18 +27,21 @@ The backend is the core of SurfSense. Follow these steps to set it up:
First, create and configure your environment variables by copying the example file:
**Linux/macOS:**
```bash
cd surfsense_backend
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_backend
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_backend
Copy-Item -Path .env.example -Destination .env
@ -46,33 +49,50 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file and set the following variables:
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret |
| NEXT_FRONTEND_URL | Frontend application URL (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed long-context LLM (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service |
| FIRECRAWL_API_KEY | API key for Firecrawl service (if using crawler) |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
| ENV VARIABLE | DESCRIPTION |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret |
| NEXT_FRONTEND_URL | Frontend application URL (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed long-context LLM (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service |
| FIRECRAWL_API_KEY | API key for Firecrawl service (if using crawler) |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
**Important**: Since LLM calls are routed through LiteLLM, include API keys for the LLM providers you're using:
- For OpenAI models: `OPENAI_API_KEY`
- For Google Gemini models: `GEMINI_API_KEY`
- For other providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers)
**Optional LangSmith Observability:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| LANGSMITH_TRACING | Enable LangSmith tracing (e.g., `true`) |
| LANGSMITH_ENDPOINT | LangSmith API endpoint (e.g., `https://api.smith.langchain.com`) |
| LANGSMITH_API_KEY | Your LangSmith API key |
| LANGSMITH_PROJECT | LangSmith project name (e.g., `surfsense`) |
**Optional LiteLLM API Base URLs:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| FAST_LLM_API_BASE | Custom API base URL for the fast LLM |
| STRATEGIC_LLM_API_BASE | Custom API base URL for the strategic LLM |
| LONG_CONTEXT_LLM_API_BASE | Custom API base URL for the long context LLM |
### 2. Install Dependencies
Install the backend dependencies using `uv`:
**Linux/macOS:**
```bash
# Install uv if you don't have it
curl -fsSL https://astral.sh/uv/install.sh | bash
@ -82,6 +102,7 @@ uv sync
```
**Windows (PowerShell):**
```powershell
# Install uv if you don't have it
iwr -useb https://astral.sh/uv/install.ps1 | iex
@ -91,6 +112,7 @@ uv sync
```
**Windows (Command Prompt):**
```cmd
# Install dependencies with uv (after installing uv)
uv sync
@ -101,6 +123,7 @@ uv sync
Start the backend server:
**Linux/macOS/Windows:**
```bash
# Run without hot reloading
uv run main.py
@ -118,18 +141,21 @@ If everything is set up correctly, you should see output indicating the server i
Set up the frontend environment:
**Linux/macOS:**
```bash
cd surfsense_web
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_web
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_web
Copy-Item -Path .env.example -Destination .env
@ -137,8 +163,8 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file and set:
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| ENV VARIABLE | DESCRIPTION |
| ------------------------------- | ------------------------------------------- |
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | Backend URL (e.g., `http://localhost:8000`) |
### 2. Install Dependencies
@ -146,6 +172,7 @@ Edit the `.env` file and set:
Install the frontend dependencies:
**Linux/macOS:**
```bash
# Install pnpm if you don't have it
npm install -g pnpm
@ -155,6 +182,7 @@ pnpm install
```
**Windows:**
```powershell
# Install pnpm if you don't have it
npm install -g pnpm
@ -168,6 +196,7 @@ pnpm install
Start the Next.js development server:
**Linux/macOS/Windows:**
```bash
pnpm run dev
```
@ -181,18 +210,21 @@ The SurfSense browser extension allows you to save any webpage, including those
### 1. Environment Configuration
**Linux/macOS:**
```bash
cd surfsense_browser_extension
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_browser_extension
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_browser_extension
Copy-Item -Path .env.example -Destination .env
@ -200,8 +232,8 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file:
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| ENV VARIABLE | DESCRIPTION |
| ------------------------- | ----------------------------------------------------- |
| PLASMO_PUBLIC_BACKEND_URL | SurfSense Backend URL (e.g., `http://127.0.0.1:8000`) |
### 2. Build the Extension
@ -209,6 +241,7 @@ Edit the `.env` file:
Build the extension for your browser using the [Plasmo framework](https://docs.plasmo.com/framework/workflows/build#with-a-specific-target).
**Linux/macOS/Windows:**
```bash
# Install dependencies
pnpm install
@ -253,7 +286,8 @@ Now that you have SurfSense running locally, you can explore its features:
- Explore the advanced RAG capabilities
For production deployments, consider setting up:
- A reverse proxy like Nginx
- SSL certificates for secure connections
- Proper database backups
- User access controls
- User access controls