fix: Added API_BASE param for LiteLLM.

This commit is contained in:
DESKTOP-RTLN3BA\$punk 2025-05-08 19:31:47 -07:00
parent cae5f835af
commit 4a2be4b98e
7 changed files with 151 additions and 73 deletions

View file

@@ -30,3 +30,8 @@ LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="lsv2_pt_....."
LANGSMITH_PROJECT="surfsense"
# OPTIONAL: LiteLLM API Base
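# When set, each value is passed to LiteLLM as api_base (illustrative example: "http://localhost:11434" for a local Ollama server)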
FAST_LLM_API_BASE=""
STRATEGIC_LLM_API_BASE=""
LONG_CONTEXT_LLM_API_BASE=""

View file

@@ -5,3 +5,4 @@ data/
__pycache__/
.flashrank_cache
surf_new_backend.egg-info/
podcasts/

View file

@@ -3,7 +3,7 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import List, Optional, Any
from sqlalchemy.ext.asyncio import AsyncSession
from app.utils.streaming_service import StreamingService
@@ -21,7 +21,7 @@ class State:
# Streaming service
streaming_service: StreamingService
# Intermediate state - populated during workflow
# chat_history: Optional[List[Any]] = field(default=None)
# Using field to explicitly mark as part of state
answer_outline: Optional[Any] = field(default=None)

View file

@@ -46,14 +46,27 @@ class Config:
# LONG-CONTEXT LLM
LONG_CONTEXT_LLM = os.getenv("LONG_CONTEXT_LLM")
LONG_CONTEXT_LLM_API_BASE = os.getenv("LONG_CONTEXT_LLM_API_BASE")
if LONG_CONTEXT_LLM_API_BASE:
    long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM, api_base=LONG_CONTEXT_LLM_API_BASE)
else:
    long_context_llm_instance = ChatLiteLLM(model=LONG_CONTEXT_LLM)
# FAST LLM
FAST_LLM = os.getenv("FAST_LLM")
FAST_LLM_API_BASE = os.getenv("FAST_LLM_API_BASE")
if FAST_LLM_API_BASE:
    fast_llm_instance = ChatLiteLLM(model=FAST_LLM, api_base=FAST_LLM_API_BASE)
else:
    fast_llm_instance = ChatLiteLLM(model=FAST_LLM)
# STRATEGIC LLM
STRATEGIC_LLM = os.getenv("STRATEGIC_LLM")
STRATEGIC_LLM_API_BASE = os.getenv("STRATEGIC_LLM_API_BASE")
if STRATEGIC_LLM_API_BASE:
    strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM, api_base=STRATEGIC_LLM_API_BASE)
else:
    strategic_llm_instance = ChatLiteLLM(model=STRATEGIC_LLM)
# Chonkie Configuration | Edit this to your needs
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL")
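All three tiers follow the same shape: read an optional `*_API_BASE` variable and pass `api_base` to `ChatLiteLLM` only when it is set. A minimal sketch of how the repetition could be collapsed into a helper (hypothetical `_make_llm`, not part of this commit; assumes `ChatLiteLLM` comes from `langchain_community.chat_models`):

```python
import os

from langchain_community.chat_models import ChatLiteLLM  # assumed import path


def _make_llm(model_env: str, api_base_env: str) -> ChatLiteLLM:
    """Build a ChatLiteLLM instance, passing api_base only when its env var is set."""
    model = os.getenv(model_env)
    api_base = os.getenv(api_base_env)
    if api_base:
        return ChatLiteLLM(model=model, api_base=api_base)
    return ChatLiteLLM(model=model)


fast_llm_instance = _make_llm("FAST_LLM", "FAST_LLM_API_BASE")
strategic_llm_instance = _make_llm("STRATEGIC_LLM", "STRATEGIC_LLM_API_BASE")
long_context_llm_instance = _make_llm("LONG_CONTEXT_LLM", "LONG_CONTEXT_LLM_API_BASE")
```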

View file

@@ -9,7 +9,7 @@ interface PageProps {
export default async function ChatsPage({ params }: PageProps) {
// Get search space ID from the route parameter
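// `params` may arrive as a Promise in newer Next.js versions; awaiting Promise.resolve(params) handles both the plain-object and Promise cases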
const { search_space_id: searchSpaceId } = await Promise.resolve(params);
return (
<Suspense fallback={<div className="flex items-center justify-center h-[60vh]">

View file

@@ -3,6 +3,7 @@ title: Docker Installation
description: Setting up SurfSense using Docker
full: true
---
## Known Limitations
⚠️ **Important Note:** Currently, the following features have limited functionality when running in Docker:
@@ -12,7 +13,6 @@ full: true
We're actively working to resolve these limitations in future releases.
# Docker Installation
This guide explains how to run SurfSense using Docker Compose, which is the preferred and recommended method for deployment.
@@ -36,6 +36,7 @@ Before you begin, ensure you have:
Set up the necessary environment variables:
**Linux/macOS:**
```bash
# Copy example environment files
cp surfsense_backend/.env.example surfsense_backend/.env
@@ -43,12 +44,14 @@
```
**Windows (Command Prompt):**
```cmd
copy surfsense_backend\.env.example surfsense_backend\.env
copy surfsense_web\.env.example surfsense_web\.env
```
**Windows (PowerShell):**
```powershell
Copy-Item -Path surfsense_backend\.env.example -Destination surfsense_backend\.env
Copy-Item -Path surfsense_web\.env.example -Destination surfsense_web\.env
```
@@ -58,40 +61,57 @@ Before you begin, ensure you have:
**Backend Environment Variables:**
| ENV VARIABLE | DESCRIPTION |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID obtained from Google Cloud Console |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret obtained from Google Cloud Console |
| NEXT_FRONTEND_URL | URL where your frontend application is hosted (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed smaller, faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM for complex tasks (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed LLM for longer context windows (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service for document parsing |
| FIRECRAWL_API_KEY | API key for Firecrawl service for web crawling |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
Include API keys for the LLM providers you're using. For example:
- `OPENAI_API_KEY`: If using OpenAI models
- `GEMINI_API_KEY`: If using Google Gemini models
**Optional LangSmith Observability:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| LANGSMITH_TRACING | Enable LangSmith tracing (e.g., `true`) |
| LANGSMITH_ENDPOINT | LangSmith API endpoint (e.g., `https://api.smith.langchain.com`) |
| LANGSMITH_API_KEY | Your LangSmith API key |
| LANGSMITH_PROJECT | LangSmith project name (e.g., `surfsense`) |
**Optional LiteLLM API Base URLs:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| FAST_LLM_API_BASE | Custom API base URL for the fast LLM |
| STRATEGIC_LLM_API_BASE | Custom API base URL for the strategic LLM |
| LONG_CONTEXT_LLM_API_BASE | Custom API base URL for the long context LLM |
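When these are unset, LiteLLM calls each provider's default endpoint. Set them to route requests through a self-hosted server instead, for example a local Ollama instance (illustrative values; adjust host and port to your setup):

```bash
FAST_LLM_API_BASE="http://localhost:11434"
STRATEGIC_LLM_API_BASE="http://localhost:11434"
LONG_CONTEXT_LLM_API_BASE="http://localhost:11434"
```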
For other LLM providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers).
**Frontend Environment Variables:**
| ENV VARIABLE | DESCRIPTION |
| ------------------------------- | ---------------------------------------------------------- |
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | URL of the backend service (e.g., `http://localhost:8000`) |
2. **Build and Start Containers**
Start the Docker containers:
**Linux/macOS/Windows:**
```bash
docker-compose up --build
```
@@ -99,6 +119,7 @@ Before you begin, ensure you have:
To run in detached mode (in the background):
**Linux/macOS/Windows:**
```bash
docker-compose up -d
```
@@ -108,6 +129,7 @@ Before you begin, ensure you have:
3. **Access the Applications**
Once the containers are running, you can access:
- Frontend: [http://localhost:3000](http://localhost:3000)
- Backend API: [http://localhost:8000](http://localhost:8000)
- API Documentation: [http://localhost:8000/docs](http://localhost:8000/docs)
@@ -119,6 +141,7 @@ Before you begin, ensure you have:
- **Stop containers:**
**Linux/macOS/Windows:**
```bash
docker-compose down
```
@@ -126,6 +149,7 @@ Before you begin, ensure you have:
- **View logs:**
**Linux/macOS/Windows:**
```bash
# All services
docker-compose logs -f
```
@@ -139,6 +163,7 @@ Before you begin, ensure you have:
- **Restart a specific service:**
**Linux/macOS/Windows:**
```bash
docker-compose restart backend
```
@@ -146,6 +171,7 @@ Before you begin, ensure you have:
- **Execute commands in a running container:**
**Linux/macOS/Windows:**
```bash
# Backend
docker-compose exec backend python -m pytest
```
@@ -163,7 +189,6 @@ Before you begin, ensure you have:
- For frontend dependency issues, check the `Dockerfile` in the frontend directory.
- **Windows-specific:** If you encounter line ending issues (CRLF vs LF), configure Git to handle line endings properly with `git config --global core.autocrlf true` before cloning the repository.
## Next Steps
Once your installation is complete, you can start using SurfSense! Navigate to the frontend URL and log in using your Google account.

View file

@@ -27,18 +27,21 @@ The backend is the core of SurfSense. Follow these steps to set it up:
First, create and configure your environment variables by copying the example file:
**Linux/macOS:**
```bash
cd surfsense_backend
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_backend
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_backend
Copy-Item -Path .env.example -Destination .env
```
@@ -46,33 +49,50 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file and set the following variables:
| ENV VARIABLE | DESCRIPTION |
| -------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| DATABASE_URL | PostgreSQL connection string (e.g., `postgresql+asyncpg://postgres:postgres@localhost:5432/surfsense`) |
| SECRET_KEY | JWT Secret key for authentication (should be a secure random string) |
| GOOGLE_OAUTH_CLIENT_ID | Google OAuth client ID |
| GOOGLE_OAUTH_CLIENT_SECRET | Google OAuth client secret |
| NEXT_FRONTEND_URL | Frontend application URL (e.g., `http://localhost:3000`) |
| EMBEDDING_MODEL | Name of the embedding model (e.g., `openai://text-embedding-ada-002`, `anthropic://claude-v1`, `mixedbread-ai/mxbai-embed-large-v1`) |
| RERANKERS_MODEL_NAME | Name of the reranker model (e.g., `ms-marco-MiniLM-L-12-v2`) |
| RERANKERS_MODEL_TYPE | Type of reranker model (e.g., `flashrank`) |
| FAST_LLM | LiteLLM routed faster LLM (e.g., `openai/gpt-4o-mini`, `ollama/deepseek-r1:8b`) |
| STRATEGIC_LLM | LiteLLM routed advanced LLM (e.g., `openai/gpt-4o`, `ollama/gemma3:12b`) |
| LONG_CONTEXT_LLM | LiteLLM routed long-context LLM (e.g., `gemini/gemini-2.0-flash`, `ollama/deepseek-r1:8b`) |
| UNSTRUCTURED_API_KEY | API key for Unstructured.io service |
| FIRECRAWL_API_KEY | API key for Firecrawl service (if using crawler) |
| TTS_SERVICE | Text-to-Speech API provider for Podcasts (e.g., `openai/tts-1`, `azure/neural`, `vertex_ai/`). See [supported providers](https://docs.litellm.ai/docs/text_to_speech#supported-providers) |
**Important**: Since LLM calls are routed through LiteLLM, include API keys for the LLM providers you're using:
- For OpenAI models: `OPENAI_API_KEY`
- For Google Gemini models: `GEMINI_API_KEY`
- For other providers, refer to the [LiteLLM documentation](https://docs.litellm.ai/docs/providers)
**Optional LangSmith Observability:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| LANGSMITH_TRACING | Enable LangSmith tracing (e.g., `true`) |
| LANGSMITH_ENDPOINT | LangSmith API endpoint (e.g., `https://api.smith.langchain.com`) |
| LANGSMITH_API_KEY | Your LangSmith API key |
| LANGSMITH_PROJECT | LangSmith project name (e.g., `surfsense`) |
**Optional LiteLLM API Base URLs:**
| ENV VARIABLE | DESCRIPTION |
|--------------|-------------|
| FAST_LLM_API_BASE | Custom API base URL for the fast LLM |
| STRATEGIC_LLM_API_BASE | Custom API base URL for the strategic LLM |
| LONG_CONTEXT_LLM_API_BASE | Custom API base URL for the long context LLM |
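As in the Docker setup, these are optional; when set, the backend passes each value to LiteLLM as `api_base`. For example, if you serve your models through a local [LiteLLM proxy](https://docs.litellm.ai/docs/simple_proxy) (illustrative values; port 4000 is the proxy's default):

```bash
FAST_LLM_API_BASE="http://localhost:4000"
STRATEGIC_LLM_API_BASE="http://localhost:4000"
LONG_CONTEXT_LLM_API_BASE="http://localhost:4000"
```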
### 2. Install Dependencies
Install the backend dependencies using `uv`:
**Linux/macOS:**
```bash
# Install uv if you don't have it
curl -fsSL https://astral.sh/uv/install.sh | bash
@@ -82,6 +102,7 @@ uv sync
```
**Windows (PowerShell):**
```powershell
# Install uv if you don't have it
iwr -useb https://astral.sh/uv/install.ps1 | iex
@@ -91,6 +112,7 @@ uv sync
```
**Windows (Command Prompt):**
```cmd
# Install dependencies with uv (after installing uv)
uv sync
```
@@ -101,6 +123,7 @@ uv sync
Start the backend server:
**Linux/macOS/Windows:**
```bash
# Run without hot reloading
uv run main.py
```
@@ -118,18 +141,21 @@ If everything is set up correctly, you should see output indicating the server i
Set up the frontend environment:
**Linux/macOS:**
```bash
cd surfsense_web
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_web
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_web
Copy-Item -Path .env.example -Destination .env
```
@@ -137,8 +163,8 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file and set:
| ENV VARIABLE | DESCRIPTION |
| ------------------------------- | ------------------------------------------- |
| NEXT_PUBLIC_FASTAPI_BACKEND_URL | Backend URL (e.g., `http://localhost:8000`) |
### 2. Install Dependencies
@@ -146,6 +172,7 @@ Edit the `.env` file and set:
Install the frontend dependencies:
**Linux/macOS:**
```bash
# Install pnpm if you don't have it
npm install -g pnpm
@@ -155,6 +182,7 @@ pnpm install
```
**Windows:**
```powershell
# Install pnpm if you don't have it
npm install -g pnpm
```
@@ -168,6 +196,7 @@ pnpm install
Start the Next.js development server:
**Linux/macOS/Windows:**
```bash
pnpm run dev
```
@@ -181,18 +210,21 @@ The SurfSense browser extension allows you to save any webpage, including those
### 1. Environment Configuration
**Linux/macOS:**
```bash
cd surfsense_browser_extension
cp .env.example .env
```
**Windows (Command Prompt):**
```cmd
cd surfsense_browser_extension
copy .env.example .env
```
**Windows (PowerShell):**
```powershell
cd surfsense_browser_extension
Copy-Item -Path .env.example -Destination .env
```
@@ -200,8 +232,8 @@ Copy-Item -Path .env.example -Destination .env
Edit the `.env` file:
| ENV VARIABLE | DESCRIPTION |
| ------------------------- | ----------------------------------------------------- |
| PLASMO_PUBLIC_BACKEND_URL | SurfSense Backend URL (e.g., `http://127.0.0.1:8000`) |
### 2. Build the Extension
@@ -209,6 +241,7 @@ Edit the `.env` file:
Build the extension for your browser using the [Plasmo framework](https://docs.plasmo.com/framework/workflows/build#with-a-specific-target).
**Linux/macOS/Windows:**
```bash
# Install dependencies
pnpm install
```
@@ -253,6 +286,7 @@ Now that you have SurfSense running locally, you can explore its features:
- Explore the advanced RAG capabilities
For production deployments, consider setting up:
- A reverse proxy like Nginx
- SSL certificates for secure connections
- Proper database backups