diff --git a/surfsense_backend/app/agents/new_chat/chat_deepagent.py b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
index ab47b49ce..a901a7519 100644
--- a/surfsense_backend/app/agents/new_chat/chat_deepagent.py
+++ b/surfsense_backend/app/agents/new_chat/chat_deepagent.py
@@ -24,7 +24,6 @@ from deepagents.backends import StateBackend
from deepagents.graph import BASE_AGENT_PROMPT
from deepagents.middleware.patch_tool_calls import PatchToolCallsMiddleware
from deepagents.middleware.subagents import GENERAL_PURPOSE_SUBAGENT
-from deepagents.middleware.summarization import create_summarization_middleware
from langchain.agents import create_agent
from langchain.agents.middleware import TodoListMiddleware
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
@@ -41,6 +40,9 @@ from app.agents.new_chat.middleware import (
MemoryInjectionMiddleware,
SurfSenseFilesystemMiddleware,
)
+from app.agents.new_chat.middleware.safe_summarization import (
+ create_safe_summarization_middleware,
+)
from app.agents.new_chat.system_prompt import (
build_configurable_system_prompt,
build_surfsense_system_prompt,
@@ -442,7 +444,7 @@ async def create_surfsense_deep_agent(
created_by_id=user_id,
thread_id=thread_id,
),
- create_summarization_middleware(llm, StateBackend),
+ create_safe_summarization_middleware(llm, StateBackend),
PatchToolCallsMiddleware(),
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
]
@@ -472,7 +474,7 @@ async def create_surfsense_deep_agent(
thread_id=thread_id,
),
SubAgentMiddleware(backend=StateBackend, subagents=[general_purpose_spec]),
- create_summarization_middleware(llm, StateBackend),
+ create_safe_summarization_middleware(llm, StateBackend),
PatchToolCallsMiddleware(),
DedupHITLToolCallsMiddleware(agent_tools=tools),
AnthropicPromptCachingMiddleware(unsupported_model_behavior="ignore"),
diff --git a/surfsense_backend/app/agents/new_chat/middleware/safe_summarization.py b/surfsense_backend/app/agents/new_chat/middleware/safe_summarization.py
new file mode 100644
index 000000000..4ddcf334f
--- /dev/null
+++ b/surfsense_backend/app/agents/new_chat/middleware/safe_summarization.py
@@ -0,0 +1,123 @@
+"""Safe wrapper around deepagents' SummarizationMiddleware.
+
+Upstream issue
+--------------
+`deepagents.middleware.summarization.SummarizationMiddleware._aoffload_to_backend`
+(and its sync counterpart) calls
+``get_buffer_string(filtered_messages)`` before writing the evicted history
+to the backend file. In recent ``langchain-core`` versions, ``get_buffer_string``
+accesses ``m.text``, which iterates ``self.content``; this raises
+``TypeError: 'NoneType' object is not iterable`` whenever an ``AIMessage``
+has ``content=None`` (common when a model returns *only* tool_calls, seen
+frequently with Azure OpenAI ``gpt-5.x`` responses streamed through
+LiteLLM).
+
+The exception aborts the whole agent turn, so the user just sees "Error during
+chat" with no assistant response.
+
+Fix
+---
+We subclass ``SummarizationMiddleware`` and override
+``_filter_summary_messages`` (the only call site that feeds messages into
+``get_buffer_string``) so that any message whose ``content`` is ``None``
+is replaced by a copy with ``content=""``. The originals flowing through
+the rest of the agent state are untouched.
+
+We also expose a drop-in ``create_safe_summarization_middleware`` factory
+that mirrors ``deepagents.middleware.summarization.create_summarization_middleware``
+but instantiates our safe subclass.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from deepagents.middleware.summarization import (
+ SummarizationMiddleware,
+ compute_summarization_defaults,
+)
+
+if TYPE_CHECKING:
+ from deepagents.backends.protocol import BACKEND_TYPES
+ from langchain_core.language_models import BaseChatModel
+ from langchain_core.messages import AnyMessage
+
+logger = logging.getLogger(__name__)
+
+
+def _sanitize_message_content(msg: AnyMessage) -> AnyMessage:
+ """Return ``msg`` with ``content`` coerced to a non-``None`` value.
+
+ ``get_buffer_string`` reads ``m.text``, which iterates ``self.content``;
+ when a provider streams back an ``AIMessage`` with only tool_calls and
+ no text, ``content`` can be ``None`` and the iteration raises
+ ``TypeError``. We replace ``None`` with an empty string so downstream
+ consumers that only care about text see an empty body.
+
+ The original message is left untouched — we return a copy via
+ pydantic's ``model_copy`` when available, otherwise we fall back to
+ re-setting the attribute on a shallow copy.
+ """
+
+ if getattr(msg, "content", "not-missing") is not None:  # attr absent or non-None: nothing to sanitize
+ return msg
+
+ try:
+ return msg.model_copy(update={"content": ""})
+ except AttributeError:
+ import copy
+
+ new_msg = copy.copy(msg)
+ try:
+ new_msg.content = ""
+ except Exception: # pragma: no cover - defensive
+ logger.debug(
+ "Could not sanitize content=None on message of type %s",
+ type(msg).__name__,
+ )
+ return msg
+ return new_msg
+
+
+class SafeSummarizationMiddleware(SummarizationMiddleware):
+ """`SummarizationMiddleware` that tolerates messages with ``content=None``.
+
+ Only ``_filter_summary_messages`` is overridden — this is the single
+ helper invoked by both the sync and async offload paths immediately
+ before ``get_buffer_string``. Normalising here means we get coverage
+ for both without having to copy the (long, rapidly-changing) offload
+ implementations from upstream.
+ """
+
+ def _filter_summary_messages(self, messages: list[AnyMessage]) -> list[AnyMessage]:
+ filtered = super()._filter_summary_messages(messages)
+ return [_sanitize_message_content(m) for m in filtered]
+
+
+def create_safe_summarization_middleware(
+ model: BaseChatModel,
+ backend: BACKEND_TYPES,
+) -> SafeSummarizationMiddleware:
+ """Drop-in replacement for ``create_summarization_middleware``.
+
+ Mirrors the defaults computed by ``deepagents`` but returns our
+ ``SafeSummarizationMiddleware`` subclass so the
+ ``content=None`` crash in ``get_buffer_string`` is avoided.
+ """
+
+ defaults = compute_summarization_defaults(model)
+ return SafeSummarizationMiddleware(
+ model=model,
+ backend=backend,
+ trigger=defaults["trigger"],
+ keep=defaults["keep"],
+ trim_tokens_to_summarize=None,
+ truncate_args_settings=defaults["truncate_args_settings"],
+ )
+
+
+__all__ = [
+ "SafeSummarizationMiddleware",
+ "create_safe_summarization_middleware",
+]
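
Not part of the patch: a minimal sketch of the failure mode `SafeSummarizationMiddleware` guards against, assuming a recent `langchain-core` in which `get_buffer_string` goes through `m.text`. `model_construct` is used here to bypass validation and mimic a tool-call-only `AIMessage` arriving off the wire with `content=None`.

```python
# Sketch only -- not repository code. Assumes langchain-core's
# get_buffer_string reads m.text, as described in the module docstring.
from langchain_core.messages import AIMessage, get_buffer_string

# model_construct skips pydantic validation, mimicking a tool-call-only
# response whose content arrives as None.
msg = AIMessage.model_construct(
    content=None,
    tool_calls=[{"name": "search", "args": {"q": "surfsense"}, "id": "call_1"}],
)

try:
    get_buffer_string([msg])  # m.text iterates content -> TypeError on None
except TypeError as exc:
    print(f"unpatched: {exc}")  # 'NoneType' object is not iterable

# What _sanitize_message_content does: return a copy with content coerced to "".
safe = msg.model_copy(update={"content": ""})
print(repr(get_buffer_string([safe])))  # 'AI: ' -- empty turn, no crash
```
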
diff --git a/surfsense_backend/app/services/llm_router_service.py b/surfsense_backend/app/services/llm_router_service.py
index 35dfdd44e..c9eeff01b 100644
--- a/surfsense_backend/app/services/llm_router_service.py
+++ b/surfsense_backend/app/services/llm_router_service.py
@@ -133,6 +133,44 @@ PROVIDER_MAP = {
}
+# Default ``api_base`` per LiteLLM provider prefix. Used as a safety net when
+# a global LLM config does *not* specify ``api_base``: without this, LiteLLM
+# happily picks up unrelated env vars (e.g. ``AZURE_API_BASE``,
+# ``OPENAI_API_BASE``) and routes, say, an ``openrouter/anthropic/claude-3-haiku``
+# request to an Azure endpoint, which then 404s with ``Resource not found``.
+# Only providers with a well-known, stable public base URL are listed here —
+# self-hosted / BYO-endpoint providers (ollama, custom, bedrock, vertex_ai,
+# huggingface, databricks, cloudflare, replicate) are intentionally omitted
+# so their existing config-driven behaviour is preserved.
+PROVIDER_DEFAULT_API_BASE = {
+ "openrouter": "https://openrouter.ai/api/v1",
+ "groq": "https://api.groq.com/openai/v1",
+ "mistral": "https://api.mistral.ai/v1",
+ "perplexity": "https://api.perplexity.ai",
+ "xai": "https://api.x.ai/v1",
+ "cerebras": "https://api.cerebras.ai/v1",
+ "deepinfra": "https://api.deepinfra.com/v1/openai",
+ "fireworks_ai": "https://api.fireworks.ai/inference/v1",
+ "together_ai": "https://api.together.xyz/v1",
+ "anyscale": "https://api.endpoints.anyscale.com/v1",
+ "cometapi": "https://api.cometapi.com/v1",
+ "sambanova": "https://api.sambanova.ai/v1",
+}
+
+
+# Canonical provider → base URL when a config uses a generic ``openai``-style
+# prefix but the ``provider`` field tells us which API it really is
+# (e.g. DeepSeek/Alibaba/Moonshot/Zhipu/MiniMax all use ``openai`` compat but
+# each has its own base URL).
+PROVIDER_KEY_DEFAULT_API_BASE = {
+ "DEEPSEEK": "https://api.deepseek.com/v1",
+ "ALIBABA_QWEN": "https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+ "MOONSHOT": "https://api.moonshot.ai/v1",
+ "ZHIPU": "https://open.bigmodel.cn/api/paas/v4",
+ "MINIMAX": "https://api.minimax.io/v1",
+}
+
+
class LLMRouterService:
"""
Singleton service for managing LiteLLM Router.
@@ -224,6 +262,16 @@ class LLMRouterService:
# hits ContextWindowExceededError.
full_model_list, ctx_fallbacks = cls._build_context_fallback_groups(model_list)
+ # Build a general-purpose fallback list so NotFound/timeout/rate-limit
+ # style failures on one deployment don't bubble up as hard errors —
+ # the router retries with a sibling deployment in ``auto-large``.
+ # ``auto-large`` is the large-context subset of ``auto``; when that
+ # subset is empty there is nothing to fall back to, so ``fallbacks``
+ # stays unset and the router keeps its existing behaviour.
+ fallbacks: list[dict[str, list[str]]] | None = None
+ if ctx_fallbacks:
+ fallbacks = [{"auto": ["auto-large"]}]
+
try:
router_kwargs: dict[str, Any] = {
"model_list": full_model_list,
@@ -237,15 +285,18 @@ class LLMRouterService:
}
if ctx_fallbacks:
router_kwargs["context_window_fallbacks"] = ctx_fallbacks
+ if fallbacks:
+ router_kwargs["fallbacks"] = fallbacks
instance._router = Router(**router_kwargs)
instance._initialized = True
logger.info(
"LLM Router initialized with %d deployments, "
- "strategy: %s, context_window_fallbacks: %s",
+ "strategy: %s, context_window_fallbacks: %s, fallbacks: %s",
len(model_list),
final_settings.get("routing_strategy"),
ctx_fallbacks or "none",
+ fallbacks or "none",
)
except Exception as e:
logger.error(f"Failed to initialize LLM Router: {e}")
@@ -348,10 +399,11 @@ class LLMRouterService:
return None
# Build model string
+ provider = config.get("provider", "").upper()
if config.get("custom_provider"):
- model_string = f"{config['custom_provider']}/{config['model_name']}"
+ provider_prefix = config["custom_provider"]
+ model_string = f"{provider_prefix}/{config['model_name']}"
else:
- provider = config.get("provider", "").upper()
provider_prefix = PROVIDER_MAP.get(provider, provider.lower())
model_string = f"{provider_prefix}/{config['model_name']}"
@@ -361,9 +413,19 @@ class LLMRouterService:
"api_key": config.get("api_key"),
}
- # Add optional api_base
- if config.get("api_base"):
- litellm_params["api_base"] = config["api_base"]
+ # Resolve ``api_base``. Config value wins; otherwise apply a
+ # provider-aware default so the deployment does not silently
+ # inherit unrelated env vars (e.g. ``AZURE_API_BASE``) and route
+ # requests to the wrong endpoint. See the ``PROVIDER_DEFAULT_API_BASE``
+ # comment for the motivating bug (OpenRouter models 404-ing
+ # against an Azure endpoint).
+ api_base = config.get("api_base")
+ if not api_base:
+ api_base = PROVIDER_KEY_DEFAULT_API_BASE.get(provider)
+ if not api_base:
+ api_base = PROVIDER_DEFAULT_API_BASE.get(provider_prefix)
+ if api_base:
+ litellm_params["api_base"] = api_base
# Add any additional litellm parameters
if config.get("litellm_params"):
diff --git a/surfsense_backend/pyproject.toml b/surfsense_backend/pyproject.toml
index 01f5ddc1b..131627386 100644
--- a/surfsense_backend/pyproject.toml
+++ b/surfsense_backend/pyproject.toml
@@ -74,7 +74,7 @@ dependencies = [
"deepagents>=0.4.12",
"stripe>=15.0.0",
"azure-ai-documentintelligence>=1.0.2",
- "litellm>=1.83.0",
+ "litellm>=1.83.4",
"langchain-litellm>=0.6.4",
]
diff --git a/surfsense_backend/tests/unit/test_error_contract.py b/surfsense_backend/tests/unit/test_error_contract.py
index 81ec08b2d..ec8021290 100644
--- a/surfsense_backend/tests/unit/test_error_contract.py
+++ b/surfsense_backend/tests/unit/test_error_contract.py
@@ -202,9 +202,7 @@ class TestHTTPExceptionHandler:
# Intentional 503s (e.g. feature flag off) must surface the developer
# message so the frontend can render actionable copy.
body = _assert_envelope(client.get("/http-503"), 503)
- assert (
- body["error"]["message"] == "Page purchases are temporarily unavailable."
- )
+ assert body["error"]["message"] == "Page purchases are temporarily unavailable."
assert body["error"]["message"] != GENERIC_5XX_MESSAGE
def test_502_preserves_detail(self, client):
diff --git a/surfsense_backend/uv.lock b/surfsense_backend/uv.lock
index ac2784668..209c42a9c 100644
--- a/surfsense_backend/uv.lock
+++ b/surfsense_backend/uv.lock
@@ -8070,7 +8070,7 @@ requires-dist = [
{ name = "langgraph", specifier = ">=1.1.3" },
{ name = "langgraph-checkpoint-postgres", specifier = ">=3.0.2" },
{ name = "linkup-sdk", specifier = ">=0.2.4" },
- { name = "litellm", specifier = ">=1.83.0" },
+ { name = "litellm", specifier = ">=1.83.4" },
{ name = "llama-cloud-services", specifier = ">=0.6.25" },
{ name = "markdown", specifier = ">=3.7" },
{ name = "markdownify", specifier = ">=0.14.1" },
diff --git a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx
index 3175268d2..63ca9f5df 100644
--- a/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx
+++ b/surfsense_web/app/dashboard/[search_space_id]/user-settings/components/DesktopContent.tsx
@@ -200,8 +200,8 @@ export function DesktopContent() {
Launch on Startup
- Automatically start SurfSense when you sign in to your computer so global
- shortcuts and folder sync are always available.
+ Automatically start SurfSense when you sign in to your computer so global shortcuts and
+ folder sync are always available.
@@ -232,8 +232,7 @@ export function DesktopContent() {
Start minimized to tray
- Skip the main window on boot — SurfSense lives in the system tray until you need
- it.
+ Skip the main window on boot — SurfSense lives in the system tray until you need it.