[feat] Ollama provider support (#129)

Support using Ollama as a provider, in the same way as LM Studio.

---------

Co-authored-by: Alishahryar1 <alishahryar2@gmail.com>
Co-authored-by: u011436427 <u011436427@noreply.gitcode.com>
Wang Ji 2026-04-26 13:06:36 +08:00, committed by GitHub
parent 7f1e860c7f
commit b525217633
21 changed files with 593 additions and 15 deletions

@@ -18,9 +18,13 @@ LM_STUDIO_BASE_URL="http://localhost:1234/v1"
LLAMACPP_BASE_URL="http://localhost:8080/v1"
# Ollama Config (local provider, no API key required)
OLLAMA_BASE_URL="http://localhost:11434"
# All Claude model requests are mapped to these models; the plain MODEL entry is the fallback
# Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama"
MODEL_OPUS=
MODEL_SONNET=
MODEL_HAIKU=
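
For illustration, the prefix is what routes a request: `Settings.parse_provider_type` and `Settings.parse_model_name` (exercised in the settings tests later in this commit) split it off before dispatch:

```python
from config.settings import Settings

# The prefix selects the provider; the remainder is the upstream model id.
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"
```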

@@ -31,7 +31,7 @@ A lightweight proxy that routes Claude Code's Anthropic API calls to **NVIDIA NI
| -------------------------- | ----------------------------------------------------------------------------------------------- |
| **Zero Cost** | 40 req/min free on NVIDIA NIM. Free models on OpenRouter. Fully local with LM Studio |
| **Drop-in Replacement** | Set 2 env vars. No modifications to Claude Code CLI or VSCode extension needed |
| **5 Providers** | NVIDIA NIM, OpenRouter, DeepSeek, LM Studio (local), llama.cpp (`llama-server`) |
| **6 Providers** | NVIDIA NIM, OpenRouter, DeepSeek, LM Studio (local), llama.cpp (`llama-server`), Ollama |
| **Per-Model Mapping** | Route Opus / Sonnet / Haiku to different models and providers. Mix providers freely |
| **Thinking Token Support** | Parses `<think>` tags and `reasoning_content` into native Claude thinking blocks |
| **Heuristic Tool Parser** | Models outputting tool calls as text are auto-parsed into structured tool use |
@@ -361,6 +361,7 @@ The proxy also exposes Claude-compatible probe routes: `GET /v1/models`, `POST /
| **DeepSeek** | Usage-based | Varies | Direct access to DeepSeek chat/reasoner |
| **LM Studio** | Free (local) | Unlimited | Privacy, offline use, no rate limits |
| **llama.cpp** | Free (local) | Unlimited | Lightweight local inference engine |
| **Ollama** | Free (local) | Unlimited | Easy local LLM runtime, native Anthropic API |
Models use a prefix format: `provider_prefix/model/name`. An invalid prefix causes an error.
@@ -371,6 +372,7 @@ Models use a prefix format: `provider_prefix/model/name`. An invalid prefix caus
| DeepSeek | `deepseek/...` | `DEEPSEEK_API_KEY` | `api.deepseek.com` |
| LM Studio | `lmstudio/...` | (none) | `localhost:1234/v1` |
| llama.cpp | `llamacpp/...` | (none) | `localhost:8080/v1` |
| Ollama | `ollama/...` | (none) | `localhost:11434` |
<details>
<summary><b>NVIDIA NIM models</b></summary>
@@ -439,6 +441,32 @@ See the Unsloth docs for detailed instructions and capable models:
</details>
<details>
<summary><b>Ollama</b> (fully local, no API key)</summary>
```dotenv
OLLAMA_BASE_URL="http://localhost:11434"
MODEL_OPUS="ollama/llama3.1:70b"
MODEL_SONNET="ollama/llama3.1:8b"
MODEL_HAIKU="ollama/llama3.1:8b"
MODEL="ollama/llama3.1:8b"
```
Install Ollama: [ollama.com](https://ollama.com)
Pull a model:
```bash
ollama pull llama3.1
```
Start Ollama server:
```bash
ollama serve
```
</details>
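
For a quick end-to-end check before wiring up the proxy, you can hit the route the new provider targets directly. This is a hedged sketch: it assumes your Ollama build exposes the Anthropic-compatible `/v1/messages` endpoint this PR relies on, accepts non-streaming requests there, and has `llama3.1:8b` pulled:

```python
import httpx

# Post an Anthropic-shaped body to Ollama's root URL (the base has no /v1;
# the /v1/messages path is appended, as OllamaProvider does).
resp = httpx.post(
    "http://localhost:11434/v1/messages",
    json={
        "model": "llama3.1:8b",
        "max_tokens": 64,
        "messages": [{"role": "user", "content": "Say hi"}],
    },
    timeout=60.0,
)
print(resp.status_code, resp.json())
```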
---
## Discord Bot
@@ -544,6 +572,7 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
| `OPENROUTER_PROXY` | Optional proxy URL for OpenRouter requests (`http://...` or `socks5://...`) | `""` |
| `LMSTUDIO_PROXY` | Optional proxy URL for LM Studio requests (`http://...` or `socks5://...`) | `""` |
| `LLAMACPP_PROXY` | Optional proxy URL for llama.cpp requests (`http://...` or `socks5://...`) | `""` |
| `OLLAMA_BASE_URL` | Ollama server root URL | `http://localhost:11434` |
### Rate Limiting & Timeouts

@@ -14,4 +14,5 @@ SUPPORTED_PROVIDER_IDS: tuple[str, ...] = (
"deepseek",
"lmstudio",
"llamacpp",
"ollama",
)

@@ -121,6 +121,12 @@ class Settings(BaseSettings):
validation_alias="LLAMACPP_BASE_URL",
)
# ==================== Ollama Config ====================
ollama_base_url: str = Field(
default="http://localhost:11434",
validation_alias="OLLAMA_BASE_URL",
)
# ==================== Model ====================
# All Claude model requests are mapped to this single model (fallback)
# Format: provider_type/model/name
@@ -266,6 +272,16 @@ class Settings(BaseSettings):
)
return v
@field_validator("ollama_base_url")
@classmethod
def validate_ollama_base_url(cls, v: str) -> str:
if v.rstrip("/").endswith("/v1"):
raise ValueError(
"OLLAMA_BASE_URL must be the Ollama root URL for native Anthropic "
"messages, e.g. http://localhost:11434 (without /v1)."
)
return v
@field_validator("model", "model_opus", "model_sonnet", "model_haiku")
@classmethod
def validate_model_format(cls, v: str | None) -> str | None:
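
A minimal sketch of the new validator's behavior, mirroring the settings test added later in this commit (it assumes no conflicting `.env` is picked up):

```python
import os
from pydantic import ValidationError
from config.settings import Settings

# A /v1 suffix would point at Ollama's OpenAI-compatible layer rather than
# the native Anthropic messages route, so validation rejects it.
os.environ["OLLAMA_BASE_URL"] = "http://localhost:11434/v1"
try:
    Settings()
except ValidationError as exc:
    print(exc)  # message ends with "(without /v1)."
```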

@@ -4,12 +4,13 @@ Adapters and :mod:`providers.registry` import from here to avoid duplicating
literals and to keep ``providers.registry`` free of per-adapter eager imports.
"""
# OpenAI-compatible chat (NIM, DeepSeek) and local OpenAI-shaped endpoints
# OpenAI-compatible chat (NIM, DeepSeek) and local/native provider endpoints
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
DEEPSEEK_DEFAULT_BASE = "https://api.deepseek.com"
OPENROUTER_DEFAULT_BASE = "https://openrouter.ai/api/v1"
LMSTUDIO_DEFAULT_BASE = "http://localhost:1234/v1"
LLAMACPP_DEFAULT_BASE = "http://localhost:8080/v1"
OLLAMA_DEFAULT_BASE = "http://localhost:11434"
# Backward-compatible names used by existing adapter modules
NVIDIA_NIM_BASE_URL = NVIDIA_NIM_DEFAULT_BASE
@@ -17,3 +18,4 @@ DEEPSEEK_BASE_URL = DEEPSEEK_DEFAULT_BASE
OPENROUTER_BASE_URL = OPENROUTER_DEFAULT_BASE
LMSTUDIO_DEFAULT_BASE_URL = LMSTUDIO_DEFAULT_BASE
LLAMACPP_DEFAULT_BASE_URL = LLAMACPP_DEFAULT_BASE
OLLAMA_DEFAULT_BASE_URL = OLLAMA_DEFAULT_BASE

@@ -0,0 +1,5 @@
"""Ollama provider package."""
from .client import OLLAMA_BASE_URL, OllamaProvider
__all__ = ["OLLAMA_BASE_URL", "OllamaProvider"]

@@ -0,0 +1,31 @@
"""Ollama provider implementation."""
import httpx
from providers.anthropic_messages import AnthropicMessagesTransport
from providers.base import ProviderConfig
from providers.defaults import OLLAMA_DEFAULT_BASE
OLLAMA_BASE_URL = OLLAMA_DEFAULT_BASE
class OllamaProvider(AnthropicMessagesTransport):
"""Ollama provider using native Anthropic Messages API."""
def __init__(self, config: ProviderConfig):
super().__init__(
config,
provider_name="OLLAMA",
default_base_url=OLLAMA_BASE_URL,
)
self._api_key = config.api_key or "ollama"
async def _send_stream_request(self, body: dict) -> httpx.Response:
"""Create a streaming native Anthropic messages response."""
request = self._client.build_request(
"POST",
"/v1/messages",
json=body,
headers=self._request_headers(),
)
return await self._client.send(request, stream=True)
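
Construction needs no real credential; a sketch matching the unit tests further down:

```python
from providers.base import ProviderConfig
from providers.ollama import OllamaProvider

# An empty api_key falls back to the static "ollama" credential, and the
# transport normalizes a trailing slash off the base URL.
provider = OllamaProvider(ProviderConfig(api_key="", base_url="http://localhost:11434/"))
assert provider._api_key == "ollama"
assert provider._base_url == "http://localhost:11434"
```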

@@ -14,6 +14,7 @@ from providers.defaults import (
LLAMACPP_DEFAULT_BASE,
LMSTUDIO_DEFAULT_BASE,
NVIDIA_NIM_DEFAULT_BASE,
OLLAMA_DEFAULT_BASE,
OPENROUTER_DEFAULT_BASE,
)
from providers.exceptions import AuthenticationError, UnknownProviderTypeError
@@ -88,6 +89,21 @@ PROVIDER_DESCRIPTORS: dict[str, ProviderDescriptor] = {
proxy_attr="llamacpp_proxy",
capabilities=("chat", "streaming", "tools", "native_anthropic", "local"),
),
"ollama": ProviderDescriptor(
provider_id="ollama",
transport_type="anthropic_messages",
static_credential="ollama",
default_base_url=OLLAMA_DEFAULT_BASE,
base_url_attr="ollama_base_url",
capabilities=(
"chat",
"streaming",
"tools",
"thinking",
"native_anthropic",
"local",
),
),
}
@@ -121,12 +137,19 @@ def _create_llamacpp(config: ProviderConfig, settings: Settings) -> BaseProvider
return LlamaCppProvider(config)
def _create_ollama(config: ProviderConfig, settings: Settings) -> BaseProvider:
from providers.ollama import OllamaProvider
return OllamaProvider(config)
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"nvidia_nim": _create_nvidia_nim,
"open_router": _create_open_router,
"deepseek": _create_deepseek,
"lmstudio": _create_lmstudio,
"llamacpp": _create_llamacpp,
"ollama": _create_ollama,
}
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
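
Registry lookup and creation then work like any other built-in; a sketch along the lines of the registry tests below (assuming `Settings()` can be built from your environment):

```python
from config.settings import Settings
from providers.registry import PROVIDER_DESCRIPTORS, create_provider

descriptor = PROVIDER_DESCRIPTORS["ollama"]
assert descriptor.transport_type == "anthropic_messages"
assert "native_anthropic" in descriptor.capabilities

# Dispatches through PROVIDER_FACTORIES to the lazy providers.ollama import.
provider = create_provider("ollama", Settings())
```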

@@ -54,6 +54,7 @@ Default targets do not send real bot messages or load voice backends:
| `rate_limit` | disconnect cleanup and follow-up request | configured provider |
| `lmstudio` | local `/models` plus native `/messages` through proxy | running LM Studio server |
| `llamacpp` | local `/models` plus native `/messages` through proxy | running llama-server |
| `ollama` | local `/api/tags` plus native Anthropic messages through proxy | running Ollama server |
Side-effectful targets are opt-in:
@@ -67,10 +68,17 @@ Side-effectful targets are opt-in:
```powershell
$env:FCC_LIVE_SMOKE = "1"
$env:FCC_SMOKE_PROVIDER_MATRIX = "open_router,nvidia_nim,deepseek,lmstudio,llamacpp"
$env:FCC_SMOKE_PROVIDER_MATRIX = "open_router,nvidia_nim,deepseek,lmstudio,llamacpp,ollama"
uv run pytest smoke/product -n 0 -s --tb=short
```
```powershell
$env:FCC_LIVE_SMOKE = "1"
$env:FCC_SMOKE_TARGETS = "ollama"
$env:OLLAMA_BASE_URL = "http://localhost:11434"
uv run pytest smoke/prereq smoke/product -n 0 -s --tb=short
```
```powershell
$env:FCC_LIVE_SMOKE = "1"
$env:FCC_SMOKE_TARGETS = "telegram,discord,voice"

@@ -277,6 +277,17 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = (
),
("test_llamacpp_models_endpoint_when_available",),
),
CapabilityContract(
"local_providers",
"ollama_native_messages",
"ollama_endpoint",
"providers.ollama.OllamaProvider",
"Anthropic request body and local Ollama root URL",
"Anthropic SSE stream through the proxy",
"SSE error event for local upstream failure",
("tests/providers/test_ollama.py",),
("test_ollama_models_endpoint_when_available",),
),
CapabilityContract(
"openrouter",
"native_anthropic_messages",

@@ -348,6 +348,17 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] = (
("LLAMACPP_BASE_URL with running llama-server",),
"skip when local upstream is unavailable",
),
FeatureCoverage(
"ollama_endpoint",
"Ollama native Anthropic messages and local no-key operation work when running",
"public_surface",
("tests/providers/test_ollama.py",),
("test_ollama_models_endpoint_when_available",),
("test_ollama_native_messages_e2e",),
("ollama",),
("OLLAMA_BASE_URL with running Ollama server",),
"skip when local upstream is unavailable",
),
FeatureCoverage(
"package_cli_entrypoints",
"Installed package scripts scaffold config and start the server",

@@ -20,6 +20,7 @@ DEFAULT_TARGETS = frozenset(
"llamacpp",
"lmstudio",
"messaging",
"ollama",
"providers",
"rate_limit",
"tools",
@@ -49,6 +50,7 @@ TARGET_REQUIRED_ENV: dict[str, tuple[str, ...]] = {
"tools": ("configured tool-capable provider model",),
"lmstudio": ("LM_STUDIO_BASE_URL with a running LM Studio server",),
"llamacpp": ("LLAMACPP_BASE_URL with a running llama-server",),
"ollama": ("OLLAMA_BASE_URL with a running Ollama server",),
"telegram": (
"TELEGRAM_BOT_TOKEN",
"ALLOWED_TELEGRAM_USER_ID or FCC_SMOKE_TELEGRAM_CHAT_ID",
@@ -142,6 +144,8 @@ class SmokeConfig:
return bool(self.settings.lm_studio_base_url.strip())
if provider == "llamacpp":
return bool(self.settings.llamacpp_base_url.strip())
if provider == "ollama":
return bool(self.settings.ollama_base_url.strip())
return False

@@ -27,6 +27,14 @@ def test_llamacpp_models_endpoint_when_available(smoke_config: SmokeConfig) -> N
)
@pytest.mark.live
@pytest.mark.smoke_target("ollama")
def test_ollama_models_endpoint_when_available(smoke_config: SmokeConfig) -> None:
_assert_ollama_tags_endpoint(
smoke_config.settings.ollama_base_url, timeout_s=smoke_config.timeout_s
)
def _assert_models_endpoint(
base_url: str, *, timeout_s: float, provider_name: str
) -> None:
@@ -45,4 +53,34 @@ def _assert_models_endpoint(
assert response.status_code == 200, response.text
payload = response.json()
assert isinstance(payload.get("data"), list), payload
data = payload.get("data")
if isinstance(data, list) and data:
return
if isinstance(data, list):
pytest.skip(f"upstream_unavailable: {provider_name} has no local models")
assert isinstance(data, list), payload
def _assert_ollama_tags_endpoint(base_url: str, *, timeout_s: float) -> None:
url = f"{_ollama_root_url(base_url)}/api/tags"
try:
response = httpx.get(url, timeout=timeout_s)
except Exception as exc:
skip_if_upstream_unavailable_exception(exc)
raise
if response.status_code in {404, 405, 502, 503}:
pytest.skip(
f"upstream_unavailable: Ollama tags endpoint {url} "
f"returned HTTP {response.status_code}"
)
assert response.status_code == 200, response.text
models = response.json().get("models")
if isinstance(models, list) and models:
return
pytest.skip("upstream_unavailable: Ollama has no pulled models")
def _ollama_root_url(base_url: str) -> str:
return base_url.rstrip("/")
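
Outside pytest, the same readiness probe is a couple of lines; `/api/tags` is Ollama's native model-listing endpoint and returns a `models` array of objects with `name` fields:

```python
import httpx

# Lists locally pulled models; an empty list means nothing to smoke-test against.
payload = httpx.get("http://localhost:11434/api/tags", timeout=5.0).json()
print([model["name"] for model in payload.get("models", [])])
```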

@@ -29,6 +29,15 @@ def test_llamacpp_native_messages_e2e(smoke_config: SmokeConfig) -> None:
)
@pytest.mark.smoke_target("ollama")
def test_ollama_native_messages_e2e(smoke_config: SmokeConfig) -> None:
_local_native_messages_e2e(
smoke_config,
provider="ollama",
base_url=smoke_config.settings.ollama_base_url,
)
def _local_native_messages_e2e(
smoke_config: SmokeConfig,
*,
@@ -38,15 +47,11 @@ def _local_native_messages_e2e(
if not base_url.strip():
pytest.skip(f"missing_env: {provider} base URL is not configured")
models_url = urljoin(base_url.rstrip("/") + "/", "models")
try:
models = httpx.get(models_url, timeout=5)
except httpx.ConnectError as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
except httpx.TimeoutException as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
assert models.status_code == 200, models.text
model_id = _first_local_model_id(models)
model_id = (
_first_ollama_tag_model_id(base_url)
if provider == "ollama"
else (_first_non_ollama_model_id(provider, base_url))
)
with SmokeServerDriver(
smoke_config,
@@ -60,11 +65,44 @@ def _local_native_messages_e2e(
assert_product_stream(turn.events)
def _first_local_model_id(response: httpx.Response) -> str:
def _first_non_ollama_model_id(provider: str, base_url: str) -> str:
models_url = urljoin(base_url.rstrip("/") + "/", "models")
try:
response = httpx.get(models_url, timeout=5)
except httpx.ConnectError as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
except httpx.TimeoutException as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
assert response.status_code == 200, response.text
payload = response.json()
data = payload.get("data") if isinstance(payload, dict) else None
if isinstance(data, list):
for item in data:
if isinstance(item, dict) and isinstance(item.get("id"), str):
return item["id"]
pytest.skip(f"upstream_unavailable: {provider} has no local models")
pytest.fail("product_failure: local /models did not expose a model id")
def _first_ollama_tag_model_id(base_url: str) -> str:
tags_url = f"{_ollama_root_url(base_url)}/api/tags"
try:
response = httpx.get(tags_url, timeout=5)
except httpx.ConnectError as exc:
pytest.skip(f"upstream_unavailable: ollama tags endpoint: {exc}")
except httpx.TimeoutException as exc:
pytest.skip(f"upstream_unavailable: ollama tags endpoint: {exc}")
assert response.status_code == 200, response.text
payload = response.json()
models = payload.get("models") if isinstance(payload, dict) else None
if isinstance(models, list):
for item in models:
if isinstance(item, dict) and isinstance(item.get("name"), str):
return item["name"]
pytest.skip("upstream_unavailable: ollama has no pulled models")
pytest.fail("product_failure: ollama /api/tags did not expose models")
def _ollama_root_url(base_url: str) -> str:
return base_url.rstrip("/")

@@ -19,6 +19,7 @@ from providers.deepseek import DeepSeekProvider
from providers.exceptions import UnknownProviderTypeError
from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.registry import ProviderRegistry
@@ -35,6 +36,7 @@ def _make_mock_settings(**overrides):
mock.open_router_api_key = "test_openrouter_key"
mock.deepseek_api_key = "test_deepseek_key"
mock.lm_studio_base_url = "http://localhost:1234/v1"
mock.ollama_base_url = "http://localhost:11434"
mock.nim = NimSettings()
mock.http_read_timeout = 300.0
mock.http_write_timeout = 10.0
@@ -130,6 +132,19 @@ async def test_get_provider_lmstudio():
assert provider._base_url == "http://localhost:1234/v1"
@pytest.mark.asyncio
async def test_get_provider_ollama():
"""Test that provider_type=ollama returns OllamaProvider without an API key."""
with patch("api.dependencies.get_settings") as mock_settings:
mock_settings.return_value = _make_mock_settings(provider_type="ollama")
provider = get_provider()
assert isinstance(provider, OllamaProvider)
assert provider._base_url == "http://localhost:11434"
assert provider._api_key == "ollama"
@pytest.mark.asyncio
async def test_get_provider_deepseek():
"""Test that provider_type=deepseek returns DeepSeekProvider."""

@@ -70,6 +70,23 @@ class TestSettings:
settings = Settings()
assert settings.lm_studio_base_url == "http://custom:5678/v1"
def test_ollama_base_url_defaults_to_root(self, monkeypatch):
"""OLLAMA_BASE_URL defaults to the Anthropic-compatible Ollama root URL."""
from config.settings import Settings
monkeypatch.delenv("OLLAMA_BASE_URL", raising=False)
monkeypatch.setitem(Settings.model_config, "env_file", ())
settings = Settings()
assert settings.ollama_base_url == "http://localhost:11434"
def test_ollama_base_url_rejects_v1_suffix(self, monkeypatch):
"""OLLAMA_BASE_URL must not include /v1 for native Anthropic messages."""
from config.settings import Settings
monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434/v1")
with pytest.raises(ValidationError, match="without /v1"):
Settings()
def test_provider_rate_limit_from_env(self, monkeypatch):
"""PROVIDER_RATE_LIMIT env var is loaded into settings."""
from config.settings import Settings
@@ -466,6 +483,7 @@ class TestPerModelMapping:
({"MODEL": "deepseek/deepseek-chat"}, "deepseek/deepseek-chat", None),
({"MODEL": "lmstudio/qwen2.5-7b"}, "lmstudio/qwen2.5-7b", None),
({"MODEL": "llamacpp/local-model"}, "llamacpp/local-model", None),
({"MODEL": "ollama/llama3.1"}, "ollama/llama3.1", None),
],
)
def test_settings_models_from_env(
@@ -602,6 +620,7 @@ class TestPerModelMapping:
assert Settings.parse_provider_type("deepseek/deepseek-chat") == "deepseek"
assert Settings.parse_provider_type("lmstudio/qwen") == "lmstudio"
assert Settings.parse_provider_type("llamacpp/model") == "llamacpp"
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
def test_parse_model_name(self):
"""parse_model_name extracts model name from model string."""
@@ -611,3 +630,4 @@
assert Settings.parse_model_name("deepseek/deepseek-chat") == "deepseek-chat"
assert Settings.parse_model_name("lmstudio/qwen") == "qwen"
assert Settings.parse_model_name("llamacpp/model") == "model"
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"

@@ -9,6 +9,7 @@ from providers.deepseek import DeepSeekProvider
from providers.llamacpp import LlamaCppProvider
from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from smoke.features import FEATURE_INVENTORY, README_FEATURES, feature_ids
@@ -71,6 +72,7 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
"deepseek": DeepSeekProvider,
"lmstudio": LMStudioProvider,
"llamacpp": LlamaCppProvider,
"ollama": OllamaProvider,
}
for provider_class in provider_classes.values():
assert issubclass(provider_class, BaseProvider)

@@ -0,0 +1,59 @@
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from smoke.lib.config import DEFAULT_TARGETS, TARGET_REQUIRED_ENV, SmokeConfig
def _settings(**overrides):
values = {
"model": "ollama/llama3.1",
"model_opus": None,
"model_sonnet": None,
"model_haiku": None,
"nvidia_nim_api_key": "",
"open_router_api_key": "",
"deepseek_api_key": "",
"lm_studio_base_url": "",
"llamacpp_base_url": "",
"ollama_base_url": "http://localhost:11434",
}
values.update(overrides)
return SimpleNamespace(**values)
def _smoke_config(**overrides) -> SmokeConfig:
values = {
"root": Path("."),
"results_dir": Path(".smoke-results"),
"live": False,
"interactive": False,
"targets": DEFAULT_TARGETS,
"provider_matrix": frozenset(),
"timeout_s": 45.0,
"prompt": "Reply with exactly: FCC_SMOKE_PONG",
"claude_bin": "claude",
"worker_id": "main",
"settings": _settings(),
}
values.update(overrides)
return SmokeConfig(**values)
def test_ollama_is_default_smoke_target() -> None:
assert "ollama" in DEFAULT_TARGETS
assert "ollama" in TARGET_REQUIRED_ENV
def test_ollama_provider_configuration_uses_base_url() -> None:
config = _smoke_config()
assert config.has_provider_configuration("ollama")
assert config.provider_models()[0].full_model == "ollama/llama3.1"
def test_ollama_provider_matrix_filters_models() -> None:
config = _smoke_config(provider_matrix=frozenset({"ollama"}))
assert [model.provider for model in config.provider_models()] == ["ollama"]

@@ -277,6 +277,12 @@ async def test_stream_response_suppresses_thinking_when_disabled(provider_config
assert "Answer" in event_text
def _make_bad_request_error(message: str) -> openai.BadRequestError:
response = Response(status_code=400, request=Request("POST", "http://test"))
body = {"error": {"message": message}}
return openai.BadRequestError(message, response=response, body=body)
@pytest.mark.asyncio
async def test_stream_response_retries_without_chat_template(provider_config):
from config.nim import NimSettings

@@ -0,0 +1,244 @@
"""Tests for Ollama native Anthropic provider."""
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from providers.base import ProviderConfig
from providers.ollama import OLLAMA_BASE_URL, OllamaProvider
class MockMessage:
def __init__(self, role, content):
self.role = role
self.content = content
class MockRequest:
def __init__(self, **kwargs):
self.model = "llama3.1:8b"
self.messages = [MockMessage("user", "Hello")]
self.max_tokens = 100
self.temperature = 0.5
self.top_p = 0.9
self.system = "System prompt"
self.stop_sequences = None
self.stream = True
self.tools = []
self.tool_choice = None
self.extra_body = {}
self.thinking = MagicMock()
self.thinking.enabled = True
for key, value in kwargs.items():
setattr(self, key, value)
def model_dump(self, exclude_none=True):
return {
"model": self.model,
"messages": [{"role": m.role, "content": m.content} for m in self.messages],
"max_tokens": self.max_tokens,
"temperature": self.temperature,
"top_p": self.top_p,
"system": self.system,
"stream": self.stream,
"tools": self.tools,
"tool_choice": self.tool_choice,
"extra_body": self.extra_body,
"thinking": {"enabled": self.thinking.enabled} if self.thinking else None,
}
@pytest.fixture
def ollama_config():
return ProviderConfig(
api_key="ollama",
base_url="http://localhost:11434",
rate_limit=10,
rate_window=60,
)
@pytest.fixture(autouse=True)
def mock_rate_limiter():
"""Mock the global rate limiter to prevent waiting."""
with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
instance = mock.get_scoped_instance.return_value
instance.wait_if_blocked = AsyncMock(return_value=False)
async def _passthrough(fn, *args, **kwargs):
return await fn(*args, **kwargs)
instance.execute_with_retry = AsyncMock(side_effect=_passthrough)
yield instance
@pytest.fixture
def ollama_provider(ollama_config):
return OllamaProvider(ollama_config)
def test_init(ollama_config):
"""Test provider initialization."""
with patch("httpx.AsyncClient"):
provider = OllamaProvider(ollama_config)
assert provider._base_url == "http://localhost:11434"
assert provider._provider_name == "OLLAMA"
assert provider._api_key == "ollama"
def test_init_uses_default_base_url():
"""Test that provider uses default root URL when not configured."""
config = ProviderConfig(api_key="ollama", base_url=None)
with patch("httpx.AsyncClient"):
provider = OllamaProvider(config)
assert provider._base_url == OLLAMA_BASE_URL
def test_init_uses_configurable_timeouts():
"""Test that provider passes configurable read/write/connect timeouts to client."""
config = ProviderConfig(
api_key="ollama",
base_url="http://localhost:11434",
http_read_timeout=600.0,
http_write_timeout=15.0,
http_connect_timeout=5.0,
)
with patch("httpx.AsyncClient") as mock_client:
OllamaProvider(config)
call_kwargs = mock_client.call_args[1]
timeout = call_kwargs["timeout"]
assert timeout.read == 600.0
assert timeout.write == 15.0
assert timeout.connect == 5.0
def test_init_base_url_strips_trailing_slash():
"""Config with base_url trailing slash is stored without it."""
config = ProviderConfig(
api_key="ollama",
base_url="http://localhost:11434/",
rate_limit=10,
rate_window=60,
)
with patch("httpx.AsyncClient"):
provider = OllamaProvider(config)
assert provider._base_url == "http://localhost:11434"
def test_init_uses_default_api_key():
"""Test that provider uses default API key when not configured."""
config = ProviderConfig(
base_url="http://localhost:11434",
api_key="",
rate_limit=10,
rate_window=60,
)
with patch("httpx.AsyncClient"):
provider = OllamaProvider(config)
assert provider._api_key == "ollama"
@pytest.mark.asyncio
async def test_stream_response(ollama_provider):
"""Test streaming native Anthropic response."""
req = MockRequest()
mock_response = MagicMock()
mock_response.status_code = 200
async def mock_aiter_lines():
yield "event: message_start"
yield 'data: {"type":"message_start","message":{}}'
yield ""
yield "event: content_block_delta"
yield 'data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello World"}}'
yield ""
yield "event: message_stop"
yield 'data: {"type":"message_stop"}'
yield ""
mock_response.aiter_lines = mock_aiter_lines
with (
patch.object(
ollama_provider._client, "build_request", return_value=MagicMock()
) as mock_build,
patch.object(
ollama_provider._client,
"send",
new_callable=AsyncMock,
return_value=mock_response,
),
):
events = [event async for event in ollama_provider.stream_response(req)]
mock_build.assert_called_once()
args, kwargs = mock_build.call_args
assert args[0] == "POST"
assert args[1] == "/v1/messages"
assert kwargs["json"]["model"] == "llama3.1:8b"
assert kwargs["json"]["stream"] is True
assert "extra_body" not in kwargs["json"]
assert kwargs["json"]["thinking"] == {"type": "enabled"}
assert len(events) == 9
assert events[0] == "event: message_start\n"
@pytest.mark.asyncio
async def test_build_request_body_omits_thinking_when_disabled(ollama_config):
"""Global disable suppresses provider-side thinking."""
provider = OllamaProvider(
ollama_config.model_copy(update={"enable_thinking": False})
)
req = MockRequest()
body = provider._build_request_body(req)
assert "thinking" not in body
assert body["model"] == "llama3.1:8b"
@pytest.mark.asyncio
async def test_stream_error_status_code(ollama_provider):
"""Non-200 status code is yielded as an SSE API error."""
req = MockRequest()
mock_response = MagicMock()
mock_response.status_code = 500
mock_response.aread = AsyncMock(return_value=b"Internal Server Error")
mock_response.raise_for_status = MagicMock(
side_effect=httpx.HTTPStatusError(
"Internal Server Error", request=MagicMock(), response=mock_response
)
)
with (
patch.object(
ollama_provider._client, "build_request", return_value=MagicMock()
),
patch.object(
ollama_provider._client,
"send",
new_callable=AsyncMock,
return_value=mock_response,
),
):
events = [
event
async for event in ollama_provider.stream_response(req, request_id="REQ")
]
assert len(events) == 1
assert events[0].startswith("event: error\ndata: {")
assert "Internal Server Error" in events[0]
assert "REQ" in events[0]
@pytest.mark.asyncio
async def test_cleanup(ollama_provider):
"""Test that cleanup closes the client."""
ollama_provider._client.aclose = AsyncMock()
await ollama_provider.cleanup()
ollama_provider._client.aclose.assert_called_once()

@@ -11,6 +11,7 @@ from providers.exceptions import UnknownProviderTypeError
from providers.llamacpp import LlamaCppProvider
from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.registry import (
PROVIDER_DESCRIPTORS,
@@ -28,6 +29,7 @@ def _make_settings(**overrides):
mock.deepseek_api_key = "test_deepseek_key"
mock.lm_studio_base_url = "http://localhost:1234/v1"
mock.llamacpp_base_url = "http://localhost:8080/v1"
mock.ollama_base_url = "http://localhost:11434"
mock.nvidia_nim_proxy = ""
mock.open_router_proxy = ""
mock.lmstudio_proxy = ""
@@ -69,6 +71,14 @@ def test_descriptors_cover_advertised_provider_ids():
assert descriptor.capabilities
def test_ollama_descriptor_uses_native_anthropic_transport():
descriptor = PROVIDER_DESCRIPTORS["ollama"]
assert descriptor.transport_type == "anthropic_messages"
assert descriptor.default_base_url == "http://localhost:11434"
assert "native_anthropic" in descriptor.capabilities
def test_create_provider_uses_native_openrouter_by_default():
with patch("httpx.AsyncClient"):
provider = create_provider("open_router", _make_settings())
@@ -83,6 +93,7 @@ def test_create_provider_instantiates_each_builtin():
"deepseek": DeepSeekProvider,
"lmstudio": LMStudioProvider,
"llamacpp": LlamaCppProvider,
"ollama": OllamaProvider,
}
with (