mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-04-26 10:31:07 +00:00
[feat] ollama method support (#129)
Support using Ollama, similar to LM Studio --------- Co-authored-by: Alishahryar1 <alishahryar2@gmail.com> Co-authored-by: u011436427 <u011436427@noreply.gitcode.com>
This commit is contained in:
parent
7f1e860c7f
commit
b525217633
21 changed files with 593 additions and 15 deletions
|
|
@ -18,9 +18,13 @@ LM_STUDIO_BASE_URL="http://localhost:1234/v1"
|
|||
LLAMACPP_BASE_URL="http://localhost:8080/v1"
|
||||
|
||||
|
||||
# Ollama Config (local provider, no API key required)
|
||||
OLLAMA_BASE_URL="http://localhost:11434"
|
||||
|
||||
|
||||
# All Claude model requests are mapped to these models, plain model is fallback
|
||||
# Format: provider_type/model/name
|
||||
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
|
||||
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama"
|
||||
MODEL_OPUS=
|
||||
MODEL_SONNET=
|
||||
MODEL_HAIKU=
|
||||
|
|
|
|||
31
README.md
31
README.md
|
|
@ -31,7 +31,7 @@ A lightweight proxy that routes Claude Code's Anthropic API calls to **NVIDIA NI
|
|||
| -------------------------- | ----------------------------------------------------------------------------------------------- |
|
||||
| **Zero Cost** | 40 req/min free on NVIDIA NIM. Free models on OpenRouter. Fully local with LM Studio |
|
||||
| **Drop-in Replacement** | Set 2 env vars. No modifications to Claude Code CLI or VSCode extension needed |
|
||||
| **5 Providers** | NVIDIA NIM, OpenRouter, DeepSeek, LM Studio (local), llama.cpp (`llama-server`) |
|
||||
| **6 Providers** | NVIDIA NIM, OpenRouter, DeepSeek, LM Studio (local), llama.cpp (`llama-server`), Ollama |
|
||||
| **Per-Model Mapping** | Route Opus / Sonnet / Haiku to different models and providers. Mix providers freely |
|
||||
| **Thinking Token Support** | Parses `<think>` tags and `reasoning_content` into native Claude thinking blocks |
|
||||
| **Heuristic Tool Parser** | Models outputting tool calls as text are auto-parsed into structured tool use |
|
||||
|
|
@ -361,6 +361,7 @@ The proxy also exposes Claude-compatible probe routes: `GET /v1/models`, `POST /
|
|||
| **DeepSeek** | Usage-based | Varies | Direct access to DeepSeek chat/reasoner |
|
||||
| **LM Studio** | Free (local) | Unlimited | Privacy, offline use, no rate limits |
|
||||
| **llama.cpp** | Free (local) | Unlimited | Lightweight local inference engine |
|
||||
| **Ollama** | Free (local) | Unlimited | Easy local LLM runtime, native Anthropic API |
|
||||
|
||||
Models use a prefix format: `provider_prefix/model/name`. An invalid prefix causes an error.
|
||||
|
||||
|
|
@ -371,6 +372,7 @@ Models use a prefix format: `provider_prefix/model/name`. An invalid prefix caus
|
|||
| DeepSeek | `deepseek/...` | `DEEPSEEK_API_KEY` | `api.deepseek.com` |
|
||||
| LM Studio | `lmstudio/...` | (none) | `localhost:1234/v1` |
|
||||
| llama.cpp | `llamacpp/...` | (none) | `localhost:8080/v1` |
|
||||
| Ollama | `ollama/...` | (none) | `localhost:11434` |
|
||||
|
||||
<details>
|
||||
<summary><b>NVIDIA NIM models</b></summary>
|
||||
|
|
@ -439,6 +441,32 @@ See the Unsloth docs for detailed instructions and capable models:
|
|||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Ollama</b> (fully local, no API key)</summary>
|
||||
|
||||
```dotenv
|
||||
OLLAMA_BASE_URL="http://localhost:11434"
|
||||
|
||||
MODEL_OPUS="ollama/llama3.1:70b"
|
||||
MODEL_SONNET="ollama/llama3.1:8b"
|
||||
MODEL_HAIKU="ollama/llama3.1:8b"
|
||||
MODEL="ollama/llama3.1:8b"
|
||||
```
|
||||
|
||||
Install Ollama: [ollama.com](https://ollama.com)
|
||||
|
||||
Pull a model:
|
||||
```bash
|
||||
ollama pull llama3.1
|
||||
```
|
||||
|
||||
Start Ollama server:
|
||||
```bash
|
||||
ollama serve
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
---
|
||||
|
||||
## Discord Bot
|
||||
|
|
@ -544,6 +572,7 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
|
|||
| `OPENROUTER_PROXY` | Optional proxy URL for OpenRouter requests (`http://...` or `socks5://...`) | `""` |
|
||||
| `LMSTUDIO_PROXY` | Optional proxy URL for LM Studio requests (`http://...` or `socks5://...`) | `""` |
|
||||
| `LLAMACPP_PROXY` | Optional proxy URL for llama.cpp requests (`http://...` or `socks5://...`) | `""` |
|
||||
| `OLLAMA_BASE_URL` | Ollama server root URL | `http://localhost:11434` |
|
||||
|
||||
### Rate Limiting & Timeouts
|
||||
|
||||
|
|
|
|||
|
|
@ -14,4 +14,5 @@ SUPPORTED_PROVIDER_IDS: tuple[str, ...] = (
|
|||
"deepseek",
|
||||
"lmstudio",
|
||||
"llamacpp",
|
||||
"ollama",
|
||||
)
|
||||
|
|
|
|||
|
|
@ -121,6 +121,12 @@ class Settings(BaseSettings):
|
|||
validation_alias="LLAMACPP_BASE_URL",
|
||||
)
|
||||
|
||||
# ==================== Ollama Config ====================
|
||||
ollama_base_url: str = Field(
|
||||
default="http://localhost:11434",
|
||||
validation_alias="OLLAMA_BASE_URL",
|
||||
)
|
||||
|
||||
# ==================== Model ====================
|
||||
# All Claude model requests are mapped to this single model (fallback)
|
||||
# Format: provider_type/model/name
|
||||
|
|
@ -266,6 +272,16 @@ class Settings(BaseSettings):
|
|||
)
|
||||
return v
|
||||
|
||||
@field_validator("ollama_base_url")
@classmethod
def validate_ollama_base_url(cls, v: str) -> str:
    """Reject Ollama base URLs that end in /v1.

    The native Anthropic Messages transport needs the server root URL; a
    /v1 suffix would indicate the OpenAI-compatible endpoint instead.
    """
    normalized = v.rstrip("/")
    if not normalized.endswith("/v1"):
        return v
    raise ValueError(
        "OLLAMA_BASE_URL must be the Ollama root URL for native Anthropic "
        "messages, e.g. http://localhost:11434 (without /v1)."
    )
|
||||
|
||||
@field_validator("model", "model_opus", "model_sonnet", "model_haiku")
|
||||
@classmethod
|
||||
def validate_model_format(cls, v: str | None) -> str | None:
|
||||
|
|
|
|||
|
|
@ -4,12 +4,13 @@ Adapters and :mod:`providers.registry` import from here to avoid duplicating
|
|||
literals and to keep ``providers.registry`` free of per-adapter eager imports.
|
||||
"""
|
||||
|
||||
# OpenAI-compatible chat (NIM, DeepSeek) and local OpenAI-shaped endpoints
|
||||
# OpenAI-compatible chat (NIM, DeepSeek) and local/native provider endpoints
|
||||
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
|
||||
DEEPSEEK_DEFAULT_BASE = "https://api.deepseek.com"
|
||||
OPENROUTER_DEFAULT_BASE = "https://openrouter.ai/api/v1"
|
||||
LMSTUDIO_DEFAULT_BASE = "http://localhost:1234/v1"
|
||||
LLAMACPP_DEFAULT_BASE = "http://localhost:8080/v1"
|
||||
OLLAMA_DEFAULT_BASE = "http://localhost:11434"
|
||||
|
||||
# Backward-compatible names used by existing adapter modules
|
||||
NVIDIA_NIM_BASE_URL = NVIDIA_NIM_DEFAULT_BASE
|
||||
|
|
@ -17,3 +18,4 @@ DEEPSEEK_BASE_URL = DEEPSEEK_DEFAULT_BASE
|
|||
OPENROUTER_BASE_URL = OPENROUTER_DEFAULT_BASE
|
||||
LMSTUDIO_DEFAULT_BASE_URL = LMSTUDIO_DEFAULT_BASE
|
||||
LLAMACPP_DEFAULT_BASE_URL = LLAMACPP_DEFAULT_BASE
|
||||
OLLAMA_DEFAULT_BASE_URL = OLLAMA_DEFAULT_BASE
|
||||
|
|
|
|||
5
providers/ollama/__init__.py
Normal file
5
providers/ollama/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
"""Ollama provider package."""
|
||||
|
||||
from .client import OLLAMA_BASE_URL, OllamaProvider
|
||||
|
||||
__all__ = ["OLLAMA_BASE_URL", "OllamaProvider"]
|
||||
31
providers/ollama/client.py
Normal file
31
providers/ollama/client.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
"""Ollama provider implementation."""
|
||||
|
||||
import httpx
|
||||
|
||||
from providers.anthropic_messages import AnthropicMessagesTransport
|
||||
from providers.base import ProviderConfig
|
||||
from providers.defaults import OLLAMA_DEFAULT_BASE
|
||||
|
||||
OLLAMA_BASE_URL = OLLAMA_DEFAULT_BASE
|
||||
|
||||
|
||||
class OllamaProvider(AnthropicMessagesTransport):
    """Provider for a local Ollama server speaking the native Anthropic Messages API."""

    def __init__(self, config: ProviderConfig):
        # Ollama requires no real credential; fall back to a placeholder key.
        super().__init__(
            config,
            provider_name="OLLAMA",
            default_base_url=OLLAMA_BASE_URL,
        )
        self._api_key = config.api_key or "ollama"

    async def _send_stream_request(self, body: dict) -> httpx.Response:
        """POST *body* to /v1/messages and return the streaming httpx response."""
        req = self._client.build_request(
            "POST", "/v1/messages", json=body, headers=self._request_headers()
        )
        return await self._client.send(req, stream=True)
|
||||
|
|
@ -14,6 +14,7 @@ from providers.defaults import (
|
|||
LLAMACPP_DEFAULT_BASE,
|
||||
LMSTUDIO_DEFAULT_BASE,
|
||||
NVIDIA_NIM_DEFAULT_BASE,
|
||||
OLLAMA_DEFAULT_BASE,
|
||||
OPENROUTER_DEFAULT_BASE,
|
||||
)
|
||||
from providers.exceptions import AuthenticationError, UnknownProviderTypeError
|
||||
|
|
@ -88,6 +89,21 @@ PROVIDER_DESCRIPTORS: dict[str, ProviderDescriptor] = {
|
|||
proxy_attr="llamacpp_proxy",
|
||||
capabilities=("chat", "streaming", "tools", "native_anthropic", "local"),
|
||||
),
|
||||
"ollama": ProviderDescriptor(
|
||||
provider_id="ollama",
|
||||
transport_type="anthropic_messages",
|
||||
static_credential="ollama",
|
||||
default_base_url=OLLAMA_DEFAULT_BASE,
|
||||
base_url_attr="ollama_base_url",
|
||||
capabilities=(
|
||||
"chat",
|
||||
"streaming",
|
||||
"tools",
|
||||
"thinking",
|
||||
"native_anthropic",
|
||||
"local",
|
||||
),
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -121,12 +137,19 @@ def _create_llamacpp(config: ProviderConfig, settings: Settings) -> BaseProvider
|
|||
return LlamaCppProvider(config)
|
||||
|
||||
|
||||
def _create_ollama(config: ProviderConfig, settings: Settings) -> BaseProvider:
    """Factory for the Ollama provider; the import is deferred to keep startup lazy."""
    from providers.ollama import OllamaProvider

    return OllamaProvider(config)
|
||||
|
||||
|
||||
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
|
||||
"nvidia_nim": _create_nvidia_nim,
|
||||
"open_router": _create_open_router,
|
||||
"deepseek": _create_deepseek,
|
||||
"lmstudio": _create_lmstudio,
|
||||
"llamacpp": _create_llamacpp,
|
||||
"ollama": _create_ollama,
|
||||
}
|
||||
|
||||
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
|
||||
|
|
|
|||
|
|
@ -54,6 +54,7 @@ Default targets do not send real bot messages or load voice backends:
|
|||
| `rate_limit` | disconnect cleanup and follow-up request | configured provider |
|
||||
| `lmstudio` | local `/models` plus native `/messages` through proxy | running LM Studio server |
|
||||
| `llamacpp` | local `/models` plus native `/messages` through proxy | running llama-server |
|
||||
| `ollama` | local `/api/tags` plus native Anthropic messages through proxy | running Ollama server |
|
||||
|
||||
Side-effectful targets are opt-in:
|
||||
|
||||
|
|
@ -67,10 +68,17 @@ Side-effectful targets are opt-in:
|
|||
|
||||
```powershell
|
||||
$env:FCC_LIVE_SMOKE = "1"
|
||||
$env:FCC_SMOKE_PROVIDER_MATRIX = "open_router,nvidia_nim,deepseek,lmstudio,llamacpp"
|
||||
$env:FCC_SMOKE_PROVIDER_MATRIX = "open_router,nvidia_nim,deepseek,lmstudio,llamacpp,ollama"
|
||||
uv run pytest smoke/product -n 0 -s --tb=short
|
||||
```
|
||||
|
||||
```powershell
|
||||
$env:FCC_LIVE_SMOKE = "1"
|
||||
$env:FCC_SMOKE_TARGETS = "ollama"
|
||||
$env:OLLAMA_BASE_URL = "http://localhost:11434"
|
||||
uv run pytest smoke/prereq smoke/product -n 0 -s --tb=short
|
||||
```
|
||||
|
||||
```powershell
|
||||
$env:FCC_LIVE_SMOKE = "1"
|
||||
$env:FCC_SMOKE_TARGETS = "telegram,discord,voice"
|
||||
|
|
|
|||
|
|
@ -277,6 +277,17 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = (
|
|||
),
|
||||
("test_llamacpp_models_endpoint_when_available",),
|
||||
),
|
||||
CapabilityContract(
|
||||
"local_providers",
|
||||
"ollama_native_messages",
|
||||
"ollama_endpoint",
|
||||
"providers.ollama.OllamaProvider",
|
||||
"Anthropic request body and local Ollama root URL",
|
||||
"Anthropic SSE stream through the proxy",
|
||||
"SSE error event for local upstream failure",
|
||||
("tests/providers/test_ollama.py",),
|
||||
("test_ollama_models_endpoint_when_available",),
|
||||
),
|
||||
CapabilityContract(
|
||||
"openrouter",
|
||||
"native_anthropic_messages",
|
||||
|
|
|
|||
|
|
@ -348,6 +348,17 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] = (
|
|||
("LLAMACPP_BASE_URL with running llama-server",),
|
||||
"skip when local upstream is unavailable",
|
||||
),
|
||||
FeatureCoverage(
|
||||
"ollama_endpoint",
|
||||
"Ollama native Anthropic messages and local no-key operation work when running",
|
||||
"public_surface",
|
||||
("tests/providers/test_ollama.py",),
|
||||
("test_ollama_models_endpoint_when_available",),
|
||||
("test_ollama_native_messages_e2e",),
|
||||
("ollama",),
|
||||
("OLLAMA_BASE_URL with running Ollama server",),
|
||||
"skip when local upstream is unavailable",
|
||||
),
|
||||
FeatureCoverage(
|
||||
"package_cli_entrypoints",
|
||||
"Installed package scripts scaffold config and start the server",
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ DEFAULT_TARGETS = frozenset(
|
|||
"llamacpp",
|
||||
"lmstudio",
|
||||
"messaging",
|
||||
"ollama",
|
||||
"providers",
|
||||
"rate_limit",
|
||||
"tools",
|
||||
|
|
@ -49,6 +50,7 @@ TARGET_REQUIRED_ENV: dict[str, tuple[str, ...]] = {
|
|||
"tools": ("configured tool-capable provider model",),
|
||||
"lmstudio": ("LM_STUDIO_BASE_URL with a running LM Studio server",),
|
||||
"llamacpp": ("LLAMACPP_BASE_URL with a running llama-server",),
|
||||
"ollama": ("OLLAMA_BASE_URL with a running Ollama server",),
|
||||
"telegram": (
|
||||
"TELEGRAM_BOT_TOKEN",
|
||||
"ALLOWED_TELEGRAM_USER_ID or FCC_SMOKE_TELEGRAM_CHAT_ID",
|
||||
|
|
@ -142,6 +144,8 @@ class SmokeConfig:
|
|||
return bool(self.settings.lm_studio_base_url.strip())
|
||||
if provider == "llamacpp":
|
||||
return bool(self.settings.llamacpp_base_url.strip())
|
||||
if provider == "ollama":
|
||||
return bool(self.settings.ollama_base_url.strip())
|
||||
return False
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,14 @@ def test_llamacpp_models_endpoint_when_available(smoke_config: SmokeConfig) -> N
|
|||
)
|
||||
|
||||
|
||||
@pytest.mark.live
@pytest.mark.smoke_target("ollama")
def test_ollama_models_endpoint_when_available(smoke_config: SmokeConfig) -> None:
    """Probe the local Ollama tags endpoint when a server is reachable."""
    base_url = smoke_config.settings.ollama_base_url
    _assert_ollama_tags_endpoint(base_url, timeout_s=smoke_config.timeout_s)
|
||||
|
||||
|
||||
def _assert_models_endpoint(
|
||||
base_url: str, *, timeout_s: float, provider_name: str
|
||||
) -> None:
|
||||
|
|
@ -45,4 +53,34 @@ def _assert_models_endpoint(
|
|||
|
||||
assert response.status_code == 200, response.text
|
||||
payload = response.json()
|
||||
assert isinstance(payload.get("data"), list), payload
|
||||
data = payload.get("data")
|
||||
if isinstance(data, list) and data:
|
||||
return
|
||||
if isinstance(data, list):
|
||||
pytest.skip(f"upstream_unavailable: {provider_name} has no local models")
|
||||
assert isinstance(data, list), payload
|
||||
|
||||
|
||||
def _assert_ollama_tags_endpoint(base_url: str, *, timeout_s: float) -> None:
    """Assert Ollama's /api/tags responds with at least one pulled model.

    Skips (rather than fails) when the local upstream is unreachable,
    unhealthy, or simply has no models pulled yet.
    """
    url = f"{_ollama_root_url(base_url)}/api/tags"
    try:
        response = httpx.get(url, timeout=timeout_s)
    except Exception as exc:
        skip_if_upstream_unavailable_exception(exc)
        raise

    # "Server up but endpoint missing/unhealthy" counts as an unavailable upstream.
    if response.status_code in {404, 405, 502, 503}:
        pytest.skip(
            f"upstream_unavailable: Ollama tags endpoint {url} "
            f"returned HTTP {response.status_code}"
        )

    assert response.status_code == 200, response.text
    models = response.json().get("models")
    if not (isinstance(models, list) and models):
        pytest.skip("upstream_unavailable: Ollama has no pulled models")
|
||||
|
||||
|
||||
def _ollama_root_url(base_url: str) -> str:
|
||||
return base_url.rstrip("/")
|
||||
|
|
|
|||
|
|
@ -29,6 +29,15 @@ def test_llamacpp_native_messages_e2e(smoke_config: SmokeConfig) -> None:
|
|||
)
|
||||
|
||||
|
||||
@pytest.mark.smoke_target("ollama")
def test_ollama_native_messages_e2e(smoke_config: SmokeConfig) -> None:
    """End-to-end native Anthropic messages flow against a local Ollama server."""
    base_url = smoke_config.settings.ollama_base_url
    _local_native_messages_e2e(smoke_config, provider="ollama", base_url=base_url)
|
||||
|
||||
|
||||
def _local_native_messages_e2e(
|
||||
smoke_config: SmokeConfig,
|
||||
*,
|
||||
|
|
@ -38,15 +47,11 @@ def _local_native_messages_e2e(
|
|||
if not base_url.strip():
|
||||
pytest.skip(f"missing_env: {provider} base URL is not configured")
|
||||
|
||||
models_url = urljoin(base_url.rstrip("/") + "/", "models")
|
||||
try:
|
||||
models = httpx.get(models_url, timeout=5)
|
||||
except httpx.ConnectError as exc:
|
||||
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
|
||||
except httpx.TimeoutException as exc:
|
||||
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
|
||||
assert models.status_code == 200, models.text
|
||||
model_id = _first_local_model_id(models)
|
||||
model_id = (
|
||||
_first_ollama_tag_model_id(base_url)
|
||||
if provider == "ollama"
|
||||
else (_first_non_ollama_model_id(provider, base_url))
|
||||
)
|
||||
|
||||
with SmokeServerDriver(
|
||||
smoke_config,
|
||||
|
|
@ -60,11 +65,44 @@ def _local_native_messages_e2e(
|
|||
assert_product_stream(turn.events)
|
||||
|
||||
|
||||
def _first_local_model_id(response: httpx.Response) -> str:
|
||||
def _first_non_ollama_model_id(provider: str, base_url: str) -> str:
    """Return the first model id exposed by a local OpenAI-style /models endpoint.

    Skips when the upstream is unreachable or lists no models; fails the
    test when the payload shape is wrong (no "data" list at all).
    """
    models_url = urljoin(base_url.rstrip("/") + "/", "models")
    try:
        response = httpx.get(models_url, timeout=5)
    except (httpx.ConnectError, httpx.TimeoutException) as exc:
        pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
    assert response.status_code == 200, response.text
    payload = response.json()
    data = payload.get("data") if isinstance(payload, dict) else None
    if isinstance(data, list):
        for entry in data:
            if isinstance(entry, dict) and isinstance(entry.get("id"), str):
                return entry["id"]
        pytest.skip(f"upstream_unavailable: {provider} has no local models")
    pytest.fail("product_failure: local /models did not expose a model id")
|
||||
|
||||
|
||||
def _first_ollama_tag_model_id(base_url: str) -> str:
    """Return the name of the first pulled model listed by Ollama's /api/tags.

    Skips when the upstream is unreachable or has no pulled models; fails
    when the payload does not contain a "models" list at all.
    """
    tags_url = f"{_ollama_root_url(base_url)}/api/tags"
    try:
        response = httpx.get(tags_url, timeout=5)
    except (httpx.ConnectError, httpx.TimeoutException) as exc:
        pytest.skip(f"upstream_unavailable: ollama tags endpoint: {exc}")

    assert response.status_code == 200, response.text
    payload = response.json()
    models = payload.get("models") if isinstance(payload, dict) else None
    if isinstance(models, list):
        name = next(
            (
                entry["name"]
                for entry in models
                if isinstance(entry, dict) and isinstance(entry.get("name"), str)
            ),
            None,
        )
        if name is not None:
            return name
        pytest.skip("upstream_unavailable: ollama has no pulled models")
    pytest.fail("product_failure: ollama /api/tags did not expose models")
|
||||
|
||||
|
||||
def _ollama_root_url(base_url: str) -> str:
|
||||
return base_url.rstrip("/")
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ from providers.deepseek import DeepSeekProvider
|
|||
from providers.exceptions import UnknownProviderTypeError
|
||||
from providers.lmstudio import LMStudioProvider
|
||||
from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.ollama import OllamaProvider
|
||||
from providers.open_router import OpenRouterProvider
|
||||
from providers.registry import ProviderRegistry
|
||||
|
||||
|
|
@ -35,6 +36,7 @@ def _make_mock_settings(**overrides):
|
|||
mock.open_router_api_key = "test_openrouter_key"
|
||||
mock.deepseek_api_key = "test_deepseek_key"
|
||||
mock.lm_studio_base_url = "http://localhost:1234/v1"
|
||||
mock.ollama_base_url = "http://localhost:11434"
|
||||
mock.nim = NimSettings()
|
||||
mock.http_read_timeout = 300.0
|
||||
mock.http_write_timeout = 10.0
|
||||
|
|
@ -130,6 +132,19 @@ async def test_get_provider_lmstudio():
|
|||
assert provider._base_url == "http://localhost:1234/v1"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_get_provider_ollama():
    """provider_type=ollama yields an OllamaProvider with default key and URL."""
    with patch("api.dependencies.get_settings") as mock_settings:
        mock_settings.return_value = _make_mock_settings(provider_type="ollama")
        provider = get_provider()

    assert isinstance(provider, OllamaProvider)
    assert provider._base_url == "http://localhost:11434"
    assert provider._api_key == "ollama"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_provider_deepseek():
|
||||
"""Test that provider_type=deepseek returns DeepSeekProvider."""
|
||||
|
|
|
|||
|
|
@ -70,6 +70,23 @@ class TestSettings:
|
|||
settings = Settings()
|
||||
assert settings.lm_studio_base_url == "http://custom:5678/v1"
|
||||
|
||||
def test_ollama_base_url_defaults_to_root(self, monkeypatch):
    """OLLAMA_BASE_URL defaults to the Anthropic-compatible Ollama root URL."""
    from config.settings import Settings

    # Clear the env var and disable .env loading so only defaults apply.
    monkeypatch.delenv("OLLAMA_BASE_URL", raising=False)
    monkeypatch.setitem(Settings.model_config, "env_file", ())
    assert Settings().ollama_base_url == "http://localhost:11434"
|
||||
|
||||
def test_ollama_base_url_rejects_v1_suffix(self, monkeypatch):
    """OLLAMA_BASE_URL must not include /v1 for native Anthropic messages."""
    from config.settings import Settings

    monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434/v1")
    with pytest.raises(ValidationError) as excinfo:
        Settings()
    assert "without /v1" in str(excinfo.value)
|
||||
|
||||
def test_provider_rate_limit_from_env(self, monkeypatch):
|
||||
"""PROVIDER_RATE_LIMIT env var is loaded into settings."""
|
||||
from config.settings import Settings
|
||||
|
|
@ -466,6 +483,7 @@ class TestPerModelMapping:
|
|||
({"MODEL": "deepseek/deepseek-chat"}, "deepseek/deepseek-chat", None),
|
||||
({"MODEL": "lmstudio/qwen2.5-7b"}, "lmstudio/qwen2.5-7b", None),
|
||||
({"MODEL": "llamacpp/local-model"}, "llamacpp/local-model", None),
|
||||
({"MODEL": "ollama/llama3.1"}, "ollama/llama3.1", None),
|
||||
],
|
||||
)
|
||||
def test_settings_models_from_env(
|
||||
|
|
@ -602,6 +620,7 @@ class TestPerModelMapping:
|
|||
assert Settings.parse_provider_type("deepseek/deepseek-chat") == "deepseek"
|
||||
assert Settings.parse_provider_type("lmstudio/qwen") == "lmstudio"
|
||||
assert Settings.parse_provider_type("llamacpp/model") == "llamacpp"
|
||||
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
|
||||
|
||||
def test_parse_model_name(self):
|
||||
"""parse_model_name extracts model name from model string."""
|
||||
|
|
@ -611,3 +630,4 @@ class TestPerModelMapping:
|
|||
assert Settings.parse_model_name("deepseek/deepseek-chat") == "deepseek-chat"
|
||||
assert Settings.parse_model_name("lmstudio/qwen") == "qwen"
|
||||
assert Settings.parse_model_name("llamacpp/model") == "model"
|
||||
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from providers.deepseek import DeepSeekProvider
|
|||
from providers.llamacpp import LlamaCppProvider
|
||||
from providers.lmstudio import LMStudioProvider
|
||||
from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.ollama import OllamaProvider
|
||||
from providers.open_router import OpenRouterProvider
|
||||
from smoke.features import FEATURE_INVENTORY, README_FEATURES, feature_ids
|
||||
|
||||
|
|
@ -71,6 +72,7 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
|
|||
"deepseek": DeepSeekProvider,
|
||||
"lmstudio": LMStudioProvider,
|
||||
"llamacpp": LlamaCppProvider,
|
||||
"ollama": OllamaProvider,
|
||||
}
|
||||
for provider_class in provider_classes.values():
|
||||
assert issubclass(provider_class, BaseProvider)
|
||||
|
|
|
|||
59
tests/contracts/test_smoke_config.py
Normal file
59
tests/contracts/test_smoke_config.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
from smoke.lib.config import DEFAULT_TARGETS, TARGET_REQUIRED_ENV, SmokeConfig
|
||||
|
||||
|
||||
def _settings(**overrides):
|
||||
values = {
|
||||
"model": "ollama/llama3.1",
|
||||
"model_opus": None,
|
||||
"model_sonnet": None,
|
||||
"model_haiku": None,
|
||||
"nvidia_nim_api_key": "",
|
||||
"open_router_api_key": "",
|
||||
"deepseek_api_key": "",
|
||||
"lm_studio_base_url": "",
|
||||
"llamacpp_base_url": "",
|
||||
"ollama_base_url": "http://localhost:11434",
|
||||
}
|
||||
values.update(overrides)
|
||||
return SimpleNamespace(**values)
|
||||
|
||||
|
||||
def _smoke_config(**overrides) -> SmokeConfig:
    """Build a SmokeConfig with test-friendly defaults, overridable per test."""
    defaults = dict(
        root=Path("."),
        results_dir=Path(".smoke-results"),
        live=False,
        interactive=False,
        targets=DEFAULT_TARGETS,
        provider_matrix=frozenset(),
        timeout_s=45.0,
        prompt="Reply with exactly: FCC_SMOKE_PONG",
        claude_bin="claude",
        worker_id="main",
        settings=_settings(),
    )
    defaults.update(overrides)
    return SmokeConfig(**defaults)
|
||||
|
||||
|
||||
def test_ollama_is_default_smoke_target() -> None:
    """Ollama is both a default smoke target and has documented env requirements."""
    for registry in (DEFAULT_TARGETS, TARGET_REQUIRED_ENV):
        assert "ollama" in registry
|
||||
|
||||
|
||||
def test_ollama_provider_configuration_uses_base_url() -> None:
    """A configured OLLAMA_BASE_URL alone marks the ollama provider as usable."""
    config = _smoke_config()
    assert config.has_provider_configuration("ollama")
    first_model = config.provider_models()[0]
    assert first_model.full_model == "ollama/llama3.1"
|
||||
|
||||
|
||||
def test_ollama_provider_matrix_filters_models() -> None:
    """Restricting the provider matrix to ollama filters the model list."""
    config = _smoke_config(provider_matrix=frozenset({"ollama"}))
    providers = [model.provider for model in config.provider_models()]
    assert providers == ["ollama"]
|
||||
|
|
@ -277,6 +277,12 @@ async def test_stream_response_suppresses_thinking_when_disabled(provider_config
|
|||
assert "Answer" in event_text
|
||||
|
||||
|
||||
def _make_bad_request_error(message: str) -> openai.BadRequestError:
|
||||
response = Response(status_code=400, request=Request("POST", "http://test"))
|
||||
body = {"error": {"message": message}}
|
||||
return openai.BadRequestError(message, response=response, body=body)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stream_response_retries_without_chat_template(provider_config):
|
||||
from config.nim import NimSettings
|
||||
|
|
|
|||
244
tests/providers/test_ollama.py
Normal file
244
tests/providers/test_ollama.py
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
"""Tests for Ollama native Anthropic provider."""
|
||||
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from providers.base import ProviderConfig
|
||||
from providers.ollama import OLLAMA_BASE_URL, OllamaProvider
|
||||
|
||||
|
||||
class MockMessage:
    """Lightweight message stub exposing role and content attributes."""

    def __init__(self, role, content):
        self.role, self.content = role, content
|
||||
|
||||
|
||||
class MockRequest:
    """Configurable stand-in for an Anthropic Messages request model."""

    def __init__(self, **kwargs):
        thinking = MagicMock()
        thinking.enabled = True
        defaults = {
            "model": "llama3.1:8b",
            "messages": [MockMessage("user", "Hello")],
            "max_tokens": 100,
            "temperature": 0.5,
            "top_p": 0.9,
            "system": "System prompt",
            "stop_sequences": None,
            "stream": True,
            "tools": [],
            "tool_choice": None,
            "extra_body": {},
            "thinking": thinking,
        }
        # Later keys win: per-test overrides replace the defaults above.
        for name, value in {**defaults, **kwargs}.items():
            setattr(self, name, value)

    def model_dump(self, exclude_none=True):
        """Mimic pydantic's model_dump for the fields the provider serializes."""
        return {
            "model": self.model,
            "messages": [{"role": m.role, "content": m.content} for m in self.messages],
            "max_tokens": self.max_tokens,
            "temperature": self.temperature,
            "top_p": self.top_p,
            "system": self.system,
            "stream": self.stream,
            "tools": self.tools,
            "tool_choice": self.tool_choice,
            "extra_body": self.extra_body,
            "thinking": {"enabled": self.thinking.enabled} if self.thinking else None,
        }
|
||||
|
||||
|
||||
@pytest.fixture
def ollama_config():
    """ProviderConfig pointing at a default local Ollama server."""
    kwargs = dict(
        api_key="ollama",
        base_url="http://localhost:11434",
        rate_limit=10,
        rate_window=60,
    )
    return ProviderConfig(**kwargs)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def mock_rate_limiter():
    """Replace the global rate limiter so tests never block or retry."""

    async def _run_directly(fn, *args, **kwargs):
        # Bypass retry bookkeeping and just await the wrapped coroutine.
        return await fn(*args, **kwargs)

    with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
        limiter = mock.get_scoped_instance.return_value
        limiter.wait_if_blocked = AsyncMock(return_value=False)
        limiter.execute_with_retry = AsyncMock(side_effect=_run_directly)
        yield limiter
|
||||
|
||||
|
||||
@pytest.fixture
def ollama_provider(ollama_config):
    """OllamaProvider wired to the default local config fixture."""
    provider = OllamaProvider(ollama_config)
    return provider
|
||||
|
||||
|
||||
def test_init(ollama_config):
    """Initialization records the base URL, provider name, and API key."""
    with patch("httpx.AsyncClient"):
        provider = OllamaProvider(ollama_config)

    assert provider._base_url == "http://localhost:11434"
    assert provider._provider_name == "OLLAMA"
    assert provider._api_key == "ollama"
|
||||
|
||||
|
||||
def test_init_uses_default_base_url():
    """Without a configured base_url the provider falls back to the root default."""
    config = ProviderConfig(api_key="ollama", base_url=None)
    with patch("httpx.AsyncClient"):
        provider = OllamaProvider(config)
    assert provider._base_url == OLLAMA_BASE_URL
|
||||
|
||||
|
||||
def test_init_uses_configurable_timeouts():
    """Read/write/connect timeouts from the config reach the httpx client."""
    config = ProviderConfig(
        api_key="ollama",
        base_url="http://localhost:11434",
        http_read_timeout=600.0,
        http_write_timeout=15.0,
        http_connect_timeout=5.0,
    )
    with patch("httpx.AsyncClient") as mock_client:
        OllamaProvider(config)
    timeout = mock_client.call_args.kwargs["timeout"]
    assert (timeout.read, timeout.write, timeout.connect) == (600.0, 15.0, 5.0)
|
||||
|
||||
|
||||
def test_init_base_url_strips_trailing_slash():
    """A trailing slash on the configured base_url is normalized away."""
    config = ProviderConfig(
        api_key="ollama",
        base_url="http://localhost:11434/",
        rate_limit=10,
        rate_window=60,
    )
    with patch("httpx.AsyncClient"):
        provider = OllamaProvider(config)
    assert provider._base_url == "http://localhost:11434"
|
||||
|
||||
|
||||
def test_init_uses_default_api_key():
    """An empty API key falls back to the 'ollama' placeholder credential."""
    config = ProviderConfig(
        base_url="http://localhost:11434",
        api_key="",
        rate_limit=10,
        rate_window=60,
    )
    with patch("httpx.AsyncClient"):
        provider = OllamaProvider(config)
    assert provider._api_key == "ollama"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_response(ollama_provider):
    """Streaming posts the Anthropic body to /v1/messages and relays SSE lines."""
    req = MockRequest()

    # The fake upstream yields a complete Anthropic SSE exchange.
    sse_lines = [
        "event: message_start",
        'data: {"type":"message_start","message":{}}',
        "",
        "event: content_block_delta",
        'data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello World"}}',
        "",
        "event: message_stop",
        'data: {"type":"message_stop"}',
        "",
    ]

    mock_response = MagicMock()
    mock_response.status_code = 200

    async def mock_aiter_lines():
        for line in sse_lines:
            yield line

    mock_response.aiter_lines = mock_aiter_lines

    with (
        patch.object(
            ollama_provider._client, "build_request", return_value=MagicMock()
        ) as mock_build,
        patch.object(
            ollama_provider._client,
            "send",
            new_callable=AsyncMock,
            return_value=mock_response,
        ),
    ):
        events = [event async for event in ollama_provider.stream_response(req)]

    mock_build.assert_called_once()
    args, kwargs = mock_build.call_args
    assert (args[0], args[1]) == ("POST", "/v1/messages")
    body = kwargs["json"]
    assert body["model"] == "llama3.1:8b"
    assert body["stream"] is True
    assert "extra_body" not in body
    assert body["thinking"] == {"type": "enabled"}
    assert len(events) == 9
    assert events[0] == "event: message_start\n"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_build_request_body_omits_thinking_when_disabled(ollama_config):
    """With thinking globally disabled, the request body carries no thinking key."""
    disabled_cfg = ollama_config.model_copy(update={"enable_thinking": False})
    provider = OllamaProvider(disabled_cfg)

    body = provider._build_request_body(MockRequest())

    assert "thinking" not in body
    assert body["model"] == "llama3.1:8b"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_stream_error_status_code(ollama_provider):
    """A non-200 upstream reply must surface as exactly one SSE error event."""
    request = MockRequest()

    failing = MagicMock()
    failing.status_code = 500
    failing.aread = AsyncMock(return_value=b"Internal Server Error")
    failing.raise_for_status = MagicMock(
        side_effect=httpx.HTTPStatusError(
            "Internal Server Error", request=MagicMock(), response=failing
        )
    )

    with (
        patch.object(
            ollama_provider._client, "build_request", return_value=MagicMock()
        ),
        patch.object(
            ollama_provider._client,
            "send",
            new_callable=AsyncMock,
            return_value=failing,
        ),
    ):
        stream = ollama_provider.stream_response(request, request_id="REQ")
        events = [chunk async for chunk in stream]

    assert len(events) == 1
    error_event = events[0]
    assert error_event.startswith("event: error\ndata: {")
    # Both the upstream message and the request id must be echoed back.
    assert "Internal Server Error" in error_event
    assert "REQ" in error_event
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_cleanup(ollama_provider):
    """cleanup() must close the underlying HTTP client exactly once."""
    close_mock = AsyncMock()
    ollama_provider._client.aclose = close_mock

    await ollama_provider.cleanup()

    close_mock.assert_called_once()
|
||||
|
|
@ -11,6 +11,7 @@ from providers.exceptions import UnknownProviderTypeError
|
|||
from providers.llamacpp import LlamaCppProvider
|
||||
from providers.lmstudio import LMStudioProvider
|
||||
from providers.nvidia_nim import NvidiaNimProvider
|
||||
from providers.ollama import OllamaProvider
|
||||
from providers.open_router import OpenRouterProvider
|
||||
from providers.registry import (
|
||||
PROVIDER_DESCRIPTORS,
|
||||
|
|
@ -28,6 +29,7 @@ def _make_settings(**overrides):
|
|||
mock.deepseek_api_key = "test_deepseek_key"
|
||||
mock.lm_studio_base_url = "http://localhost:1234/v1"
|
||||
mock.llamacpp_base_url = "http://localhost:8080/v1"
|
||||
mock.ollama_base_url = "http://localhost:11434"
|
||||
mock.nvidia_nim_proxy = ""
|
||||
mock.open_router_proxy = ""
|
||||
mock.lmstudio_proxy = ""
|
||||
|
|
@ -69,6 +71,14 @@ def test_descriptors_cover_advertised_provider_ids():
|
|||
assert descriptor.capabilities
|
||||
|
||||
|
||||
def test_ollama_descriptor_uses_native_anthropic_transport():
    """The registered ollama descriptor advertises the native Anthropic path."""
    desc = PROVIDER_DESCRIPTORS["ollama"]

    assert desc.transport_type == "anthropic_messages"
    assert desc.default_base_url == "http://localhost:11434"
    assert "native_anthropic" in desc.capabilities
|
||||
|
||||
|
||||
def test_create_provider_uses_native_openrouter_by_default():
|
||||
with patch("httpx.AsyncClient"):
|
||||
provider = create_provider("open_router", _make_settings())
|
||||
|
|
@ -83,6 +93,7 @@ def test_create_provider_instantiates_each_builtin():
|
|||
"deepseek": DeepSeekProvider,
|
||||
"lmstudio": LMStudioProvider,
|
||||
"llamacpp": LlamaCppProvider,
|
||||
"ollama": OllamaProvider,
|
||||
}
|
||||
|
||||
with (
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue