[feat] Ollama provider support (#129)

Support using Ollama as a provider, in the same way as LM Studio.

---------

Co-authored-by: Alishahryar1 <alishahryar2@gmail.com>
Co-authored-by: u011436427 <u011436427@noreply.gitcode.com>
Wang Ji 2026-04-26 13:06:36 +08:00, committed by GitHub
parent 7f1e860c7f
commit b525217633
21 changed files with 593 additions and 15 deletions

@@ -18,9 +18,13 @@ LM_STUDIO_BASE_URL="http://localhost:1234/v1"
LLAMACPP_BASE_URL="http://localhost:8080/v1"
# Ollama Config (local provider, no API key required)
OLLAMA_BASE_URL="http://localhost:11434"
# All Claude model requests are mapped to these models; the plain MODEL entry is the fallback
# Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" | "ollama"
MODEL_OPUS=
MODEL_SONNET=
MODEL_HAIKU=
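
For illustration, the prefix is what routes a request: `Settings.parse_provider_type` and `Settings.parse_model_name` (exercised in the settings tests later in this commit) split it off before dispatch:

```python
from config.settings import Settings

# The prefix selects the provider; the remainder is the upstream model id.
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"
```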

@@ -31,7 +31,7 @@ A lightweight proxy that routes Claude Code's Anthropic API calls to **NVIDIA NI
| -------------------------- | ----------------------------------------------------------------------------------------------- |
| **Zero Cost** | 40 req/min free on NVIDIA NIM. Free models on OpenRouter. Fully local with LM Studio |
| **Drop-in Replacement** | Set 2 env vars. No modifications to Claude Code CLI or VSCode extension needed |
| **5 Providers** | NVIDIA NIM, OpenRouter, DeepSeek, LM Studio (local), llama.cpp (`llama-server`) |
| **6 Providers** | NVIDIA NIM, OpenRouter, DeepSeek, LM Studio (local), llama.cpp (`llama-server`), Ollama |
| **Per-Model Mapping** | Route Opus / Sonnet / Haiku to different models and providers. Mix providers freely |
| **Thinking Token Support** | Parses `<think>` tags and `reasoning_content` into native Claude thinking blocks |
| **Heuristic Tool Parser** | Models outputting tool calls as text are auto-parsed into structured tool use |
@@ -361,6 +361,7 @@ The proxy also exposes Claude-compatible probe routes: `GET /v1/models`, `POST /
| **DeepSeek** | Usage-based | Varies | Direct access to DeepSeek chat/reasoner |
| **LM Studio** | Free (local) | Unlimited | Privacy, offline use, no rate limits |
| **llama.cpp** | Free (local) | Unlimited | Lightweight local inference engine |
| **Ollama** | Free (local) | Unlimited | Easy local LLM runtime, native Anthropic API |
Models use a prefix format: `provider_prefix/model/name`. An invalid prefix causes an error.
@@ -371,6 +372,7 @@ Models use a prefix format: `provider_prefix/model/name`. An invalid prefix caus
| DeepSeek | `deepseek/...` | `DEEPSEEK_API_KEY` | `api.deepseek.com` |
| LM Studio | `lmstudio/...` | (none) | `localhost:1234/v1` |
| llama.cpp | `llamacpp/...` | (none) | `localhost:8080/v1` |
| Ollama | `ollama/...` | (none) | `localhost:11434` |
<details>
<summary><b>NVIDIA NIM models</b></summary>
@@ -439,6 +441,32 @@ See the Unsloth docs for detailed instructions and capable models:
</details>
<details>
<summary><b>Ollama</b> (fully local, no API key)</summary>
```dotenv
OLLAMA_BASE_URL="http://localhost:11434"
MODEL_OPUS="ollama/llama3.1:70b"
MODEL_SONNET="ollama/llama3.1:8b"
MODEL_HAIKU="ollama/llama3.1:8b"
MODEL="ollama/llama3.1:8b"
```
Install Ollama: [ollama.com](https://ollama.com)
Pull a model:
```bash
ollama pull llama3.1
```
Start Ollama server:
```bash
ollama serve
```
</details>
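
For a quick end-to-end check before wiring up the proxy, you can hit the route the new provider targets directly. This is a hedged sketch: it assumes your Ollama build exposes the Anthropic-compatible `/v1/messages` endpoint this PR relies on, accepts non-streaming requests there, and has `llama3.1:8b` pulled:

```python
import httpx

# Post an Anthropic-shaped body to Ollama's root URL (the base has no /v1;
# the /v1/messages path is appended, as OllamaProvider does).
resp = httpx.post(
    "http://localhost:11434/v1/messages",
    json={
        "model": "llama3.1:8b",
        "max_tokens": 64,
        "messages": [{"role": "user", "content": "Say hi"}],
    },
    timeout=60.0,
)
print(resp.status_code, resp.json())
```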
---
## Discord Bot
@@ -544,6 +572,7 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
| `OPENROUTER_PROXY` | Optional proxy URL for OpenRouter requests (`http://...` or `socks5://...`) | `""` |
| `LMSTUDIO_PROXY` | Optional proxy URL for LM Studio requests (`http://...` or `socks5://...`) | `""` |
| `LLAMACPP_PROXY` | Optional proxy URL for llama.cpp requests (`http://...` or `socks5://...`) | `""` |
| `OLLAMA_BASE_URL` | Ollama server root URL | `http://localhost:11434` |
### Rate Limiting & Timeouts

@@ -14,4 +14,5 @@ SUPPORTED_PROVIDER_IDS: tuple[str, ...] = (
"deepseek",
"lmstudio",
"llamacpp",
"ollama",
)

@@ -121,6 +121,12 @@ class Settings(BaseSettings):
validation_alias="LLAMACPP_BASE_URL",
)
# ==================== Ollama Config ====================
ollama_base_url: str = Field(
default="http://localhost:11434",
validation_alias="OLLAMA_BASE_URL",
)
# ==================== Model ====================
# All Claude model requests are mapped to this single model (fallback)
# Format: provider_type/model/name
@@ -266,6 +272,16 @@ class Settings(BaseSettings):
)
return v
@field_validator("ollama_base_url")
@classmethod
def validate_ollama_base_url(cls, v: str) -> str:
if v.rstrip("/").endswith("/v1"):
raise ValueError(
"OLLAMA_BASE_URL must be the Ollama root URL for native Anthropic "
"messages, e.g. http://localhost:11434 (without /v1)."
)
return v
@field_validator("model", "model_opus", "model_sonnet", "model_haiku")
@classmethod
def validate_model_format(cls, v: str | None) -> str | None:
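
A minimal sketch of the new validator's behavior, mirroring the settings test added later in this commit (it assumes no conflicting `.env` is picked up):

```python
import os
from pydantic import ValidationError
from config.settings import Settings

# A /v1 suffix would point at Ollama's OpenAI-compatible layer rather than
# the native Anthropic messages route, so validation rejects it.
os.environ["OLLAMA_BASE_URL"] = "http://localhost:11434/v1"
try:
    Settings()
except ValidationError as exc:
    print(exc)  # message ends with "(without /v1)."
```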

@@ -4,12 +4,13 @@ Adapters and :mod:`providers.registry` import from here to avoid duplicating
literals and to keep ``providers.registry`` free of per-adapter eager imports.
"""
# OpenAI-compatible chat (NIM, DeepSeek) and local OpenAI-shaped endpoints
# OpenAI-compatible chat (NIM, DeepSeek) and local/native provider endpoints
NVIDIA_NIM_DEFAULT_BASE = "https://integrate.api.nvidia.com/v1"
DEEPSEEK_DEFAULT_BASE = "https://api.deepseek.com"
OPENROUTER_DEFAULT_BASE = "https://openrouter.ai/api/v1"
LMSTUDIO_DEFAULT_BASE = "http://localhost:1234/v1"
LLAMACPP_DEFAULT_BASE = "http://localhost:8080/v1"
OLLAMA_DEFAULT_BASE = "http://localhost:11434"
# Backward-compatible names used by existing adapter modules
NVIDIA_NIM_BASE_URL = NVIDIA_NIM_DEFAULT_BASE
@@ -17,3 +18,4 @@ DEEPSEEK_BASE_URL = DEEPSEEK_DEFAULT_BASE
OPENROUTER_BASE_URL = OPENROUTER_DEFAULT_BASE
LMSTUDIO_DEFAULT_BASE_URL = LMSTUDIO_DEFAULT_BASE
LLAMACPP_DEFAULT_BASE_URL = LLAMACPP_DEFAULT_BASE
OLLAMA_DEFAULT_BASE_URL = OLLAMA_DEFAULT_BASE

@@ -0,0 +1,5 @@
"""Ollama provider package."""
from .client import OLLAMA_BASE_URL, OllamaProvider
__all__ = ["OLLAMA_BASE_URL", "OllamaProvider"]

@@ -0,0 +1,31 @@
"""Ollama provider implementation."""
import httpx
from providers.anthropic_messages import AnthropicMessagesTransport
from providers.base import ProviderConfig
from providers.defaults import OLLAMA_DEFAULT_BASE
OLLAMA_BASE_URL = OLLAMA_DEFAULT_BASE
class OllamaProvider(AnthropicMessagesTransport):
"""Ollama provider using native Anthropic Messages API."""
def __init__(self, config: ProviderConfig):
super().__init__(
config,
provider_name="OLLAMA",
default_base_url=OLLAMA_BASE_URL,
)
self._api_key = config.api_key or "ollama"
async def _send_stream_request(self, body: dict) -> httpx.Response:
"""Create a streaming native Anthropic messages response."""
request = self._client.build_request(
"POST",
"/v1/messages",
json=body,
headers=self._request_headers(),
)
return await self._client.send(request, stream=True)
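
Construction needs no real credential; a sketch matching the unit tests further down:

```python
from providers.base import ProviderConfig
from providers.ollama import OllamaProvider

# An empty api_key falls back to the static "ollama" credential, and the
# transport normalizes a trailing slash off the base URL.
provider = OllamaProvider(ProviderConfig(api_key="", base_url="http://localhost:11434/"))
assert provider._api_key == "ollama"
assert provider._base_url == "http://localhost:11434"
```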

@@ -14,6 +14,7 @@ from providers.defaults import (
LLAMACPP_DEFAULT_BASE,
LMSTUDIO_DEFAULT_BASE,
NVIDIA_NIM_DEFAULT_BASE,
OLLAMA_DEFAULT_BASE,
OPENROUTER_DEFAULT_BASE,
)
from providers.exceptions import AuthenticationError, UnknownProviderTypeError
@@ -88,6 +89,21 @@ PROVIDER_DESCRIPTORS: dict[str, ProviderDescriptor] = {
proxy_attr="llamacpp_proxy",
capabilities=("chat", "streaming", "tools", "native_anthropic", "local"),
),
"ollama": ProviderDescriptor(
provider_id="ollama",
transport_type="anthropic_messages",
static_credential="ollama",
default_base_url=OLLAMA_DEFAULT_BASE,
base_url_attr="ollama_base_url",
capabilities=(
"chat",
"streaming",
"tools",
"thinking",
"native_anthropic",
"local",
),
),
}
@@ -121,12 +137,19 @@ def _create_llamacpp(config: ProviderConfig, settings: Settings) -> BaseProvider
return LlamaCppProvider(config)
def _create_ollama(config: ProviderConfig, settings: Settings) -> BaseProvider:
from providers.ollama import OllamaProvider
return OllamaProvider(config)
PROVIDER_FACTORIES: dict[str, ProviderFactory] = {
"nvidia_nim": _create_nvidia_nim,
"open_router": _create_open_router,
"deepseek": _create_deepseek,
"lmstudio": _create_lmstudio,
"llamacpp": _create_llamacpp,
"ollama": _create_ollama,
}
if set(PROVIDER_DESCRIPTORS) != set(SUPPORTED_PROVIDER_IDS) or set(
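
Registry lookup and creation then work like any other built-in; a sketch along the lines of the registry tests below (assuming `Settings()` can be built from your environment):

```python
from config.settings import Settings
from providers.registry import PROVIDER_DESCRIPTORS, create_provider

descriptor = PROVIDER_DESCRIPTORS["ollama"]
assert descriptor.transport_type == "anthropic_messages"
assert "native_anthropic" in descriptor.capabilities

# Dispatches through PROVIDER_FACTORIES to the lazy providers.ollama import.
provider = create_provider("ollama", Settings())
```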

@@ -54,6 +54,7 @@ Default targets do not send real bot messages or load voice backends:
| `rate_limit` | disconnect cleanup and follow-up request | configured provider |
| `lmstudio` | local `/models` plus native `/messages` through proxy | running LM Studio server |
| `llamacpp` | local `/models` plus native `/messages` through proxy | running llama-server |
| `ollama` | local `/api/tags` plus native Anthropic messages through proxy | running Ollama server |
Side-effectful targets are opt-in:
@@ -67,10 +68,17 @@ Side-effectful targets are opt-in:
```powershell
$env:FCC_LIVE_SMOKE = "1"
$env:FCC_SMOKE_PROVIDER_MATRIX = "open_router,nvidia_nim,deepseek,lmstudio,llamacpp"
$env:FCC_SMOKE_PROVIDER_MATRIX = "open_router,nvidia_nim,deepseek,lmstudio,llamacpp,ollama"
uv run pytest smoke/product -n 0 -s --tb=short
```
```powershell
$env:FCC_LIVE_SMOKE = "1"
$env:FCC_SMOKE_TARGETS = "ollama"
$env:OLLAMA_BASE_URL = "http://localhost:11434"
uv run pytest smoke/prereq smoke/product -n 0 -s --tb=short
```
```powershell
$env:FCC_LIVE_SMOKE = "1"
$env:FCC_SMOKE_TARGETS = "telegram,discord,voice"

@@ -277,6 +277,17 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = (
),
("test_llamacpp_models_endpoint_when_available",),
),
CapabilityContract(
"local_providers",
"ollama_native_messages",
"ollama_endpoint",
"providers.ollama.OllamaProvider",
"Anthropic request body and local Ollama root URL",
"Anthropic SSE stream through the proxy",
"SSE error event for local upstream failure",
("tests/providers/test_ollama.py",),
("test_ollama_models_endpoint_when_available",),
),
CapabilityContract(
"openrouter",
"native_anthropic_messages",

@@ -348,6 +348,17 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] = (
("LLAMACPP_BASE_URL with running llama-server",),
"skip when local upstream is unavailable",
),
FeatureCoverage(
"ollama_endpoint",
"Ollama native Anthropic messages and local no-key operation work when running",
"public_surface",
("tests/providers/test_ollama.py",),
("test_ollama_models_endpoint_when_available",),
("test_ollama_native_messages_e2e",),
("ollama",),
("OLLAMA_BASE_URL with running Ollama server",),
"skip when local upstream is unavailable",
),
FeatureCoverage(
"package_cli_entrypoints",
"Installed package scripts scaffold config and start the server",

@@ -20,6 +20,7 @@ DEFAULT_TARGETS = frozenset(
"llamacpp",
"lmstudio",
"messaging",
"ollama",
"providers",
"rate_limit",
"tools",
@@ -49,6 +50,7 @@ TARGET_REQUIRED_ENV: dict[str, tuple[str, ...]] = {
"tools": ("configured tool-capable provider model",),
"lmstudio": ("LM_STUDIO_BASE_URL with a running LM Studio server",),
"llamacpp": ("LLAMACPP_BASE_URL with a running llama-server",),
"ollama": ("OLLAMA_BASE_URL with a running Ollama server",),
"telegram": (
"TELEGRAM_BOT_TOKEN",
"ALLOWED_TELEGRAM_USER_ID or FCC_SMOKE_TELEGRAM_CHAT_ID",
@@ -142,6 +144,8 @@ class SmokeConfig:
return bool(self.settings.lm_studio_base_url.strip())
if provider == "llamacpp":
return bool(self.settings.llamacpp_base_url.strip())
if provider == "ollama":
return bool(self.settings.ollama_base_url.strip())
return False

@@ -27,6 +27,14 @@ def test_llamacpp_models_endpoint_when_available(smoke_config: SmokeConfig) -> N
)
@pytest.mark.live
@pytest.mark.smoke_target("ollama")
def test_ollama_models_endpoint_when_available(smoke_config: SmokeConfig) -> None:
_assert_ollama_tags_endpoint(
smoke_config.settings.ollama_base_url, timeout_s=smoke_config.timeout_s
)
def _assert_models_endpoint(
base_url: str, *, timeout_s: float, provider_name: str
) -> None:
@@ -45,4 +53,34 @@ def _assert_models_endpoint(
assert response.status_code == 200, response.text
payload = response.json()
assert isinstance(payload.get("data"), list), payload
data = payload.get("data")
if isinstance(data, list) and data:
return
if isinstance(data, list):
pytest.skip(f"upstream_unavailable: {provider_name} has no local models")
assert isinstance(data, list), payload
def _assert_ollama_tags_endpoint(base_url: str, *, timeout_s: float) -> None:
url = f"{_ollama_root_url(base_url)}/api/tags"
try:
response = httpx.get(url, timeout=timeout_s)
except Exception as exc:
skip_if_upstream_unavailable_exception(exc)
raise
if response.status_code in {404, 405, 502, 503}:
pytest.skip(
f"upstream_unavailable: Ollama tags endpoint {url} "
f"returned HTTP {response.status_code}"
)
assert response.status_code == 200, response.text
models = response.json().get("models")
if isinstance(models, list) and models:
return
pytest.skip("upstream_unavailable: Ollama has no pulled models")
def _ollama_root_url(base_url: str) -> str:
return base_url.rstrip("/")
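
Outside pytest, the same readiness probe is a couple of lines; `/api/tags` is Ollama's native model-listing endpoint and returns a `models` array of objects with `name` fields:

```python
import httpx

# Lists locally pulled models; an empty list means nothing to smoke-test against.
payload = httpx.get("http://localhost:11434/api/tags", timeout=5.0).json()
print([model["name"] for model in payload.get("models", [])])
```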

@@ -29,6 +29,15 @@ def test_llamacpp_native_messages_e2e(smoke_config: SmokeConfig) -> None:
)
@pytest.mark.smoke_target("ollama")
def test_ollama_native_messages_e2e(smoke_config: SmokeConfig) -> None:
_local_native_messages_e2e(
smoke_config,
provider="ollama",
base_url=smoke_config.settings.ollama_base_url,
)
def _local_native_messages_e2e(
smoke_config: SmokeConfig,
*,
@@ -38,15 +47,11 @@ def _local_native_messages_e2e(
if not base_url.strip():
pytest.skip(f"missing_env: {provider} base URL is not configured")
models_url = urljoin(base_url.rstrip("/") + "/", "models")
try:
models = httpx.get(models_url, timeout=5)
except httpx.ConnectError as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
except httpx.TimeoutException as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
assert models.status_code == 200, models.text
model_id = _first_local_model_id(models)
model_id = (
_first_ollama_tag_model_id(base_url)
if provider == "ollama"
else (_first_non_ollama_model_id(provider, base_url))
)
with SmokeServerDriver(
smoke_config,
@@ -60,11 +65,44 @@ def _local_native_messages_e2e(
assert_product_stream(turn.events)
def _first_local_model_id(response: httpx.Response) -> str:
def _first_non_ollama_model_id(provider: str, base_url: str) -> str:
models_url = urljoin(base_url.rstrip("/") + "/", "models")
try:
response = httpx.get(models_url, timeout=5)
except httpx.ConnectError as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
except httpx.TimeoutException as exc:
pytest.skip(f"upstream_unavailable: {provider} models endpoint: {exc}")
assert response.status_code == 200, response.text
payload = response.json()
data = payload.get("data") if isinstance(payload, dict) else None
if isinstance(data, list):
for item in data:
if isinstance(item, dict) and isinstance(item.get("id"), str):
return item["id"]
pytest.skip(f"upstream_unavailable: {provider} has no local models")
pytest.fail("product_failure: local /models did not expose a model id")
def _first_ollama_tag_model_id(base_url: str) -> str:
tags_url = f"{_ollama_root_url(base_url)}/api/tags"
try:
response = httpx.get(tags_url, timeout=5)
except httpx.ConnectError as exc:
pytest.skip(f"upstream_unavailable: ollama tags endpoint: {exc}")
except httpx.TimeoutException as exc:
pytest.skip(f"upstream_unavailable: ollama tags endpoint: {exc}")
assert response.status_code == 200, response.text
payload = response.json()
models = payload.get("models") if isinstance(payload, dict) else None
if isinstance(models, list):
for item in models:
if isinstance(item, dict) and isinstance(item.get("name"), str):
return item["name"]
pytest.skip("upstream_unavailable: ollama has no pulled models")
pytest.fail("product_failure: ollama /api/tags did not expose models")
def _ollama_root_url(base_url: str) -> str:
return base_url.rstrip("/")

@@ -19,6 +19,7 @@ from providers.deepseek import DeepSeekProvider
from providers.exceptions import UnknownProviderTypeError
from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.registry import ProviderRegistry
@@ -35,6 +36,7 @@ def _make_mock_settings(**overrides):
mock.open_router_api_key = "test_openrouter_key"
mock.deepseek_api_key = "test_deepseek_key"
mock.lm_studio_base_url = "http://localhost:1234/v1"
mock.ollama_base_url = "http://localhost:11434"
mock.nim = NimSettings()
mock.http_read_timeout = 300.0
mock.http_write_timeout = 10.0
@@ -130,6 +132,19 @@ async def test_get_provider_lmstudio():
assert provider._base_url == "http://localhost:1234/v1"
@pytest.mark.asyncio
async def test_get_provider_ollama():
"""Test that provider_type=ollama returns OllamaProvider without an API key."""
with patch("api.dependencies.get_settings") as mock_settings:
mock_settings.return_value = _make_mock_settings(provider_type="ollama")
provider = get_provider()
assert isinstance(provider, OllamaProvider)
assert provider._base_url == "http://localhost:11434"
assert provider._api_key == "ollama"
@pytest.mark.asyncio
async def test_get_provider_deepseek():
"""Test that provider_type=deepseek returns DeepSeekProvider."""

@@ -70,6 +70,23 @@ class TestSettings:
settings = Settings()
assert settings.lm_studio_base_url == "http://custom:5678/v1"
def test_ollama_base_url_defaults_to_root(self, monkeypatch):
"""OLLAMA_BASE_URL defaults to the Anthropic-compatible Ollama root URL."""
from config.settings import Settings
monkeypatch.delenv("OLLAMA_BASE_URL", raising=False)
monkeypatch.setitem(Settings.model_config, "env_file", ())
settings = Settings()
assert settings.ollama_base_url == "http://localhost:11434"
def test_ollama_base_url_rejects_v1_suffix(self, monkeypatch):
"""OLLAMA_BASE_URL must not include /v1 for native Anthropic messages."""
from config.settings import Settings
monkeypatch.setenv("OLLAMA_BASE_URL", "http://localhost:11434/v1")
with pytest.raises(ValidationError, match="without /v1"):
Settings()
def test_provider_rate_limit_from_env(self, monkeypatch):
"""PROVIDER_RATE_LIMIT env var is loaded into settings."""
from config.settings import Settings
@@ -466,6 +483,7 @@ class TestPerModelMapping:
({"MODEL": "deepseek/deepseek-chat"}, "deepseek/deepseek-chat", None),
({"MODEL": "lmstudio/qwen2.5-7b"}, "lmstudio/qwen2.5-7b", None),
({"MODEL": "llamacpp/local-model"}, "llamacpp/local-model", None),
({"MODEL": "ollama/llama3.1"}, "ollama/llama3.1", None),
],
)
def test_settings_models_from_env(
@@ -602,6 +620,7 @@ class TestPerModelMapping:
assert Settings.parse_provider_type("deepseek/deepseek-chat") == "deepseek"
assert Settings.parse_provider_type("lmstudio/qwen") == "lmstudio"
assert Settings.parse_provider_type("llamacpp/model") == "llamacpp"
assert Settings.parse_provider_type("ollama/llama3.1") == "ollama"
def test_parse_model_name(self):
"""parse_model_name extracts model name from model string."""
@@ -611,3 +630,4 @@
assert Settings.parse_model_name("deepseek/deepseek-chat") == "deepseek-chat"
assert Settings.parse_model_name("lmstudio/qwen") == "qwen"
assert Settings.parse_model_name("llamacpp/model") == "model"
assert Settings.parse_model_name("ollama/llama3.1") == "llama3.1"

@@ -9,6 +9,7 @@ from providers.deepseek import DeepSeekProvider
from providers.llamacpp import LlamaCppProvider
from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from smoke.features import FEATURE_INVENTORY, README_FEATURES, feature_ids
@@ -71,6 +72,7 @@ def test_provider_and_platform_registries_include_advertised_builtins() -> None:
"deepseek": DeepSeekProvider,
"lmstudio": LMStudioProvider,
"llamacpp": LlamaCppProvider,
"ollama": OllamaProvider,
}
for provider_class in provider_classes.values():
assert issubclass(provider_class, BaseProvider)

@@ -0,0 +1,59 @@
from __future__ import annotations
from pathlib import Path
from types import SimpleNamespace
from smoke.lib.config import DEFAULT_TARGETS, TARGET_REQUIRED_ENV, SmokeConfig
def _settings(**overrides):
values = {
"model": "ollama/llama3.1",
"model_opus": None,
"model_sonnet": None,
"model_haiku": None,
"nvidia_nim_api_key": "",
"open_router_api_key": "",
"deepseek_api_key": "",
"lm_studio_base_url": "",
"llamacpp_base_url": "",
"ollama_base_url": "http://localhost:11434",
}
values.update(overrides)
return SimpleNamespace(**values)
def _smoke_config(**overrides) -> SmokeConfig:
values = {
"root": Path("."),
"results_dir": Path(".smoke-results"),
"live": False,
"interactive": False,
"targets": DEFAULT_TARGETS,
"provider_matrix": frozenset(),
"timeout_s": 45.0,
"prompt": "Reply with exactly: FCC_SMOKE_PONG",
"claude_bin": "claude",
"worker_id": "main",
"settings": _settings(),
}
values.update(overrides)
return SmokeConfig(**values)
def test_ollama_is_default_smoke_target() -> None:
assert "ollama" in DEFAULT_TARGETS
assert "ollama" in TARGET_REQUIRED_ENV
def test_ollama_provider_configuration_uses_base_url() -> None:
config = _smoke_config()
assert config.has_provider_configuration("ollama")
assert config.provider_models()[0].full_model == "ollama/llama3.1"
def test_ollama_provider_matrix_filters_models() -> None:
config = _smoke_config(provider_matrix=frozenset({"ollama"}))
assert [model.provider for model in config.provider_models()] == ["ollama"]

@@ -277,6 +277,12 @@ async def test_stream_response_suppresses_thinking_when_disabled(provider_config
assert "Answer" in event_text
def _make_bad_request_error(message: str) -> openai.BadRequestError:
response = Response(status_code=400, request=Request("POST", "http://test"))
body = {"error": {"message": message}}
return openai.BadRequestError(message, response=response, body=body)
@pytest.mark.asyncio
async def test_stream_response_retries_without_chat_template(provider_config):
from config.nim import NimSettings

@@ -0,0 +1,244 @@
"""Tests for Ollama native Anthropic provider."""
from unittest.mock import AsyncMock, MagicMock, patch
import httpx
import pytest
from providers.base import ProviderConfig
from providers.ollama import OLLAMA_BASE_URL, OllamaProvider
class MockMessage:
def __init__(self, role, content):
self.role = role
self.content = content
class MockRequest:
def __init__(self, **kwargs):
self.model = "llama3.1:8b"
self.messages = [MockMessage("user", "Hello")]
self.max_tokens = 100
self.temperature = 0.5
self.top_p = 0.9
self.system = "System prompt"
self.stop_sequences = None
self.stream = True
self.tools = []
self.tool_choice = None
self.extra_body = {}
self.thinking = MagicMock()
self.thinking.enabled = True
for key, value in kwargs.items():
setattr(self, key, value)
def model_dump(self, exclude_none=True):
return {
"model": self.model,
"messages": [{"role": m.role, "content": m.content} for m in self.messages],
"max_tokens": self.max_tokens,
"temperature": self.temperature,
"top_p": self.top_p,
"system": self.system,
"stream": self.stream,
"tools": self.tools,
"tool_choice": self.tool_choice,
"extra_body": self.extra_body,
"thinking": {"enabled": self.thinking.enabled} if self.thinking else None,
}
@pytest.fixture
def ollama_config():
return ProviderConfig(
api_key="ollama",
base_url="http://localhost:11434",
rate_limit=10,
rate_window=60,
)
@pytest.fixture(autouse=True)
def mock_rate_limiter():
"""Mock the global rate limiter to prevent waiting."""
with patch("providers.anthropic_messages.GlobalRateLimiter") as mock:
instance = mock.get_scoped_instance.return_value
instance.wait_if_blocked = AsyncMock(return_value=False)
async def _passthrough(fn, *args, **kwargs):
return await fn(*args, **kwargs)
instance.execute_with_retry = AsyncMock(side_effect=_passthrough)
yield instance
@pytest.fixture
def ollama_provider(ollama_config):
return OllamaProvider(ollama_config)
def test_init(ollama_config):
"""Test provider initialization."""
with patch("httpx.AsyncClient"):
provider = OllamaProvider(ollama_config)
assert provider._base_url == "http://localhost:11434"
assert provider._provider_name == "OLLAMA"
assert provider._api_key == "ollama"
def test_init_uses_default_base_url():
"""Test that provider uses default root URL when not configured."""
config = ProviderConfig(api_key="ollama", base_url=None)
with patch("httpx.AsyncClient"):
provider = OllamaProvider(config)
assert provider._base_url == OLLAMA_BASE_URL
def test_init_uses_configurable_timeouts():
"""Test that provider passes configurable read/write/connect timeouts to client."""
config = ProviderConfig(
api_key="ollama",
base_url="http://localhost:11434",
http_read_timeout=600.0,
http_write_timeout=15.0,
http_connect_timeout=5.0,
)
with patch("httpx.AsyncClient") as mock_client:
OllamaProvider(config)
call_kwargs = mock_client.call_args[1]
timeout = call_kwargs["timeout"]
assert timeout.read == 600.0
assert timeout.write == 15.0
assert timeout.connect == 5.0
def test_init_base_url_strips_trailing_slash():
"""Config with base_url trailing slash is stored without it."""
config = ProviderConfig(
api_key="ollama",
base_url="http://localhost:11434/",
rate_limit=10,
rate_window=60,
)
with patch("httpx.AsyncClient"):
provider = OllamaProvider(config)
assert provider._base_url == "http://localhost:11434"
def test_init_uses_default_api_key():
"""Test that provider uses default API key when not configured."""
config = ProviderConfig(
base_url="http://localhost:11434",
api_key="",
rate_limit=10,
rate_window=60,
)
with patch("httpx.AsyncClient"):
provider = OllamaProvider(config)
assert provider._api_key == "ollama"
@pytest.mark.asyncio
async def test_stream_response(ollama_provider):
"""Test streaming native Anthropic response."""
req = MockRequest()
mock_response = MagicMock()
mock_response.status_code = 200
async def mock_aiter_lines():
yield "event: message_start"
yield 'data: {"type":"message_start","message":{}}'
yield ""
yield "event: content_block_delta"
yield 'data: {"type":"content_block_delta","delta":{"type":"text_delta","text":"Hello World"}}'
yield ""
yield "event: message_stop"
yield 'data: {"type":"message_stop"}'
yield ""
mock_response.aiter_lines = mock_aiter_lines
with (
patch.object(
ollama_provider._client, "build_request", return_value=MagicMock()
) as mock_build,
patch.object(
ollama_provider._client,
"send",
new_callable=AsyncMock,
return_value=mock_response,
),
):
events = [event async for event in ollama_provider.stream_response(req)]
mock_build.assert_called_once()
args, kwargs = mock_build.call_args
assert args[0] == "POST"
assert args[1] == "/v1/messages"
assert kwargs["json"]["model"] == "llama3.1:8b"
assert kwargs["json"]["stream"] is True
assert "extra_body" not in kwargs["json"]
assert kwargs["json"]["thinking"] == {"type": "enabled"}
assert len(events) == 9
assert events[0] == "event: message_start\n"
@pytest.mark.asyncio
async def test_build_request_body_omits_thinking_when_disabled(ollama_config):
"""Global disable suppresses provider-side thinking."""
provider = OllamaProvider(
ollama_config.model_copy(update={"enable_thinking": False})
)
req = MockRequest()
body = provider._build_request_body(req)
assert "thinking" not in body
assert body["model"] == "llama3.1:8b"
@pytest.mark.asyncio
async def test_stream_error_status_code(ollama_provider):
"""Non-200 status code is yielded as an SSE API error."""
req = MockRequest()
mock_response = MagicMock()
mock_response.status_code = 500
mock_response.aread = AsyncMock(return_value=b"Internal Server Error")
mock_response.raise_for_status = MagicMock(
side_effect=httpx.HTTPStatusError(
"Internal Server Error", request=MagicMock(), response=mock_response
)
)
with (
patch.object(
ollama_provider._client, "build_request", return_value=MagicMock()
),
patch.object(
ollama_provider._client,
"send",
new_callable=AsyncMock,
return_value=mock_response,
),
):
events = [
event
async for event in ollama_provider.stream_response(req, request_id="REQ")
]
assert len(events) == 1
assert events[0].startswith("event: error\ndata: {")
assert "Internal Server Error" in events[0]
assert "REQ" in events[0]
@pytest.mark.asyncio
async def test_cleanup(ollama_provider):
"""Test that cleanup closes the client."""
ollama_provider._client.aclose = AsyncMock()
await ollama_provider.cleanup()
ollama_provider._client.aclose.assert_called_once()

@@ -11,6 +11,7 @@ from providers.exceptions import UnknownProviderTypeError
from providers.llamacpp import LlamaCppProvider
from providers.lmstudio import LMStudioProvider
from providers.nvidia_nim import NvidiaNimProvider
from providers.ollama import OllamaProvider
from providers.open_router import OpenRouterProvider
from providers.registry import (
PROVIDER_DESCRIPTORS,
@@ -28,6 +29,7 @@ def _make_settings(**overrides):
mock.deepseek_api_key = "test_deepseek_key"
mock.lm_studio_base_url = "http://localhost:1234/v1"
mock.llamacpp_base_url = "http://localhost:8080/v1"
mock.ollama_base_url = "http://localhost:11434"
mock.nvidia_nim_proxy = ""
mock.open_router_proxy = ""
mock.lmstudio_proxy = ""
@@ -69,6 +71,14 @@ def test_descriptors_cover_advertised_provider_ids():
assert descriptor.capabilities
def test_ollama_descriptor_uses_native_anthropic_transport():
descriptor = PROVIDER_DESCRIPTORS["ollama"]
assert descriptor.transport_type == "anthropic_messages"
assert descriptor.default_base_url == "http://localhost:11434"
assert "native_anthropic" in descriptor.capabilities
def test_create_provider_uses_native_openrouter_by_default():
with patch("httpx.AsyncClient"):
provider = create_provider("open_router", _make_settings())
@@ -83,6 +93,7 @@ def test_create_provider_instantiates_each_builtin():
"deepseek": DeepSeekProvider,
"lmstudio": LMStudioProvider,
"llamacpp": LlamaCppProvider,
"ollama": OllamaProvider,
}
with (