diff --git a/.env.example b/.env.example index ec442ac..1933899 100644 --- a/.env.example +++ b/.env.example @@ -28,9 +28,12 @@ MODEL="nvidia_nim/z-ai/glm4.7" # Thinking output -# Global switch for provider reasoning requests and Claude thinking blocks. -# Set false to suppress thinking across NIM, OpenRouter, LM Studio, and llama.cpp. -ENABLE_THINKING=true +# Per-Claude-model switches for provider reasoning requests and Claude thinking blocks. +# Blank per-model switches inherit ENABLE_MODEL_THINKING. +ENABLE_OPUS_THINKING= +ENABLE_SONNET_THINKING= +ENABLE_HAIKU_THINKING= +ENABLE_MODEL_THINKING=true # Provider config diff --git a/README.md b/README.md index 928b78a..4eba001 100644 --- a/README.md +++ b/README.md @@ -102,8 +102,12 @@ MODEL_SONNET= MODEL_HAIKU= MODEL="nvidia_nim/z-ai/glm4.7" # fallback -# Global switch for provider reasoning requests and Claude thinking blocks. -ENABLE_THINKING=true +# Per-Claude-model switches for provider reasoning requests and Claude thinking blocks. +# Blank per-model switches inherit ENABLE_MODEL_THINKING. +ENABLE_OPUS_THINKING= +ENABLE_SONNET_THINKING= +ENABLE_HAIKU_THINKING= +ENABLE_MODEL_THINKING=true ``` @@ -179,7 +183,7 @@ MODEL="nvidia_nim/z-ai/glm4.7" # fallback -> Migration: `NIM_ENABLE_THINKING` was removed in this release. Rename it to `ENABLE_THINKING`. +> Migration: `NIM_ENABLE_THINKING` and `ENABLE_THINKING` were removed in this release. Use `ENABLE_MODEL_THINKING` as the fallback switch, with optional `ENABLE_OPUS_THINKING`, `ENABLE_SONNET_THINKING`, and `ENABLE_HAIKU_THINKING` overrides.
Optional Authentication (restrict access to your proxy) @@ -342,7 +346,7 @@ free-claude-code # starts the server - **Per-model routing**: Opus / Sonnet / Haiku requests resolve to their model-specific backend, with `MODEL` as fallback - **Request optimization**: 5 categories of trivial requests (quota probes, title generation, prefix detection, suggestions, filepath extraction) are intercepted and responded to locally without using API quota - **Format handling**: OpenRouter, LM Studio, and llama.cpp use native Anthropic Messages endpoints; NIM and DeepSeek use shared OpenAI chat translation -- **Thinking tokens**: `<think>` tags and `reasoning_content` fields are converted into native Claude thinking blocks when `ENABLE_THINKING=true` +- **Thinking tokens**: `<think>` tags and `reasoning_content` fields are converted into native Claude thinking blocks when the resolved model's thinking switch is enabled The proxy also exposes Claude-compatible probe routes: `GET /v1/models`, `POST /v1/messages`, `POST /v1/messages/count_tokens`, plus `HEAD`/`OPTIONS` support for the common probe endpoints. @@ -528,7 +532,10 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE | `MODEL_SONNET` | Model for Claude Sonnet requests; empty falls back to `MODEL` | empty | | `MODEL_HAIKU` | Model for Claude Haiku requests; empty falls back to `MODEL` | empty | | `NVIDIA_NIM_API_KEY` | NVIDIA API key | required for NIM | -| `ENABLE_THINKING` | Global switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking across all providers. | `true` | +| `ENABLE_MODEL_THINKING` | Fallback switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking unless a model tier overrides it. | `true` | +| `ENABLE_OPUS_THINKING` | Optional thinking switch for Claude Opus requests; empty inherits `ENABLE_MODEL_THINKING`. 
| empty | +| `ENABLE_SONNET_THINKING` | Optional thinking switch for Claude Sonnet requests; empty inherits `ENABLE_MODEL_THINKING`. | empty | +| `ENABLE_HAIKU_THINKING` | Optional thinking switch for Claude Haiku requests; empty inherits `ENABLE_MODEL_THINKING`. | empty | | `OPENROUTER_API_KEY` | OpenRouter API key | required for OpenRouter | | `DEEPSEEK_API_KEY` | DeepSeek API key | required for DeepSeek | | `LM_STUDIO_BASE_URL` | LM Studio server URL | `http://localhost:1234/v1` | diff --git a/api/model_router.py b/api/model_router.py index 789d02e..f49d106 100644 --- a/api/model_router.py +++ b/api/model_router.py @@ -17,6 +17,7 @@ class ResolvedModel: provider_id: str provider_model: str provider_model_ref: str + thinking_enabled: bool @dataclass(frozen=True, slots=True) @@ -39,6 +40,7 @@ class ModelRouter: def resolve(self, claude_model_name: str) -> ResolvedModel: provider_model_ref = self._settings.resolve_model(claude_model_name) + thinking_enabled = self._settings.resolve_thinking(claude_model_name) provider_id = Settings.parse_provider_type(provider_model_ref) provider_model = Settings.parse_model_name(provider_model_ref) if provider_model != claude_model_name: @@ -50,6 +52,7 @@ class ModelRouter: provider_id=provider_id, provider_model=provider_model, provider_model_ref=provider_model_ref, + thinking_enabled=thinking_enabled, ) def resolve_messages_request( diff --git a/api/services.py b/api/services.py index 037f48d..d0bbcaa 100644 --- a/api/services.py +++ b/api/services.py @@ -95,6 +95,7 @@ class ClaudeProxyService: routed.request, input_tokens=input_tokens, request_id=request_id, + thinking_enabled=routed.resolved.thinking_enabled, ), media_type="text/event-stream", headers={ diff --git a/config/env.example b/config/env.example index 7312a8d..12e30ed 100644 --- a/config/env.example +++ b/config/env.example @@ -28,9 +28,12 @@ MODEL="nvidia_nim/z-ai/glm4.7" # Thinking output -# Global switch for provider reasoning requests and Claude thinking blocks. 
-# Set false to suppress thinking across NIM, OpenRouter, LM Studio, and llama.cpp. -ENABLE_THINKING=true +# Per-Claude-model switches for provider reasoning requests and Claude thinking blocks. +# Blank per-model switches inherit ENABLE_MODEL_THINKING. +ENABLE_OPUS_THINKING= +ENABLE_SONNET_THINKING= +ENABLE_HAIKU_THINKING= +ENABLE_MODEL_THINKING=true # Provider config diff --git a/config/settings.py b/config/settings.py index 184f4ae..b734dd5 100644 --- a/config/settings.py +++ b/config/settings.py @@ -68,22 +68,26 @@ def _env_file_override(model_config: Mapping[str, Any], key: str) -> str | None: def _removed_env_var_message(model_config: Mapping[str, Any]) -> str | None: """Return a migration error for removed env vars, if present.""" - removed_key = "NIM_ENABLE_THINKING" - replacement = "ENABLE_THINKING" + removed_keys = ("NIM_ENABLE_THINKING", "ENABLE_THINKING") + replacement = ( + "ENABLE_MODEL_THINKING, ENABLE_OPUS_THINKING, " + "ENABLE_SONNET_THINKING, or ENABLE_HAIKU_THINKING" + ) - if removed_key in os.environ: - return ( - f"{removed_key} has been removed in this release. " - f"Rename it to {replacement}." - ) - - for env_file in _configured_env_files(model_config): - if _env_file_contains_key(env_file, removed_key): + for removed_key in removed_keys: + if removed_key in os.environ: return ( f"{removed_key} has been removed in this release. " - f"Rename it to {replacement}. Found in {env_file}." + f"Rename it to {replacement}." ) + for env_file in _configured_env_files(model_config): + if _env_file_contains_key(env_file, removed_key): + return ( + f"{removed_key} has been removed in this release. " + f"Rename it to {replacement}. Found in {env_file}." 
+ ) + return None @@ -142,7 +146,18 @@ class Settings(BaseSettings): provider_max_concurrency: int = Field( default=5, validation_alias="PROVIDER_MAX_CONCURRENCY" ) - enable_thinking: bool = Field(default=True, validation_alias="ENABLE_THINKING") + enable_model_thinking: bool = Field( + default=True, validation_alias="ENABLE_MODEL_THINKING" + ) + enable_opus_thinking: bool | None = Field( + default=None, validation_alias="ENABLE_OPUS_THINKING" + ) + enable_sonnet_thinking: bool | None = Field( + default=None, validation_alias="ENABLE_SONNET_THINKING" + ) + enable_haiku_thinking: bool | None = Field( + default=None, validation_alias="ENABLE_HAIKU_THINKING" + ) # ==================== HTTP Client Timeouts ==================== http_read_timeout: float = Field( @@ -222,6 +237,9 @@ class Settings(BaseSettings): "model_opus", "model_sonnet", "model_haiku", + "enable_opus_thinking", + "enable_sonnet_thinking", + "enable_haiku_thinking", mode="before", ) @classmethod @@ -317,6 +335,17 @@ class Settings(BaseSettings): return self.model_sonnet return self.model + def resolve_thinking(self, claude_model_name: str) -> bool: + """Resolve whether thinking is enabled for an incoming Claude model name.""" + name_lower = claude_model_name.lower() + if "opus" in name_lower and self.enable_opus_thinking is not None: + return self.enable_opus_thinking + if "haiku" in name_lower and self.enable_haiku_thinking is not None: + return self.enable_haiku_thinking + if "sonnet" in name_lower and self.enable_sonnet_thinking is not None: + return self.enable_sonnet_thinking + return self.enable_model_thinking + @staticmethod def parse_provider_type(model_string: str) -> str: """Extract provider type from any 'provider/model' string.""" diff --git a/providers/anthropic_messages.py b/providers/anthropic_messages.py index a884a03..c35c1d5 100644 --- a/providers/anthropic_messages.py +++ b/providers/anthropic_messages.py @@ -59,9 +59,11 @@ class AnthropicMessagesTransport(BaseProvider): """Return 
headers for the native messages request.""" return {"Content-Type": "application/json"} - def _build_request_body(self, request: Any) -> dict: + def _build_request_body( + self, request: Any, thinking_enabled: bool | None = None + ) -> dict: """Build a native Anthropic request body.""" - thinking_enabled = self._is_thinking_enabled(request) + thinking_enabled = self._is_thinking_enabled(request, thinking_enabled) body = request.model_dump(exclude_none=True) body.pop("extra_body", None) @@ -218,12 +220,13 @@ class AnthropicMessagesTransport(BaseProvider): input_tokens: int = 0, *, request_id: str | None = None, + thinking_enabled: bool | None = None, ) -> AsyncIterator[str]: """Stream response via a native Anthropic-compatible messages endpoint.""" tag = self._provider_name req_tag = f" request_id={request_id}" if request_id else "" - thinking_enabled = self._is_thinking_enabled(request) - body = self._build_request_body(request) + body = self._build_request_body(request, thinking_enabled=thinking_enabled) + thinking_enabled = self._is_thinking_enabled(request, thinking_enabled) logger.info( "{}_STREAM:{} natively passing Anthropic request model={} msgs={} tools={}", diff --git a/providers/base.py b/providers/base.py index 6270c8e..f8ff193 100644 --- a/providers/base.py +++ b/providers/base.py @@ -32,9 +32,16 @@ class BaseProvider(ABC): def __init__(self, config: ProviderConfig): self._config = config - def _is_thinking_enabled(self, request: Any) -> bool: + def _is_thinking_enabled( + self, request: Any, thinking_enabled: bool | None = None + ) -> bool: """Return whether thinking should be enabled for this request.""" thinking = getattr(request, "thinking", None) + config_enabled = ( + self._config.enable_thinking + if thinking_enabled is None + else thinking_enabled + ) request_enabled = True if thinking is not None: thinking_type = ( @@ -52,7 +59,7 @@ class BaseProvider(ABC): ) if enabled is not None: request_enabled = bool(enabled) - return 
self._config.enable_thinking and request_enabled + return config_enabled and request_enabled @abstractmethod async def cleanup(self) -> None: @@ -65,6 +72,7 @@ class BaseProvider(ABC): input_tokens: int = 0, *, request_id: str | None = None, + thinking_enabled: bool | None = None, ) -> AsyncIterator[str]: """Stream response in Anthropic SSE format.""" if False: diff --git a/providers/deepseek/client.py b/providers/deepseek/client.py index dc21bf7..210f223 100644 --- a/providers/deepseek/client.py +++ b/providers/deepseek/client.py @@ -20,9 +20,11 @@ class DeepSeekProvider(OpenAIChatTransport): api_key=config.api_key, ) - def _build_request_body(self, request: Any) -> dict: + def _build_request_body( + self, request: Any, thinking_enabled: bool | None = None + ) -> dict: """Internal helper for tests and shared building.""" return build_request_body( request, - thinking_enabled=self._is_thinking_enabled(request), + thinking_enabled=self._is_thinking_enabled(request, thinking_enabled), ) diff --git a/providers/nvidia_nim/client.py b/providers/nvidia_nim/client.py index 63218ff..5cc2f31 100644 --- a/providers/nvidia_nim/client.py +++ b/providers/nvidia_nim/client.py @@ -30,12 +30,14 @@ class NvidiaNimProvider(OpenAIChatTransport): ) self._nim_settings = nim_settings - def _build_request_body(self, request: Any) -> dict: + def _build_request_body( + self, request: Any, thinking_enabled: bool | None = None + ) -> dict: """Internal helper for tests and shared building.""" return build_request_body( request, self._nim_settings, - thinking_enabled=self._is_thinking_enabled(request), + thinking_enabled=self._is_thinking_enabled(request, thinking_enabled), ) def _get_retry_request_body(self, error: Exception, body: dict) -> dict | None: diff --git a/providers/open_router/client.py b/providers/open_router/client.py index f0df371..d5c2081 100644 --- a/providers/open_router/client.py +++ b/providers/open_router/client.py @@ -42,11 +42,13 @@ class 
OpenRouterProvider(AnthropicMessagesTransport): default_base_url=OPENROUTER_BASE_URL, ) - def _build_request_body(self, request: Any) -> dict: + def _build_request_body( + self, request: Any, thinking_enabled: bool | None = None + ) -> dict: """Internal helper for tests and direct request dispatch.""" return build_request_body( request, - thinking_enabled=self._is_thinking_enabled(request), + thinking_enabled=self._is_thinking_enabled(request, thinking_enabled), ) def _request_headers(self) -> dict[str, str]: diff --git a/providers/openai_compat.py b/providers/openai_compat.py index 93c3af4..d729896 100644 --- a/providers/openai_compat.py +++ b/providers/openai_compat.py @@ -81,7 +81,9 @@ class OpenAIChatTransport(BaseProvider): await client.aclose() @abstractmethod - def _build_request_body(self, request: Any) -> dict: + def _build_request_body( + self, request: Any, thinking_enabled: bool | None = None + ) -> dict: """Build request body. Must be implemented by subclasses.""" def _handle_extra_reasoning( @@ -159,11 +161,12 @@ class OpenAIChatTransport(BaseProvider): input_tokens: int = 0, *, request_id: str | None = None, + thinking_enabled: bool | None = None, ) -> AsyncIterator[str]: """Stream response in Anthropic SSE format.""" with logger.contextualize(request_id=request_id): async for event in self._stream_response_impl( - request, input_tokens, request_id + request, input_tokens, request_id, thinking_enabled=thinking_enabled ): yield event @@ -172,13 +175,16 @@ class OpenAIChatTransport(BaseProvider): request: Any, input_tokens: int, request_id: str | None, + *, + thinking_enabled: bool | None, ) -> AsyncIterator[str]: """Shared streaming implementation.""" tag = self._provider_name message_id = f"msg_{uuid.uuid4()}" sse = SSEBuilder(message_id, request.model, input_tokens) - body = self._build_request_body(request) + body = self._build_request_body(request, thinking_enabled=thinking_enabled) + thinking_enabled = self._is_thinking_enabled(request, 
thinking_enabled) req_tag = f" request_id={request_id}" if request_id else "" logger.info( "{}_STREAM:{} model={} msgs={} tools={}", @@ -193,8 +199,6 @@ class OpenAIChatTransport(BaseProvider): think_parser = ThinkTagParser() heuristic_parser = HeuristicToolParser() - thinking_enabled = self._is_thinking_enabled(request) - finish_reason = None usage_info = None error_occurred = False diff --git a/providers/registry.py b/providers/registry.py index acbe73a..0c306dc 100644 --- a/providers/registry.py +++ b/providers/registry.py @@ -183,7 +183,7 @@ def build_provider_config( http_read_timeout=settings.http_read_timeout, http_write_timeout=settings.http_write_timeout, http_connect_timeout=settings.http_connect_timeout, - enable_thinking=settings.enable_thinking, + enable_thinking=settings.enable_model_thinking, proxy=proxy, ) diff --git a/smoke/capabilities.py b/smoke/capabilities.py index 7b2bfe0..ac23ce8 100644 --- a/smoke/capabilities.py +++ b/smoke/capabilities.py @@ -170,6 +170,7 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = ( "tests/contracts/test_stream_contracts.py", "tests/providers/test_open_router.py", ), + ("test_per_model_thinking_config_e2e",), ), CapabilityContract( "streaming_conversion", @@ -232,7 +233,7 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = ( "removed_env_migration", "removed_env_migration", "config.settings.Settings", - "NIM_ENABLE_THINKING in env or dotenv", + "NIM_ENABLE_THINKING or ENABLE_THINKING in env or dotenv", "startup validation error with rename guidance", "application fails fast", ("tests/config/test_config.py",), diff --git a/smoke/features.py b/smoke/features.py index 0ce2200..214d04d 100644 --- a/smoke/features.py +++ b/smoke/features.py @@ -124,8 +124,9 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] 
= ( ( "test_provider_adaptive_thinking_history_e2e", "test_claude_cli_adaptive_thinking_e2e", + "test_per_model_thinking_config_e2e", ), - ("providers", "cli"), + ("providers", "cli", "config"), ("configured provider",), "configured providers must not reject adaptive thinking payloads", ), diff --git a/smoke/product/test_config_extensibility_product_live.py b/smoke/product/test_config_extensibility_product_live.py index 50d3d00..f14f924 100644 --- a/smoke/product/test_config_extensibility_product_live.py +++ b/smoke/product/test_config_extensibility_product_live.py @@ -64,6 +64,39 @@ def test_removed_env_migration_e2e(smoke_config: SmokeConfig, tmp_path) -> None: assert "NIM_ENABLE_THINKING has been removed" in (result.stderr + result.stdout) +@pytest.mark.smoke_target("config") +def test_per_model_thinking_config_e2e(smoke_config: SmokeConfig, tmp_path) -> None: + env_file = tmp_path / "thinking.env" + env_file.write_text( + 'ENABLE_MODEL_THINKING="false"\n' + 'ENABLE_OPUS_THINKING="true"\n' + "ENABLE_SONNET_THINKING=\n" + 'ENABLE_HAIKU_THINKING="false"\n', + encoding="utf-8", + ) + env = os.environ.copy() + env["FCC_ENV_FILE"] = str(env_file) + script = ( + "from config.settings import Settings; " + "s=Settings(); " + "print(s.resolve_thinking('claude-opus-4-20250514')); " + "print(s.resolve_thinking('claude-sonnet-4-20250514')); " + "print(s.resolve_thinking('claude-haiku-4-20250514')); " + "print(s.resolve_thinking('unknown-model'))" + ) + result = subprocess.run( + cmd_python_c(script), + cwd=smoke_config.root, + env=env, + capture_output=True, + text=True, + timeout=smoke_config.timeout_s, + check=False, + ) + assert result.returncode == 0, result.stderr + assert result.stdout.splitlines() == ["True", "False", "False", "False"] + + @pytest.mark.smoke_target("config") def test_proxy_timeout_config_e2e(smoke_config: SmokeConfig, tmp_path) -> None: env_file = tmp_path / "timeouts.env" diff --git a/tests/api/test_api.py b/tests/api/test_api.py index 
f9149c8..e4912e7 100644 --- a/tests/api/test_api.py +++ b/tests/api/test_api.py @@ -100,7 +100,9 @@ def test_model_mapping(client: TestClient): client.post("/v1/messages", json=payload_haiku) assert len(_stream_response_calls) == 1 args = _stream_response_calls[0][0] + kwargs = _stream_response_calls[0][1] assert args[0].model != "claude-3-haiku-20240307" + assert kwargs["thinking_enabled"] is True def test_error_fallbacks(client: TestClient): diff --git a/tests/api/test_dependencies.py b/tests/api/test_dependencies.py index ccec712..528d157 100644 --- a/tests/api/test_dependencies.py +++ b/tests/api/test_dependencies.py @@ -39,7 +39,7 @@ def _make_mock_settings(**overrides): mock.http_read_timeout = 300.0 mock.http_write_timeout = 10.0 mock.http_connect_timeout = 2.0 - mock.enable_thinking = True + mock.enable_model_thinking = True for key, value in overrides.items(): setattr(mock, key, value) return mock @@ -159,12 +159,12 @@ async def test_get_provider_deepseek_uses_fixed_base_url(): @pytest.mark.asyncio -async def test_get_provider_deepseek_passes_enable_thinking(): - """DeepSeek provider receives the global thinking toggle.""" +async def test_get_provider_deepseek_passes_enable_model_thinking(): + """DeepSeek provider receives the fallback thinking toggle.""" with patch("api.dependencies.get_settings") as mock_settings: mock_settings.return_value = _make_mock_settings( provider_type="deepseek", - enable_thinking=False, + enable_model_thinking=False, ) provider = get_provider() diff --git a/tests/api/test_model_router.py b/tests/api/test_model_router.py index e51324d..6bc2cd5 100644 --- a/tests/api/test_model_router.py +++ b/tests/api/test_model_router.py @@ -14,6 +14,10 @@ def settings(): settings.model_opus = None settings.model_sonnet = None settings.model_haiku = None + settings.enable_model_thinking = True + settings.enable_opus_thinking = None + settings.enable_sonnet_thinking = None + settings.enable_haiku_thinking = None return settings @@ -24,6 +28,7 
@@ def test_model_router_resolves_default_model(settings): assert resolved.provider_id == "nvidia_nim" assert resolved.provider_model == "fallback-model" assert resolved.provider_model_ref == "nvidia_nim/fallback-model" + assert resolved.thinking_enabled is True def test_model_router_applies_opus_override(settings): @@ -39,9 +44,23 @@ def test_model_router_applies_opus_override(settings): assert routed.request.model == "deepseek/deepseek-r1" assert routed.resolved.provider_model_ref == "open_router/deepseek/deepseek-r1" assert routed.resolved.original_model == "claude-opus-4-20250514" + assert routed.resolved.thinking_enabled is True assert request.model == "claude-opus-4-20250514" +def test_model_router_resolves_per_model_thinking(settings): + settings.enable_model_thinking = False + settings.enable_opus_thinking = True + settings.enable_haiku_thinking = False + + router = ModelRouter(settings) + + assert router.resolve("claude-opus-4-20250514").thinking_enabled is True + assert router.resolve("claude-sonnet-4-20250514").thinking_enabled is False + assert router.resolve("claude-3-haiku-20240307").thinking_enabled is False + assert router.resolve("claude-2.1").thinking_enabled is False + + def test_model_router_applies_haiku_override(settings): settings.model_haiku = "lmstudio/qwen2.5-7b" diff --git a/tests/config/test_config.py b/tests/config/test_config.py index ae64bec..43843b3 100644 --- a/tests/config/test_config.py +++ b/tests/config/test_config.py @@ -29,7 +29,7 @@ class TestSettings: assert isinstance(settings.provider_rate_window, int) assert isinstance(settings.nim.temperature, float) assert isinstance(settings.fast_prefix_detection, bool) - assert isinstance(settings.enable_thinking, bool) + assert isinstance(settings.enable_model_thinking, bool) assert settings.http_read_timeout == 120.0 def test_get_settings_cached(self): @@ -110,13 +110,48 @@ class TestSettings: settings = Settings() assert settings.http_connect_timeout == 5.0 - def 
test_enable_thinking_from_env(self, monkeypatch): - """ENABLE_THINKING env var is loaded into settings.""" + def test_enable_model_thinking_from_env(self, monkeypatch): + """ENABLE_MODEL_THINKING env var is loaded into settings.""" from config.settings import Settings - monkeypatch.setenv("ENABLE_THINKING", "false") + monkeypatch.setenv("ENABLE_MODEL_THINKING", "false") settings = Settings() - assert settings.enable_thinking is False + assert settings.enable_model_thinking is False + + def test_per_model_thinking_from_env(self, monkeypatch): + """Per-model thinking env vars are loaded into settings.""" + from config.settings import Settings + + monkeypatch.setenv("ENABLE_OPUS_THINKING", "true") + monkeypatch.setenv("ENABLE_SONNET_THINKING", "false") + monkeypatch.setenv("ENABLE_HAIKU_THINKING", "false") + settings = Settings() + assert settings.enable_opus_thinking is True + assert settings.enable_sonnet_thinking is False + assert settings.enable_haiku_thinking is False + + def test_empty_per_model_thinking_inherits_model_default(self, monkeypatch): + """Blank per-model thinking env vars are treated as unset.""" + from config.settings import Settings + + monkeypatch.setenv("ENABLE_MODEL_THINKING", "false") + monkeypatch.setenv("ENABLE_OPUS_THINKING", "") + settings = Settings() + assert settings.enable_opus_thinking is None + assert settings.resolve_thinking("claude-opus-4-20250514") is False + + def test_resolve_thinking_uses_model_tiers(self, monkeypatch): + """resolve_thinking applies tier override then fallback.""" + from config.settings import Settings + + monkeypatch.setenv("ENABLE_MODEL_THINKING", "false") + monkeypatch.setenv("ENABLE_OPUS_THINKING", "true") + monkeypatch.setenv("ENABLE_HAIKU_THINKING", "false") + settings = Settings() + assert settings.resolve_thinking("claude-opus-4-20250514") is True + assert settings.resolve_thinking("claude-sonnet-4-20250514") is False + assert settings.resolve_thinking("claude-haiku-4-20250514") is False + assert 
settings.resolve_thinking("unknown-model") is False def test_anthropic_auth_token_from_env_without_dotenv_key(self, monkeypatch): """ANTHROPIC_AUTH_TOKEN env var is loaded when dotenv does not define it.""" @@ -166,7 +201,15 @@ class TestSettings: from config.settings import Settings monkeypatch.setenv("NIM_ENABLE_THINKING", "false") - with pytest.raises(ValidationError, match="Rename it to ENABLE_THINKING"): + with pytest.raises(ValidationError, match="ENABLE_MODEL_THINKING"): + Settings() + + def test_removed_enable_thinking_raises(self, monkeypatch): + """ENABLE_THINKING now fails fast with a migration message.""" + from config.settings import Settings + + monkeypatch.setenv("ENABLE_THINKING", "false") + with pytest.raises(ValidationError, match="ENABLE_MODEL_THINKING"): Settings() diff --git a/tests/providers/test_registry.py b/tests/providers/test_registry.py index f30f74b..e6837f2 100644 --- a/tests/providers/test_registry.py +++ b/tests/providers/test_registry.py @@ -38,7 +38,7 @@ def _make_settings(**overrides): mock.http_read_timeout = 300.0 mock.http_write_timeout = 10.0 mock.http_connect_timeout = 2.0 - mock.enable_thinking = True + mock.enable_model_thinking = True mock.nim = NimSettings() for key, value in overrides.items(): setattr(mock, key, value)