Add per-model thinking toggles

This commit is contained in:
Alishahryar1 2026-04-25 20:51:07 -07:00
parent 180c942af7
commit f29e693dc5
21 changed files with 220 additions and 54 deletions

View file

@ -100,7 +100,9 @@ def test_model_mapping(client: TestClient):
client.post("/v1/messages", json=payload_haiku)
assert len(_stream_response_calls) == 1
args = _stream_response_calls[0][0]
kwargs = _stream_response_calls[0][1]
assert args[0].model != "claude-3-haiku-20240307"
assert kwargs["thinking_enabled"] is True
def test_error_fallbacks(client: TestClient):

View file

@ -39,7 +39,7 @@ def _make_mock_settings(**overrides):
mock.http_read_timeout = 300.0
mock.http_write_timeout = 10.0
mock.http_connect_timeout = 2.0
mock.enable_thinking = True
mock.enable_model_thinking = True
for key, value in overrides.items():
setattr(mock, key, value)
return mock
@ -159,12 +159,12 @@ async def test_get_provider_deepseek_uses_fixed_base_url():
@pytest.mark.asyncio
async def test_get_provider_deepseek_passes_enable_thinking():
"""DeepSeek provider receives the global thinking toggle."""
async def test_get_provider_deepseek_passes_enable_model_thinking():
"""DeepSeek provider receives the fallback thinking toggle."""
with patch("api.dependencies.get_settings") as mock_settings:
mock_settings.return_value = _make_mock_settings(
provider_type="deepseek",
enable_thinking=False,
enable_model_thinking=False,
)
provider = get_provider()

View file

@ -14,6 +14,10 @@ def settings():
settings.model_opus = None
settings.model_sonnet = None
settings.model_haiku = None
settings.enable_model_thinking = True
settings.enable_opus_thinking = None
settings.enable_sonnet_thinking = None
settings.enable_haiku_thinking = None
return settings
@ -24,6 +28,7 @@ def test_model_router_resolves_default_model(settings):
assert resolved.provider_id == "nvidia_nim"
assert resolved.provider_model == "fallback-model"
assert resolved.provider_model_ref == "nvidia_nim/fallback-model"
assert resolved.thinking_enabled is True
def test_model_router_applies_opus_override(settings):
@ -39,9 +44,23 @@ def test_model_router_applies_opus_override(settings):
assert routed.request.model == "deepseek/deepseek-r1"
assert routed.resolved.provider_model_ref == "open_router/deepseek/deepseek-r1"
assert routed.resolved.original_model == "claude-opus-4-20250514"
assert routed.resolved.thinking_enabled is True
assert request.model == "claude-opus-4-20250514"
def test_model_router_resolves_per_model_thinking(settings):
    """Check the thinking flag the router resolves for each model name.

    The fallback toggle is off, opus is switched on and haiku is switched
    off explicitly; the sonnet toggle is left as the fixture provides it.
    """
    settings.enable_model_thinking = False
    settings.enable_opus_thinking = True
    settings.enable_haiku_thinking = False
    router = ModelRouter(settings)

    # Insertion order keeps the resolve() calls in their original sequence.
    expected_by_model = {
        "claude-opus-4-20250514": True,
        "claude-sonnet-4-20250514": False,
        "claude-3-haiku-20240307": False,
        "claude-2.1": False,
    }
    for model_name, expected in expected_by_model.items():
        assert router.resolve(model_name).thinking_enabled is expected
def test_model_router_applies_haiku_override(settings):
settings.model_haiku = "lmstudio/qwen2.5-7b"

View file

@ -29,7 +29,7 @@ class TestSettings:
assert isinstance(settings.provider_rate_window, int)
assert isinstance(settings.nim.temperature, float)
assert isinstance(settings.fast_prefix_detection, bool)
assert isinstance(settings.enable_thinking, bool)
assert isinstance(settings.enable_model_thinking, bool)
assert settings.http_read_timeout == 120.0
def test_get_settings_cached(self):
@ -110,13 +110,48 @@ class TestSettings:
settings = Settings()
assert settings.http_connect_timeout == 5.0
def test_enable_thinking_from_env(self, monkeypatch):
"""ENABLE_THINKING env var is loaded into settings."""
def test_enable_model_thinking_from_env(self, monkeypatch):
"""ENABLE_MODEL_THINKING env var is loaded into settings."""
from config.settings import Settings
monkeypatch.setenv("ENABLE_THINKING", "false")
monkeypatch.setenv("ENABLE_MODEL_THINKING", "false")
settings = Settings()
assert settings.enable_thinking is False
assert settings.enable_model_thinking is False
def test_per_model_thinking_from_env(self, monkeypatch):
    """Per-model thinking env vars are loaded into settings."""
    from config.settings import Settings

    env_values = {
        "ENABLE_OPUS_THINKING": "true",
        "ENABLE_SONNET_THINKING": "false",
        "ENABLE_HAIKU_THINKING": "false",
    }
    for env_name, raw_value in env_values.items():
        monkeypatch.setenv(env_name, raw_value)

    loaded = Settings()

    assert loaded.enable_opus_thinking is True
    assert loaded.enable_sonnet_thinking is False
    assert loaded.enable_haiku_thinking is False
def test_empty_per_model_thinking_inherits_model_default(self, monkeypatch):
    """Blank per-model thinking env vars are treated as unset."""
    from config.settings import Settings

    env_pairs = (
        ("ENABLE_MODEL_THINKING", "false"),
        ("ENABLE_OPUS_THINKING", ""),  # blank on purpose
    )
    for env_name, raw_value in env_pairs:
        monkeypatch.setenv(env_name, raw_value)

    loaded = Settings()

    # The blank override is expected to load as None...
    assert loaded.enable_opus_thinking is None
    # ...and the opus model is then expected to resolve to False.
    resolved = loaded.resolve_thinking("claude-opus-4-20250514")
    assert resolved is False
def test_resolve_thinking_uses_model_tiers(self, monkeypatch):
    """resolve_thinking applies tier override then fallback."""
    from config.settings import Settings

    env_values = {
        "ENABLE_MODEL_THINKING": "false",
        "ENABLE_OPUS_THINKING": "true",
        "ENABLE_HAIKU_THINKING": "false",
    }
    for env_name, raw_value in env_values.items():
        monkeypatch.setenv(env_name, raw_value)

    loaded = Settings()

    # Checked in the same order as before: opus, sonnet, haiku, unknown.
    expected_by_model = (
        ("claude-opus-4-20250514", True),
        ("claude-sonnet-4-20250514", False),
        ("claude-haiku-4-20250514", False),
        ("unknown-model", False),
    )
    for model_name, expected in expected_by_model:
        assert loaded.resolve_thinking(model_name) is expected
def test_anthropic_auth_token_from_env_without_dotenv_key(self, monkeypatch):
"""ANTHROPIC_AUTH_TOKEN env var is loaded when dotenv does not define it."""
@ -166,7 +201,15 @@ class TestSettings:
from config.settings import Settings
monkeypatch.setenv("NIM_ENABLE_THINKING", "false")
with pytest.raises(ValidationError, match="Rename it to ENABLE_THINKING"):
with pytest.raises(ValidationError, match="ENABLE_MODEL_THINKING"):
Settings()
def test_removed_enable_thinking_raises(self, monkeypatch):
    """ENABLE_THINKING now fails fast with a migration message."""
    from config.settings import Settings

    # Pattern the validation error text is expected to match.
    replacement_hint = "ENABLE_MODEL_THINKING"
    monkeypatch.setenv("ENABLE_THINKING", "false")

    with pytest.raises(ValidationError, match=replacement_hint):
        Settings()

View file

@ -38,7 +38,7 @@ def _make_settings(**overrides):
mock.http_read_timeout = 300.0
mock.http_write_timeout = 10.0
mock.http_connect_timeout = 2.0
mock.enable_thinking = True
mock.enable_model_thinking = True
mock.nim = NimSettings()
for key, value in overrides.items():
setattr(mock, key, value)