mirror of
https://github.com/Alishahryar1/free-claude-code.git
synced 2026-04-28 03:20:01 +00:00
Add per-model thinking toggles
This commit is contained in:
parent
180c942af7
commit
f29e693dc5
21 changed files with 220 additions and 54 deletions
|
|
@ -28,9 +28,12 @@ MODEL="nvidia_nim/z-ai/glm4.7"
|
|||
|
||||
|
||||
# Thinking output
|
||||
# Global switch for provider reasoning requests and Claude thinking blocks.
|
||||
# Set false to suppress thinking across NIM, OpenRouter, LM Studio, and llama.cpp.
|
||||
ENABLE_THINKING=true
|
||||
# Per-Claude-model switches for provider reasoning requests and Claude thinking blocks.
|
||||
# Blank per-model switches inherit ENABLE_MODEL_THINKING.
|
||||
ENABLE_OPUS_THINKING=
|
||||
ENABLE_SONNET_THINKING=
|
||||
ENABLE_HAIKU_THINKING=
|
||||
ENABLE_MODEL_THINKING=true
|
||||
|
||||
|
||||
# Provider config
|
||||
|
|
|
|||
17
README.md
17
README.md
|
|
@ -102,8 +102,12 @@ MODEL_SONNET=
|
|||
MODEL_HAIKU=
|
||||
MODEL="nvidia_nim/z-ai/glm4.7" # fallback
|
||||
|
||||
# Global switch for provider reasoning requests and Claude thinking blocks.
|
||||
ENABLE_THINKING=true
|
||||
# Per-Claude-model switches for provider reasoning requests and Claude thinking blocks.
|
||||
# Blank per-model switches inherit ENABLE_MODEL_THINKING.
|
||||
ENABLE_OPUS_THINKING=
|
||||
ENABLE_SONNET_THINKING=
|
||||
ENABLE_HAIKU_THINKING=
|
||||
ENABLE_MODEL_THINKING=true
|
||||
```
|
||||
|
||||
</details>
|
||||
|
|
@ -179,7 +183,7 @@ MODEL="nvidia_nim/z-ai/glm4.7" # fallback
|
|||
|
||||
</details>
|
||||
|
||||
> Migration: `NIM_ENABLE_THINKING` was removed in this release. Rename it to `ENABLE_THINKING`.
|
||||
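> Migration: `NIM_ENABLE_THINKING` and `ENABLE_THINKING` were removed in this release. If either is still set in the environment or in a configured env file, settings validation fails at startup with a migration message. Use `ENABLE_MODEL_THINKING` as the fallback switch, with optional `ENABLE_OPUS_THINKING`, `ENABLE_SONNET_THINKING`, and `ENABLE_HAIKU_THINKING` overrides.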
|
||||
|
||||
<details>
|
||||
<summary><b>Optional Authentication</b> (restrict access to your proxy)</summary>
|
||||
|
|
@ -342,7 +346,7 @@ free-claude-code # starts the server
|
|||
- **Per-model routing**: Opus / Sonnet / Haiku requests resolve to their model-specific backend, with `MODEL` as fallback
|
||||
- **Request optimization**: 5 categories of trivial requests (quota probes, title generation, prefix detection, suggestions, filepath extraction) are intercepted and responded to locally without using API quota
|
||||
- **Format handling**: OpenRouter, LM Studio, and llama.cpp use native Anthropic Messages endpoints; NIM and DeepSeek use shared OpenAI chat translation
|
||||
- **Thinking tokens**: `<think>` tags and `reasoning_content` fields are converted into native Claude thinking blocks when `ENABLE_THINKING=true`
|
||||
- **Thinking tokens**: `<think>` tags and `reasoning_content` fields are converted into native Claude thinking blocks when the resolved model's thinking switch is enabled
|
||||
|
||||
The proxy also exposes Claude-compatible probe routes: `GET /v1/models`, `POST /v1/messages`, `POST /v1/messages/count_tokens`, plus `HEAD`/`OPTIONS` support for the common probe endpoints.
|
||||
|
||||
|
|
@ -528,7 +532,10 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
|
|||
| `MODEL_SONNET` | Model for Claude Sonnet requests; empty falls back to `MODEL` | empty |
|
||||
| `MODEL_HAIKU` | Model for Claude Haiku requests; empty falls back to `MODEL` | empty |
|
||||
| `NVIDIA_NIM_API_KEY` | NVIDIA API key | required for NIM |
|
||||
| `ENABLE_THINKING` | Global switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking across all providers. | `true` |
|
||||
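| `ENABLE_MODEL_THINKING` | Fallback switch for provider reasoning requests and Claude thinking blocks. It applies to any request whose model tier switch (`ENABLE_OPUS_THINKING`, `ENABLE_SONNET_THINKING`, `ENABLE_HAIKU_THINKING`) is empty, and to model names that match no tier. Set `false` to hide thinking unless a model tier overrides it. | `true` |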
|
||||
| `ENABLE_OPUS_THINKING` | Optional thinking switch for Claude Opus requests; empty inherits `ENABLE_MODEL_THINKING`. | empty |
|
||||
| `ENABLE_SONNET_THINKING` | Optional thinking switch for Claude Sonnet requests; empty inherits `ENABLE_MODEL_THINKING`. | empty |
|
||||
| `ENABLE_HAIKU_THINKING` | Optional thinking switch for Claude Haiku requests; empty inherits `ENABLE_MODEL_THINKING`. | empty |
|
||||
| `OPENROUTER_API_KEY` | OpenRouter API key | required for OpenRouter |
|
||||
| `DEEPSEEK_API_KEY` | DeepSeek API key | required for DeepSeek |
|
||||
| `LM_STUDIO_BASE_URL` | LM Studio server URL | `http://localhost:1234/v1` |
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ class ResolvedModel:
|
|||
provider_id: str
|
||||
provider_model: str
|
||||
provider_model_ref: str
|
||||
thinking_enabled: bool
|
||||
|
||||
|
||||
@dataclass(frozen=True, slots=True)
|
||||
|
|
@ -39,6 +40,7 @@ class ModelRouter:
|
|||
|
||||
def resolve(self, claude_model_name: str) -> ResolvedModel:
|
||||
provider_model_ref = self._settings.resolve_model(claude_model_name)
|
||||
thinking_enabled = self._settings.resolve_thinking(claude_model_name)
|
||||
provider_id = Settings.parse_provider_type(provider_model_ref)
|
||||
provider_model = Settings.parse_model_name(provider_model_ref)
|
||||
if provider_model != claude_model_name:
|
||||
|
|
@ -50,6 +52,7 @@ class ModelRouter:
|
|||
provider_id=provider_id,
|
||||
provider_model=provider_model,
|
||||
provider_model_ref=provider_model_ref,
|
||||
thinking_enabled=thinking_enabled,
|
||||
)
|
||||
|
||||
def resolve_messages_request(
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ class ClaudeProxyService:
|
|||
routed.request,
|
||||
input_tokens=input_tokens,
|
||||
request_id=request_id,
|
||||
thinking_enabled=routed.resolved.thinking_enabled,
|
||||
),
|
||||
media_type="text/event-stream",
|
||||
headers={
|
||||
|
|
|
|||
|
|
@ -28,9 +28,12 @@ MODEL="nvidia_nim/z-ai/glm4.7"
|
|||
|
||||
|
||||
# Thinking output
|
||||
# Global switch for provider reasoning requests and Claude thinking blocks.
|
||||
# Set false to suppress thinking across NIM, OpenRouter, LM Studio, and llama.cpp.
|
||||
ENABLE_THINKING=true
|
||||
# Per-Claude-model switches for provider reasoning requests and Claude thinking blocks.
|
||||
# Blank per-model switches inherit ENABLE_MODEL_THINKING.
|
||||
ENABLE_OPUS_THINKING=
|
||||
ENABLE_SONNET_THINKING=
|
||||
ENABLE_HAIKU_THINKING=
|
||||
ENABLE_MODEL_THINKING=true
|
||||
|
||||
|
||||
# Provider config
|
||||
|
|
|
|||
|
|
@ -68,22 +68,26 @@ def _env_file_override(model_config: Mapping[str, Any], key: str) -> str | None:
|
|||
|
||||
def _removed_env_var_message(model_config: Mapping[str, Any]) -> str | None:
|
||||
"""Return a migration error for removed env vars, if present."""
|
||||
removed_key = "NIM_ENABLE_THINKING"
|
||||
replacement = "ENABLE_THINKING"
|
||||
removed_keys = ("NIM_ENABLE_THINKING", "ENABLE_THINKING")
|
||||
replacement = (
|
||||
"ENABLE_MODEL_THINKING, ENABLE_OPUS_THINKING, "
|
||||
"ENABLE_SONNET_THINKING, or ENABLE_HAIKU_THINKING"
|
||||
)
|
||||
|
||||
if removed_key in os.environ:
|
||||
return (
|
||||
f"{removed_key} has been removed in this release. "
|
||||
f"Rename it to {replacement}."
|
||||
)
|
||||
|
||||
for env_file in _configured_env_files(model_config):
|
||||
if _env_file_contains_key(env_file, removed_key):
|
||||
for removed_key in removed_keys:
|
||||
if removed_key in os.environ:
|
||||
return (
|
||||
f"{removed_key} has been removed in this release. "
|
||||
f"Rename it to {replacement}. Found in {env_file}."
|
||||
f"Rename it to {replacement}."
|
||||
)
|
||||
|
||||
for env_file in _configured_env_files(model_config):
|
||||
if _env_file_contains_key(env_file, removed_key):
|
||||
return (
|
||||
f"{removed_key} has been removed in this release. "
|
||||
f"Rename it to {replacement}. Found in {env_file}."
|
||||
)
|
||||
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -142,7 +146,18 @@ class Settings(BaseSettings):
|
|||
provider_max_concurrency: int = Field(
|
||||
default=5, validation_alias="PROVIDER_MAX_CONCURRENCY"
|
||||
)
|
||||
enable_thinking: bool = Field(default=True, validation_alias="ENABLE_THINKING")
|
||||
enable_model_thinking: bool = Field(
|
||||
default=True, validation_alias="ENABLE_MODEL_THINKING"
|
||||
)
|
||||
enable_opus_thinking: bool | None = Field(
|
||||
default=None, validation_alias="ENABLE_OPUS_THINKING"
|
||||
)
|
||||
enable_sonnet_thinking: bool | None = Field(
|
||||
default=None, validation_alias="ENABLE_SONNET_THINKING"
|
||||
)
|
||||
enable_haiku_thinking: bool | None = Field(
|
||||
default=None, validation_alias="ENABLE_HAIKU_THINKING"
|
||||
)
|
||||
|
||||
# ==================== HTTP Client Timeouts ====================
|
||||
http_read_timeout: float = Field(
|
||||
|
|
@ -222,6 +237,9 @@ class Settings(BaseSettings):
|
|||
"model_opus",
|
||||
"model_sonnet",
|
||||
"model_haiku",
|
||||
"enable_opus_thinking",
|
||||
"enable_sonnet_thinking",
|
||||
"enable_haiku_thinking",
|
||||
mode="before",
|
||||
)
|
||||
@classmethod
|
||||
|
|
@ -317,6 +335,17 @@ class Settings(BaseSettings):
|
|||
return self.model_sonnet
|
||||
return self.model
|
||||
|
||||
def resolve_thinking(self, claude_model_name: str) -> bool:
|
||||
"""Resolve whether thinking is enabled for an incoming Claude model name."""
|
||||
name_lower = claude_model_name.lower()
|
||||
if "opus" in name_lower and self.enable_opus_thinking is not None:
|
||||
return self.enable_opus_thinking
|
||||
if "haiku" in name_lower and self.enable_haiku_thinking is not None:
|
||||
return self.enable_haiku_thinking
|
||||
if "sonnet" in name_lower and self.enable_sonnet_thinking is not None:
|
||||
return self.enable_sonnet_thinking
|
||||
return self.enable_model_thinking
|
||||
|
||||
@staticmethod
|
||||
def parse_provider_type(model_string: str) -> str:
|
||||
"""Extract provider type from any 'provider/model' string."""
|
||||
|
|
|
|||
|
|
@ -59,9 +59,11 @@ class AnthropicMessagesTransport(BaseProvider):
|
|||
"""Return headers for the native messages request."""
|
||||
return {"Content-Type": "application/json"}
|
||||
|
||||
def _build_request_body(self, request: Any) -> dict:
|
||||
def _build_request_body(
|
||||
self, request: Any, thinking_enabled: bool | None = None
|
||||
) -> dict:
|
||||
"""Build a native Anthropic request body."""
|
||||
thinking_enabled = self._is_thinking_enabled(request)
|
||||
thinking_enabled = self._is_thinking_enabled(request, thinking_enabled)
|
||||
body = request.model_dump(exclude_none=True)
|
||||
|
||||
body.pop("extra_body", None)
|
||||
|
|
@ -218,12 +220,13 @@ class AnthropicMessagesTransport(BaseProvider):
|
|||
input_tokens: int = 0,
|
||||
*,
|
||||
request_id: str | None = None,
|
||||
thinking_enabled: bool | None = None,
|
||||
) -> AsyncIterator[str]:
|
||||
"""Stream response via a native Anthropic-compatible messages endpoint."""
|
||||
tag = self._provider_name
|
||||
req_tag = f" request_id={request_id}" if request_id else ""
|
||||
thinking_enabled = self._is_thinking_enabled(request)
|
||||
body = self._build_request_body(request)
|
||||
body = self._build_request_body(request, thinking_enabled=thinking_enabled)
|
||||
thinking_enabled = self._is_thinking_enabled(request, thinking_enabled)
|
||||
|
||||
logger.info(
|
||||
"{}_STREAM:{} natively passing Anthropic request model={} msgs={} tools={}",
|
||||
|
|
|
|||
|
|
@ -32,9 +32,16 @@ class BaseProvider(ABC):
|
|||
def __init__(self, config: ProviderConfig):
|
||||
self._config = config
|
||||
|
||||
def _is_thinking_enabled(self, request: Any) -> bool:
|
||||
def _is_thinking_enabled(
|
||||
self, request: Any, thinking_enabled: bool | None = None
|
||||
) -> bool:
|
||||
"""Return whether thinking should be enabled for this request."""
|
||||
thinking = getattr(request, "thinking", None)
|
||||
config_enabled = (
|
||||
self._config.enable_thinking
|
||||
if thinking_enabled is None
|
||||
else thinking_enabled
|
||||
)
|
||||
request_enabled = True
|
||||
if thinking is not None:
|
||||
thinking_type = (
|
||||
|
|
@ -52,7 +59,7 @@ class BaseProvider(ABC):
|
|||
)
|
||||
if enabled is not None:
|
||||
request_enabled = bool(enabled)
|
||||
return self._config.enable_thinking and request_enabled
|
||||
return config_enabled and request_enabled
|
||||
|
||||
@abstractmethod
|
||||
async def cleanup(self) -> None:
|
||||
|
|
@ -65,6 +72,7 @@ class BaseProvider(ABC):
|
|||
input_tokens: int = 0,
|
||||
*,
|
||||
request_id: str | None = None,
|
||||
thinking_enabled: bool | None = None,
|
||||
) -> AsyncIterator[str]:
|
||||
"""Stream response in Anthropic SSE format."""
|
||||
if False:
|
||||
|
|
|
|||
|
|
@ -20,9 +20,11 @@ class DeepSeekProvider(OpenAIChatTransport):
|
|||
api_key=config.api_key,
|
||||
)
|
||||
|
||||
def _build_request_body(self, request: Any) -> dict:
|
||||
def _build_request_body(
|
||||
self, request: Any, thinking_enabled: bool | None = None
|
||||
) -> dict:
|
||||
"""Internal helper for tests and shared building."""
|
||||
return build_request_body(
|
||||
request,
|
||||
thinking_enabled=self._is_thinking_enabled(request),
|
||||
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
||||
)
|
||||
|
|
|
|||
|
|
@ -30,12 +30,14 @@ class NvidiaNimProvider(OpenAIChatTransport):
|
|||
)
|
||||
self._nim_settings = nim_settings
|
||||
|
||||
def _build_request_body(self, request: Any) -> dict:
|
||||
def _build_request_body(
|
||||
self, request: Any, thinking_enabled: bool | None = None
|
||||
) -> dict:
|
||||
"""Internal helper for tests and shared building."""
|
||||
return build_request_body(
|
||||
request,
|
||||
self._nim_settings,
|
||||
thinking_enabled=self._is_thinking_enabled(request),
|
||||
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
||||
)
|
||||
|
||||
def _get_retry_request_body(self, error: Exception, body: dict) -> dict | None:
|
||||
|
|
|
|||
|
|
@ -42,11 +42,13 @@ class OpenRouterProvider(AnthropicMessagesTransport):
|
|||
default_base_url=OPENROUTER_BASE_URL,
|
||||
)
|
||||
|
||||
def _build_request_body(self, request: Any) -> dict:
|
||||
def _build_request_body(
|
||||
self, request: Any, thinking_enabled: bool | None = None
|
||||
) -> dict:
|
||||
"""Internal helper for tests and direct request dispatch."""
|
||||
return build_request_body(
|
||||
request,
|
||||
thinking_enabled=self._is_thinking_enabled(request),
|
||||
thinking_enabled=self._is_thinking_enabled(request, thinking_enabled),
|
||||
)
|
||||
|
||||
def _request_headers(self) -> dict[str, str]:
|
||||
|
|
|
|||
|
|
@ -81,7 +81,9 @@ class OpenAIChatTransport(BaseProvider):
|
|||
await client.aclose()
|
||||
|
||||
@abstractmethod
|
||||
def _build_request_body(self, request: Any) -> dict:
|
||||
def _build_request_body(
|
||||
self, request: Any, thinking_enabled: bool | None = None
|
||||
) -> dict:
|
||||
"""Build request body. Must be implemented by subclasses."""
|
||||
|
||||
def _handle_extra_reasoning(
|
||||
|
|
@ -159,11 +161,12 @@ class OpenAIChatTransport(BaseProvider):
|
|||
input_tokens: int = 0,
|
||||
*,
|
||||
request_id: str | None = None,
|
||||
thinking_enabled: bool | None = None,
|
||||
) -> AsyncIterator[str]:
|
||||
"""Stream response in Anthropic SSE format."""
|
||||
with logger.contextualize(request_id=request_id):
|
||||
async for event in self._stream_response_impl(
|
||||
request, input_tokens, request_id
|
||||
request, input_tokens, request_id, thinking_enabled=thinking_enabled
|
||||
):
|
||||
yield event
|
||||
|
||||
|
|
@ -172,13 +175,16 @@ class OpenAIChatTransport(BaseProvider):
|
|||
request: Any,
|
||||
input_tokens: int,
|
||||
request_id: str | None,
|
||||
*,
|
||||
thinking_enabled: bool | None,
|
||||
) -> AsyncIterator[str]:
|
||||
"""Shared streaming implementation."""
|
||||
tag = self._provider_name
|
||||
message_id = f"msg_{uuid.uuid4()}"
|
||||
sse = SSEBuilder(message_id, request.model, input_tokens)
|
||||
|
||||
body = self._build_request_body(request)
|
||||
body = self._build_request_body(request, thinking_enabled=thinking_enabled)
|
||||
thinking_enabled = self._is_thinking_enabled(request, thinking_enabled)
|
||||
req_tag = f" request_id={request_id}" if request_id else ""
|
||||
logger.info(
|
||||
"{}_STREAM:{} model={} msgs={} tools={}",
|
||||
|
|
@ -193,8 +199,6 @@ class OpenAIChatTransport(BaseProvider):
|
|||
|
||||
think_parser = ThinkTagParser()
|
||||
heuristic_parser = HeuristicToolParser()
|
||||
thinking_enabled = self._is_thinking_enabled(request)
|
||||
|
||||
finish_reason = None
|
||||
usage_info = None
|
||||
error_occurred = False
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ def build_provider_config(
|
|||
http_read_timeout=settings.http_read_timeout,
|
||||
http_write_timeout=settings.http_write_timeout,
|
||||
http_connect_timeout=settings.http_connect_timeout,
|
||||
enable_thinking=settings.enable_thinking,
|
||||
enable_thinking=settings.enable_model_thinking,
|
||||
proxy=proxy,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -170,6 +170,7 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = (
|
|||
"tests/contracts/test_stream_contracts.py",
|
||||
"tests/providers/test_open_router.py",
|
||||
),
|
||||
("test_per_model_thinking_config_e2e",),
|
||||
),
|
||||
CapabilityContract(
|
||||
"streaming_conversion",
|
||||
|
|
@ -232,7 +233,7 @@ CAPABILITY_CONTRACTS: tuple[CapabilityContract, ...] = (
|
|||
"removed_env_migration",
|
||||
"removed_env_migration",
|
||||
"config.settings.Settings",
|
||||
"NIM_ENABLE_THINKING in env or dotenv",
|
||||
"NIM_ENABLE_THINKING or ENABLE_THINKING in env or dotenv",
|
||||
"startup validation error with rename guidance",
|
||||
"application fails fast",
|
||||
("tests/config/test_config.py",),
|
||||
|
|
|
|||
|
|
@ -124,8 +124,9 @@ FEATURE_INVENTORY: tuple[FeatureCoverage, ...] = (
|
|||
(
|
||||
"test_provider_adaptive_thinking_history_e2e",
|
||||
"test_claude_cli_adaptive_thinking_e2e",
|
||||
"test_per_model_thinking_config_e2e",
|
||||
),
|
||||
("providers", "cli"),
|
||||
("providers", "cli", "config"),
|
||||
("configured provider",),
|
||||
"configured providers must not reject adaptive thinking payloads",
|
||||
),
|
||||
|
|
|
|||
|
|
@ -64,6 +64,39 @@ def test_removed_env_migration_e2e(smoke_config: SmokeConfig, tmp_path) -> None:
|
|||
assert "NIM_ENABLE_THINKING has been removed" in (result.stderr + result.stdout)
|
||||
|
||||
|
||||
@pytest.mark.smoke_target("config")
|
||||
def test_per_model_thinking_config_e2e(smoke_config: SmokeConfig, tmp_path) -> None:
|
||||
env_file = tmp_path / "thinking.env"
|
||||
env_file.write_text(
|
||||
'ENABLE_MODEL_THINKING="false"\n'
|
||||
'ENABLE_OPUS_THINKING="true"\n'
|
||||
"ENABLE_SONNET_THINKING=\n"
|
||||
'ENABLE_HAIKU_THINKING="false"\n',
|
||||
encoding="utf-8",
|
||||
)
|
||||
env = os.environ.copy()
|
||||
env["FCC_ENV_FILE"] = str(env_file)
|
||||
script = (
|
||||
"from config.settings import Settings; "
|
||||
"s=Settings(); "
|
||||
"print(s.resolve_thinking('claude-opus-4-20250514')); "
|
||||
"print(s.resolve_thinking('claude-sonnet-4-20250514')); "
|
||||
"print(s.resolve_thinking('claude-haiku-4-20250514')); "
|
||||
"print(s.resolve_thinking('unknown-model'))"
|
||||
)
|
||||
result = subprocess.run(
|
||||
cmd_python_c(script),
|
||||
cwd=smoke_config.root,
|
||||
env=env,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=smoke_config.timeout_s,
|
||||
check=False,
|
||||
)
|
||||
assert result.returncode == 0, result.stderr
|
||||
assert result.stdout.splitlines() == ["True", "False", "False", "False"]
|
||||
|
||||
|
||||
@pytest.mark.smoke_target("config")
|
||||
def test_proxy_timeout_config_e2e(smoke_config: SmokeConfig, tmp_path) -> None:
|
||||
env_file = tmp_path / "timeouts.env"
|
||||
|
|
|
|||
|
|
@ -100,7 +100,9 @@ def test_model_mapping(client: TestClient):
|
|||
client.post("/v1/messages", json=payload_haiku)
|
||||
assert len(_stream_response_calls) == 1
|
||||
args = _stream_response_calls[0][0]
|
||||
kwargs = _stream_response_calls[0][1]
|
||||
assert args[0].model != "claude-3-haiku-20240307"
|
||||
assert kwargs["thinking_enabled"] is True
|
||||
|
||||
|
||||
def test_error_fallbacks(client: TestClient):
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ def _make_mock_settings(**overrides):
|
|||
mock.http_read_timeout = 300.0
|
||||
mock.http_write_timeout = 10.0
|
||||
mock.http_connect_timeout = 2.0
|
||||
mock.enable_thinking = True
|
||||
mock.enable_model_thinking = True
|
||||
for key, value in overrides.items():
|
||||
setattr(mock, key, value)
|
||||
return mock
|
||||
|
|
@ -159,12 +159,12 @@ async def test_get_provider_deepseek_uses_fixed_base_url():
|
|||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_get_provider_deepseek_passes_enable_thinking():
|
||||
"""DeepSeek provider receives the global thinking toggle."""
|
||||
async def test_get_provider_deepseek_passes_enable_model_thinking():
|
||||
"""DeepSeek provider receives the fallback thinking toggle."""
|
||||
with patch("api.dependencies.get_settings") as mock_settings:
|
||||
mock_settings.return_value = _make_mock_settings(
|
||||
provider_type="deepseek",
|
||||
enable_thinking=False,
|
||||
enable_model_thinking=False,
|
||||
)
|
||||
|
||||
provider = get_provider()
|
||||
|
|
|
|||
|
|
@ -14,6 +14,10 @@ def settings():
|
|||
settings.model_opus = None
|
||||
settings.model_sonnet = None
|
||||
settings.model_haiku = None
|
||||
settings.enable_model_thinking = True
|
||||
settings.enable_opus_thinking = None
|
||||
settings.enable_sonnet_thinking = None
|
||||
settings.enable_haiku_thinking = None
|
||||
return settings
|
||||
|
||||
|
||||
|
|
@ -24,6 +28,7 @@ def test_model_router_resolves_default_model(settings):
|
|||
assert resolved.provider_id == "nvidia_nim"
|
||||
assert resolved.provider_model == "fallback-model"
|
||||
assert resolved.provider_model_ref == "nvidia_nim/fallback-model"
|
||||
assert resolved.thinking_enabled is True
|
||||
|
||||
|
||||
def test_model_router_applies_opus_override(settings):
|
||||
|
|
@ -39,9 +44,23 @@ def test_model_router_applies_opus_override(settings):
|
|||
assert routed.request.model == "deepseek/deepseek-r1"
|
||||
assert routed.resolved.provider_model_ref == "open_router/deepseek/deepseek-r1"
|
||||
assert routed.resolved.original_model == "claude-opus-4-20250514"
|
||||
assert routed.resolved.thinking_enabled is True
|
||||
assert request.model == "claude-opus-4-20250514"
|
||||
|
||||
|
||||
def test_model_router_resolves_per_model_thinking(settings):
|
||||
settings.enable_model_thinking = False
|
||||
settings.enable_opus_thinking = True
|
||||
settings.enable_haiku_thinking = False
|
||||
|
||||
router = ModelRouter(settings)
|
||||
|
||||
assert router.resolve("claude-opus-4-20250514").thinking_enabled is True
|
||||
assert router.resolve("claude-sonnet-4-20250514").thinking_enabled is False
|
||||
assert router.resolve("claude-3-haiku-20240307").thinking_enabled is False
|
||||
assert router.resolve("claude-2.1").thinking_enabled is False
|
||||
|
||||
|
||||
def test_model_router_applies_haiku_override(settings):
|
||||
settings.model_haiku = "lmstudio/qwen2.5-7b"
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class TestSettings:
|
|||
assert isinstance(settings.provider_rate_window, int)
|
||||
assert isinstance(settings.nim.temperature, float)
|
||||
assert isinstance(settings.fast_prefix_detection, bool)
|
||||
assert isinstance(settings.enable_thinking, bool)
|
||||
assert isinstance(settings.enable_model_thinking, bool)
|
||||
assert settings.http_read_timeout == 120.0
|
||||
|
||||
def test_get_settings_cached(self):
|
||||
|
|
@ -110,13 +110,48 @@ class TestSettings:
|
|||
settings = Settings()
|
||||
assert settings.http_connect_timeout == 5.0
|
||||
|
||||
def test_enable_thinking_from_env(self, monkeypatch):
|
||||
"""ENABLE_THINKING env var is loaded into settings."""
|
||||
def test_enable_model_thinking_from_env(self, monkeypatch):
|
||||
"""ENABLE_MODEL_THINKING env var is loaded into settings."""
|
||||
from config.settings import Settings
|
||||
|
||||
monkeypatch.setenv("ENABLE_THINKING", "false")
|
||||
monkeypatch.setenv("ENABLE_MODEL_THINKING", "false")
|
||||
settings = Settings()
|
||||
assert settings.enable_thinking is False
|
||||
assert settings.enable_model_thinking is False
|
||||
|
||||
def test_per_model_thinking_from_env(self, monkeypatch):
|
||||
"""Per-model thinking env vars are loaded into settings."""
|
||||
from config.settings import Settings
|
||||
|
||||
monkeypatch.setenv("ENABLE_OPUS_THINKING", "true")
|
||||
monkeypatch.setenv("ENABLE_SONNET_THINKING", "false")
|
||||
monkeypatch.setenv("ENABLE_HAIKU_THINKING", "false")
|
||||
settings = Settings()
|
||||
assert settings.enable_opus_thinking is True
|
||||
assert settings.enable_sonnet_thinking is False
|
||||
assert settings.enable_haiku_thinking is False
|
||||
|
||||
def test_empty_per_model_thinking_inherits_model_default(self, monkeypatch):
|
||||
"""Blank per-model thinking env vars are treated as unset."""
|
||||
from config.settings import Settings
|
||||
|
||||
monkeypatch.setenv("ENABLE_MODEL_THINKING", "false")
|
||||
monkeypatch.setenv("ENABLE_OPUS_THINKING", "")
|
||||
settings = Settings()
|
||||
assert settings.enable_opus_thinking is None
|
||||
assert settings.resolve_thinking("claude-opus-4-20250514") is False
|
||||
|
||||
def test_resolve_thinking_uses_model_tiers(self, monkeypatch):
|
||||
"""resolve_thinking applies tier override then fallback."""
|
||||
from config.settings import Settings
|
||||
|
||||
monkeypatch.setenv("ENABLE_MODEL_THINKING", "false")
|
||||
monkeypatch.setenv("ENABLE_OPUS_THINKING", "true")
|
||||
monkeypatch.setenv("ENABLE_HAIKU_THINKING", "false")
|
||||
settings = Settings()
|
||||
assert settings.resolve_thinking("claude-opus-4-20250514") is True
|
||||
assert settings.resolve_thinking("claude-sonnet-4-20250514") is False
|
||||
assert settings.resolve_thinking("claude-haiku-4-20250514") is False
|
||||
assert settings.resolve_thinking("unknown-model") is False
|
||||
|
||||
def test_anthropic_auth_token_from_env_without_dotenv_key(self, monkeypatch):
|
||||
"""ANTHROPIC_AUTH_TOKEN env var is loaded when dotenv does not define it."""
|
||||
|
|
@ -166,7 +201,15 @@ class TestSettings:
|
|||
from config.settings import Settings
|
||||
|
||||
monkeypatch.setenv("NIM_ENABLE_THINKING", "false")
|
||||
with pytest.raises(ValidationError, match="Rename it to ENABLE_THINKING"):
|
||||
with pytest.raises(ValidationError, match="ENABLE_MODEL_THINKING"):
|
||||
Settings()
|
||||
|
||||
def test_removed_enable_thinking_raises(self, monkeypatch):
|
||||
"""ENABLE_THINKING now fails fast with a migration message."""
|
||||
from config.settings import Settings
|
||||
|
||||
monkeypatch.setenv("ENABLE_THINKING", "false")
|
||||
with pytest.raises(ValidationError, match="ENABLE_MODEL_THINKING"):
|
||||
Settings()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -38,7 +38,7 @@ def _make_settings(**overrides):
|
|||
mock.http_read_timeout = 300.0
|
||||
mock.http_write_timeout = 10.0
|
||||
mock.http_connect_timeout = 2.0
|
||||
mock.enable_thinking = True
|
||||
mock.enable_model_thinking = True
|
||||
mock.nim = NimSettings()
|
||||
for key, value in overrides.items():
|
||||
setattr(mock, key, value)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue