diff --git a/.env.example b/.env.example
index 77fef51..5983188 100644
--- a/.env.example
+++ b/.env.example
@@ -21,9 +21,9 @@ LLAMACPP_BASE_URL="http://localhost:8080/v1"
 # All Claude model requests are mapped to these models, plain model is fallback
 # Format: provider_type/model/name
 # Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
-MODEL_OPUS="nvidia_nim/z-ai/glm4.7"
-MODEL_SONNET="open_router/arcee-ai/trinity-large-preview:free"
-MODEL_HAIKU="open_router/stepfun/step-3.5-flash:free"
+MODEL_OPUS=
+MODEL_SONNET=
+MODEL_HAIKU=
 MODEL="nvidia_nim/z-ai/glm4.7"
diff --git a/README.md b/README.md
index 6a2e11d..2309e61 100644
--- a/README.md
+++ b/README.md
@@ -76,9 +76,9 @@ Choose your provider and edit `.env`:
 
 ```dotenv
 NVIDIA_NIM_API_KEY="nvapi-your-key-here"
-MODEL_OPUS="nvidia_nim/z-ai/glm4.7"
-MODEL_SONNET="nvidia_nim/moonshotai/kimi-k2-thinking"
-MODEL_HAIKU="nvidia_nim/stepfun-ai/step-3.5-flash"
+MODEL_OPUS=
+MODEL_SONNET=
+MODEL_HAIKU=
 MODEL="nvidia_nim/z-ai/glm4.7" # fallback
 
 # Global switch for provider reasoning requests and Claude thinking blocks.
@@ -502,10 +502,10 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
 
 | Variable | Description | Default |
 | -------------------- | --------------------------------------------------------------------- | ------------------------------------------------- |
-| `MODEL` | Fallback model (`provider/model/name` format; invalid prefix → error) | `nvidia_nim/stepfun-ai/step-3.5-flash` |
-| `MODEL_OPUS` | Model for Claude Opus requests (falls back to `MODEL`) | `nvidia_nim/z-ai/glm4.7` |
-| `MODEL_SONNET` | Model for Claude Sonnet requests (falls back to `MODEL`) | `open_router/arcee-ai/trinity-large-preview:free` |
-| `MODEL_HAIKU` | Model for Claude Haiku requests (falls back to `MODEL`) | `open_router/stepfun/step-3.5-flash:free` |
+| `MODEL` | Fallback model (`provider/model/name` format; invalid prefix → error) | `nvidia_nim/z-ai/glm4.7` |
+| `MODEL_OPUS` | Model for Claude Opus requests; empty falls back to `MODEL` | empty |
+| `MODEL_SONNET` | Model for Claude Sonnet requests; empty falls back to `MODEL` | empty |
+| `MODEL_HAIKU` | Model for Claude Haiku requests; empty falls back to `MODEL` | empty |
 | `NVIDIA_NIM_API_KEY` | NVIDIA API key | required for NIM |
 | `ENABLE_THINKING` | Global switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking across all providers. | `true` |
 | `OPENROUTER_API_KEY` | OpenRouter API key | required for OpenRouter |
diff --git a/config/env.example b/config/env.example
index a77fd4e..6b0faa5 100644
--- a/config/env.example
+++ b/config/env.example
@@ -17,9 +17,9 @@ LM_STUDIO_BASE_URL="http://localhost:1234/v1"
 # All Claude model requests are mapped to these models, plain model is fallback
 # Format: provider_type/model/name
 # Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
-MODEL_OPUS="nvidia_nim/z-ai/glm4.7"
-MODEL_SONNET="open_router/arcee-ai/trinity-large-preview:free"
-MODEL_HAIKU="open_router/stepfun/step-3.5-flash:free"
+MODEL_OPUS=
+MODEL_SONNET=
+MODEL_HAIKU=
 MODEL="nvidia_nim/z-ai/glm4.7"
diff --git a/config/settings.py b/config/settings.py
index deb6888..59c69b0 100644
--- a/config/settings.py
+++ b/config/settings.py
@@ -119,7 +119,7 @@ class Settings(BaseSettings):
     # ==================== Model ====================
     # All Claude model requests are mapped to this single model (fallback)
     # Format: provider_type/model/name
-    model: str = "nvidia_nim/stepfun-ai/step-3.5-flash"
+    model: str = "nvidia_nim/z-ai/glm4.7"
 
     # Per-model overrides (optional, falls back to MODEL)
     # Each can use a different provider
@@ -217,6 +217,9 @@ class Settings(BaseSettings):
         "allowed_telegram_user_id",
         "discord_bot_token",
         "allowed_discord_channels",
+        "model_opus",
+        "model_sonnet",
+        "model_haiku",
         mode="before",
     )
     @classmethod
diff --git a/tests/config/test_config.py b/tests/config/test_config.py
index ba53c0d..ae64bec 100644
--- a/tests/config/test_config.py
+++ b/tests/config/test_config.py
@@ -24,7 +24,7 @@ class TestSettings:
         monkeypatch.delenv("HTTP_READ_TIMEOUT", raising=False)
         monkeypatch.setitem(Settings.model_config, "env_file", ())
         settings = Settings()
-        assert settings.model == "nvidia_nim/stepfun-ai/step-3.5-flash"
+        assert settings.model == "nvidia_nim/z-ai/glm4.7"
         assert isinstance(settings.provider_rate_limit, int)
         assert isinstance(settings.provider_rate_window, int)
         assert isinstance(settings.nim.temperature, float)
@@ -391,6 +391,19 @@ class TestPerModelMapping:
         s = Settings()
         assert s.model_opus == "open_router/deepseek/deepseek-r1"
 
+    @pytest.mark.parametrize("env_var", ["MODEL_OPUS", "MODEL_SONNET", "MODEL_HAIKU"])
+    def test_empty_model_override_env_is_unset(self, monkeypatch, env_var):
+        """Empty per-model override env vars are treated as unset."""
+        from config.settings import Settings
+
+        monkeypatch.setenv(env_var, "")
+        s = Settings()
+        assert getattr(s, env_var.lower()) is None
+        assert (
+            s.resolve_model(f"claude-{env_var.removeprefix('MODEL_').lower()}-4")
+            == s.model
+        )
+
     @pytest.mark.parametrize(
         "env_vars,expected_model,expected_haiku",
         [
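For reviewers, here is a minimal sketch of the behavior this change relies on: the existing `mode="before"` validator in `config/settings.py` presumably coerces empty env strings to `None`, so `MODEL_OPUS=` behaves like an unset override and `resolve_model()` falls back to `MODEL`. The field types, validator body, and resolution order below are assumptions for illustration, not the project's actual code.

```python
# Illustrative sketch only -- field names match the diff, but the validator body
# and resolve_model() logic are assumptions about how the project behaves.
from pydantic import field_validator
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    # Fallback model, matching the new default in this patch.
    model: str = "nvidia_nim/z-ai/glm4.7"
    # Per-model overrides; None (or, with this patch, an empty string) means "use MODEL".
    model_opus: str | None = None
    model_sonnet: str | None = None
    model_haiku: str | None = None

    @field_validator("model_opus", "model_sonnet", "model_haiku", mode="before")
    @classmethod
    def _empty_env_to_none(cls, value):
        # `MODEL_OPUS=` in a .env file arrives as "", which should act as unset.
        if isinstance(value, str) and not value.strip():
            return None
        return value

    def resolve_model(self, claude_model: str) -> str:
        # Assumed resolution order: per-tier override if set, else the plain MODEL fallback.
        name = claude_model.lower()
        if "opus" in name and self.model_opus:
            return self.model_opus
        if "sonnet" in name and self.model_sonnet:
            return self.model_sonnet
        if "haiku" in name and self.model_haiku:
            return self.model_haiku
        return self.model


# With an empty override, resolution falls back to MODEL:
# Settings(model_opus="").resolve_model("claude-opus-4") == Settings().model
```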