Treat empty model overrides as fallback

This commit is contained in:
Alishahryar1 2026-04-24 13:58:25 -07:00
parent 862eb43046
commit d2db1bd689
5 changed files with 31 additions and 15 deletions

View file

@@ -21,9 +21,9 @@ LLAMACPP_BASE_URL="http://localhost:8080/v1"
# All Claude model requests are mapped to these models, plain model is fallback # All Claude model requests are mapped to these models, plain model is fallback
# Format: provider_type/model/name # Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" # Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
MODEL_OPUS="nvidia_nim/z-ai/glm4.7" MODEL_OPUS=
MODEL_SONNET="open_router/arcee-ai/trinity-large-preview:free" MODEL_SONNET=
MODEL_HAIKU="open_router/stepfun/step-3.5-flash:free" MODEL_HAIKU=
MODEL="nvidia_nim/z-ai/glm4.7" MODEL="nvidia_nim/z-ai/glm4.7"

View file

@@ -76,9 +76,9 @@ Choose your provider and edit `.env`:
```dotenv ```dotenv
NVIDIA_NIM_API_KEY="nvapi-your-key-here" NVIDIA_NIM_API_KEY="nvapi-your-key-here"
MODEL_OPUS="nvidia_nim/z-ai/glm4.7" MODEL_OPUS=
MODEL_SONNET="nvidia_nim/moonshotai/kimi-k2-thinking" MODEL_SONNET=
MODEL_HAIKU="nvidia_nim/stepfun-ai/step-3.5-flash" MODEL_HAIKU=
MODEL="nvidia_nim/z-ai/glm4.7" # fallback MODEL="nvidia_nim/z-ai/glm4.7" # fallback
# Global switch for provider reasoning requests and Claude thinking blocks. # Global switch for provider reasoning requests and Claude thinking blocks.
@@ -502,10 +502,10 @@ Configure via `WHISPER_DEVICE` (`cpu` | `cuda` | `nvidia_nim`) and `WHISPER_MODE
| Variable | Description | Default | | Variable | Description | Default |
| -------------------- | --------------------------------------------------------------------- | ------------------------------------------------- | | -------------------- | --------------------------------------------------------------------- | ------------------------------------------------- |
| `MODEL` | Fallback model (`provider/model/name` format; invalid prefix → error) | `nvidia_nim/stepfun-ai/step-3.5-flash` | | `MODEL` | Fallback model (`provider/model/name` format; invalid prefix → error) | `nvidia_nim/z-ai/glm4.7` |
| `MODEL_OPUS` | Model for Claude Opus requests (falls back to `MODEL`) | `nvidia_nim/z-ai/glm4.7` | | `MODEL_OPUS` | Model for Claude Opus requests; empty falls back to `MODEL` | empty |
| `MODEL_SONNET` | Model for Claude Sonnet requests (falls back to `MODEL`) | `open_router/arcee-ai/trinity-large-preview:free` | | `MODEL_SONNET` | Model for Claude Sonnet requests; empty falls back to `MODEL` | empty |
| `MODEL_HAIKU` | Model for Claude Haiku requests (falls back to `MODEL`) | `open_router/stepfun/step-3.5-flash:free` | | `MODEL_HAIKU` | Model for Claude Haiku requests; empty falls back to `MODEL` | empty |
| `NVIDIA_NIM_API_KEY` | NVIDIA API key | required for NIM | | `NVIDIA_NIM_API_KEY` | NVIDIA API key | required for NIM |
| `ENABLE_THINKING` | Global switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking across all providers. | `true` | | `ENABLE_THINKING` | Global switch for provider reasoning requests and Claude thinking blocks. Set `false` to hide thinking across all providers. | `true` |
| `OPENROUTER_API_KEY` | OpenRouter API key | required for OpenRouter | | `OPENROUTER_API_KEY` | OpenRouter API key | required for OpenRouter |

View file

@@ -17,9 +17,9 @@ LM_STUDIO_BASE_URL="http://localhost:1234/v1"
# All Claude model requests are mapped to these models, plain model is fallback # All Claude model requests are mapped to these models, plain model is fallback
# Format: provider_type/model/name # Format: provider_type/model/name
# Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp" # Valid providers: "nvidia_nim" | "open_router" | "deepseek" | "lmstudio" | "llamacpp"
MODEL_OPUS="nvidia_nim/z-ai/glm4.7" MODEL_OPUS=
MODEL_SONNET="open_router/arcee-ai/trinity-large-preview:free" MODEL_SONNET=
MODEL_HAIKU="open_router/stepfun/step-3.5-flash:free" MODEL_HAIKU=
MODEL="nvidia_nim/z-ai/glm4.7" MODEL="nvidia_nim/z-ai/glm4.7"

View file

@@ -119,7 +119,7 @@ class Settings(BaseSettings):
# ==================== Model ==================== # ==================== Model ====================
# All Claude model requests are mapped to this single model (fallback) # All Claude model requests are mapped to this single model (fallback)
# Format: provider_type/model/name # Format: provider_type/model/name
model: str = "nvidia_nim/stepfun-ai/step-3.5-flash" model: str = "nvidia_nim/z-ai/glm4.7"
# Per-model overrides (optional, falls back to MODEL) # Per-model overrides (optional, falls back to MODEL)
# Each can use a different provider # Each can use a different provider
@@ -217,6 +217,9 @@ class Settings(BaseSettings):
"allowed_telegram_user_id", "allowed_telegram_user_id",
"discord_bot_token", "discord_bot_token",
"allowed_discord_channels", "allowed_discord_channels",
"model_opus",
"model_sonnet",
"model_haiku",
mode="before", mode="before",
) )
@classmethod @classmethod

View file

@@ -24,7 +24,7 @@ class TestSettings:
monkeypatch.delenv("HTTP_READ_TIMEOUT", raising=False) monkeypatch.delenv("HTTP_READ_TIMEOUT", raising=False)
monkeypatch.setitem(Settings.model_config, "env_file", ()) monkeypatch.setitem(Settings.model_config, "env_file", ())
settings = Settings() settings = Settings()
assert settings.model == "nvidia_nim/stepfun-ai/step-3.5-flash" assert settings.model == "nvidia_nim/z-ai/glm4.7"
assert isinstance(settings.provider_rate_limit, int) assert isinstance(settings.provider_rate_limit, int)
assert isinstance(settings.provider_rate_window, int) assert isinstance(settings.provider_rate_window, int)
assert isinstance(settings.nim.temperature, float) assert isinstance(settings.nim.temperature, float)
@@ -391,6 +391,19 @@ class TestPerModelMapping:
s = Settings() s = Settings()
assert s.model_opus == "open_router/deepseek/deepseek-r1" assert s.model_opus == "open_router/deepseek/deepseek-r1"
@pytest.mark.parametrize("env_var", ["MODEL_OPUS", "MODEL_SONNET", "MODEL_HAIKU"])
def test_empty_model_override_env_is_unset(self, monkeypatch, env_var):
"""Empty per-model override env vars are treated as unset."""
from config.settings import Settings
monkeypatch.setenv(env_var, "")
s = Settings()
assert getattr(s, env_var.lower()) is None
assert (
s.resolve_model(f"claude-{env_var.removeprefix('MODEL_').lower()}-4")
== s.model
)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"env_vars,expected_model,expected_haiku", "env_vars,expected_model,expected_haiku",
[ [