free-claude-code/api/dependencies.py
Claude afaf50a972
Add queue-level concurrency limit to provider streaming
Adds a max_concurrency cap to GlobalRateLimiter using asyncio.Semaphore.
A request now waits for a concurrency slot before the sliding-window rate
limit check, so at most N streams are open to the provider simultaneously,
even when the rate window would allow more.

Changes:
- providers/rate_limit.py: max_concurrency param, _concurrency_sem, concurrency_slot() asynccontextmanager
- providers/openai_compat.py: pass max_concurrency to limiter; wrap execute_with_retry + stream iteration in concurrency_slot()
- providers/base.py: max_concurrency field on ProviderConfig
- config/settings.py: provider_max_concurrency setting (PROVIDER_MAX_CONCURRENCY env var, default None = unlimited)
- api/dependencies.py: pass provider_max_concurrency into all three provider ProviderConfig instantiations
- .env.example: document PROVIDER_MAX_CONCURRENCY (commented out)
- tests/providers/test_provider_rate_limit.py: 5 new tests covering concurrency limit enforcement, slot release on exception, no-op when unconfigured
- tests/api/test_dependencies.py: add provider_max_concurrency=None to mock settings helper

https://claude.ai/code/session_014mrF1WMNgmNjtPBuoQHsbg
2026-02-19 14:23:21 +00:00

112 lines
4.5 KiB
Python

"""Dependency injection for FastAPI."""
from fastapi import HTTPException
from loguru import logger
from config.settings import NVIDIA_NIM_BASE_URL, Settings
from config.settings import get_settings as _get_settings
from providers.base import BaseProvider, ProviderConfig
# Process-wide provider singleton: created lazily by get_provider() on first
# use and reset to None by cleanup_provider().
_provider: BaseProvider | None = None
def get_settings() -> Settings:
    """Return the application settings for use as an injectable dependency.

    Delegates to ``config.settings.get_settings`` (imported here as
    ``_get_settings``) and returns whatever it produces.
    """
    current_settings = _get_settings()
    return current_settings
def _require_api_key(api_key: str | None, env_var: str, key_url: str) -> str:
    """Return ``api_key`` unchanged, or raise HTTP 503 if it is missing or blank."""
    if not api_key or not api_key.strip():
        raise HTTPException(
            status_code=503,
            detail=(
                f"{env_var} is not set. Add it to your .env file. "
                f"Get a key at {key_url}"
            ),
        )
    return api_key


def _build_provider_config(
    settings: Settings, *, api_key: str, base_url: str
) -> ProviderConfig:
    """Build a ProviderConfig carrying the rate, concurrency and timeout settings."""
    return ProviderConfig(
        api_key=api_key,
        base_url=base_url,
        rate_limit=settings.provider_rate_limit,
        rate_window=settings.provider_rate_window,
        max_concurrency=settings.provider_max_concurrency,
        http_read_timeout=settings.http_read_timeout,
        http_write_timeout=settings.http_write_timeout,
        http_connect_timeout=settings.http_connect_timeout,
    )


def get_provider() -> BaseProvider:
    """Get or create the provider instance based on ``settings.provider_type``.

    The instance is cached in the module-level ``_provider`` variable, so the
    settings are only consulted on the first call (or the first call after
    ``cleanup_provider()`` has reset the cache).

    Returns:
        The cached provider, created on demand.

    Raises:
        HTTPException: 503 when the selected provider is ``nvidia_nim`` or
            ``open_router`` and its API key is unset or blank.
        ValueError: when ``settings.provider_type`` is not one of
            ``'nvidia_nim'``, ``'open_router'`` or ``'lmstudio'``.
    """
    global _provider
    if _provider is not None:
        return _provider

    settings = get_settings()
    provider_type = settings.provider_type

    # Provider modules are imported inside their branch so that only the
    # selected provider's module gets loaded.
    if provider_type == "nvidia_nim":
        api_key = _require_api_key(
            settings.nvidia_nim_api_key,
            "NVIDIA_NIM_API_KEY",
            "https://build.nvidia.com/settings/api-keys",
        )
        from providers.nvidia_nim import NvidiaNimProvider

        config = _build_provider_config(
            settings, api_key=api_key, base_url=NVIDIA_NIM_BASE_URL
        )
        _provider = NvidiaNimProvider(config, nim_settings=settings.nim)
    elif provider_type == "open_router":
        api_key = _require_api_key(
            settings.open_router_api_key,
            "OPENROUTER_API_KEY",
            "https://openrouter.ai/keys",
        )
        from providers.open_router import OpenRouterProvider

        config = _build_provider_config(
            settings, api_key=api_key, base_url="https://openrouter.ai/api/v1"
        )
        _provider = OpenRouterProvider(config)
    elif provider_type == "lmstudio":
        from providers.lmstudio import LMStudioProvider

        # No key check here: a fixed placeholder key is passed instead.
        config = _build_provider_config(
            settings, api_key="lm-studio", base_url=settings.lm_studio_base_url
        )
        _provider = LMStudioProvider(config)
    else:
        # loguru interpolates with str.format-style "{}" placeholders; a
        # printf-style "%s" would be emitted literally.
        logger.error(
            "Unknown provider_type: '{}'. Supported: 'nvidia_nim', 'open_router', 'lmstudio'",
            provider_type,
        )
        raise ValueError(
            f"Unknown provider_type: '{provider_type}'. "
            f"Supported: 'nvidia_nim', 'open_router', 'lmstudio'"
        )

    logger.info("Provider initialized: {}", provider_type)
    return _provider
async def cleanup_provider() -> None:
    """Close the cached provider's client, if any, and drop the singleton.

    If a provider is cached and its ``_client`` attribute exposes ``aclose``,
    that coroutine is awaited. The module-level ``_provider`` is then reset to
    ``None`` so the next ``get_provider()`` call builds a fresh instance.

    Raises:
        Any exception raised by ``client.aclose()`` propagates to the caller;
        the cached provider is still cleared in that case.
    """
    global _provider
    try:
        if _provider:
            client = getattr(_provider, "_client", None)
            if client and hasattr(client, "aclose"):
                await client.aclose()
    finally:
        # Reset even when aclose() fails, so a partially closed provider is
        # never handed out again by get_provider().
        _provider = None
    logger.debug("Provider cleanup completed")