feat: add proxy support for httpx clients (#125)

Add proxy support for providers, following the
[httpx proxy documentation](https://www.python-httpx.org/advanced/proxies/):

- Add per-provider proxy support (HTTP and SOCKS5) for all 4 providers:
nvidia_nim, open_router, lmstudio, llamacpp
- Each provider gets its own env var (NVIDIA_NIM_PROXY,
OPENROUTER_PROXY, LMSTUDIO_PROXY, LLAMACPP_PROXY) for independent proxy
configuration

---------

Co-authored-by: Alishahryar1 <alishahryar2@gmail.com>
This commit is contained in:
arssing 2026-04-23 07:06:16 +07:00 committed by GitHub
parent e719e4aed2
commit 2fe15bd2cd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 107 additions and 9 deletions

View file

@ -34,6 +34,12 @@ ENABLE_THINKING=true
# Provider config
# Per-provider proxy support: http and socks5, example: "http://username:password@host:port"
NVIDIA_NIM_PROXY=""
OPENROUTER_PROXY=""
LMSTUDIO_PROXY=""
LLAMACPP_PROXY=""
PROVIDER_RATE_LIMIT=40
PROVIDER_RATE_WINDOW=60
PROVIDER_MAX_CONCURRENCY=5

View file

@ -23,8 +23,22 @@ def get_settings() -> Settings:
return _get_settings()
def _get_proxy_value(settings: Settings, attr_name: str) -> str:
    """Look up a per-provider proxy setting by attribute name.

    Returns the configured proxy URL only when the attribute exists and is a
    string; any missing or non-string value (e.g. a mock attribute in tests)
    falls back to the empty string, meaning "no proxy".
    """
    configured = getattr(settings, attr_name, "")
    if isinstance(configured, str):
        return configured
    return ""
def _create_provider_for_type(provider_type: str, settings: Settings) -> BaseProvider:
"""Construct and return a new provider instance for the given provider type."""
_proxy_map = {
"nvidia_nim": _get_proxy_value(settings, "nvidia_nim_proxy"),
"open_router": _get_proxy_value(settings, "open_router_proxy"),
"lmstudio": _get_proxy_value(settings, "lmstudio_proxy"),
"llamacpp": _get_proxy_value(settings, "llamacpp_proxy"),
}
proxy = _proxy_map.get(provider_type, "")
if provider_type == "nvidia_nim":
if not settings.nvidia_nim_api_key or not settings.nvidia_nim_api_key.strip():
raise AuthenticationError(
@ -41,6 +55,7 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
http_write_timeout=settings.http_write_timeout,
http_connect_timeout=settings.http_connect_timeout,
enable_thinking=settings.enable_thinking,
proxy=proxy,
)
return NvidiaNimProvider(config, nim_settings=settings.nim)
if provider_type == "open_router":
@ -59,6 +74,7 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
http_write_timeout=settings.http_write_timeout,
http_connect_timeout=settings.http_connect_timeout,
enable_thinking=settings.enable_thinking,
proxy=proxy,
)
return OpenRouterProvider(config)
if provider_type == "deepseek":
@ -90,6 +106,7 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
http_write_timeout=settings.http_write_timeout,
http_connect_timeout=settings.http_connect_timeout,
enable_thinking=settings.enable_thinking,
proxy=proxy,
)
return LMStudioProvider(config)
if provider_type == "llamacpp":
@ -103,6 +120,7 @@ def _create_provider_for_type(provider_type: str, settings: Settings) -> BasePro
http_write_timeout=settings.http_write_timeout,
http_connect_timeout=settings.http_connect_timeout,
enable_thinking=settings.enable_thinking,
proxy=proxy,
)
return LlamaCppProvider(config)
logger.error(

View file

@ -116,6 +116,12 @@ class Settings(BaseSettings):
model_sonnet: str | None = Field(default=None, validation_alias="MODEL_SONNET")
model_haiku: str | None = Field(default=None, validation_alias="MODEL_HAIKU")
# ==================== Per-Provider Proxy ====================
nvidia_nim_proxy: str = Field(default="", validation_alias="NVIDIA_NIM_PROXY")
open_router_proxy: str = Field(default="", validation_alias="OPENROUTER_PROXY")
lmstudio_proxy: str = Field(default="", validation_alias="LMSTUDIO_PROXY")
llamacpp_proxy: str = Field(default="", validation_alias="LLAMACPP_PROXY")
# ==================== Provider Rate Limiting ====================
provider_rate_limit: int = Field(default=40, validation_alias="PROVIDER_RATE_LIMIT")
provider_rate_window: int = Field(

View file

@ -23,6 +23,7 @@ class ProviderConfig(BaseModel):
http_write_timeout: float = 10.0
http_connect_timeout: float = 2.0
enable_thinking: bool = True
proxy: str = ""
class BaseProvider(ABC):

View file

@ -34,6 +34,7 @@ class LlamaCppProvider(BaseProvider):
)
self._client = httpx.AsyncClient(
base_url=self._base_url,
proxy=config.proxy or None,
timeout=httpx.Timeout(
config.http_read_timeout,
connect=config.http_connect_timeout,

View file

@ -34,6 +34,7 @@ class LMStudioProvider(BaseProvider):
)
self._client = httpx.AsyncClient(
base_url=self._base_url,
proxy=config.proxy or None,
timeout=httpx.Timeout(
config.http_read_timeout,
connect=config.http_connect_timeout,

View file

@ -44,6 +44,17 @@ class OpenAICompatibleProvider(BaseProvider):
rate_window=config.rate_window,
max_concurrency=config.max_concurrency,
)
http_client = None
if config.proxy:
http_client = httpx.AsyncClient(
proxy=config.proxy,
timeout=httpx.Timeout(
config.http_read_timeout,
connect=config.http_connect_timeout,
read=config.http_read_timeout,
write=config.http_write_timeout,
),
)
self._client = AsyncOpenAI(
api_key=self._api_key,
base_url=self._base_url,
@ -54,6 +65,7 @@ class OpenAICompatibleProvider(BaseProvider):
read=config.http_read_timeout,
write=config.http_write_timeout,
),
http_client=http_client,
)
async def cleanup(self) -> None:

View file

@ -11,7 +11,7 @@ requires-python = ">=3.14"
dependencies = [
"fastapi[standard]>=0.115.11",
"uvicorn>=0.34.0",
"httpx>=0.25.0",
"httpx[socks]>=0.25.0",
"markdown-it-py>=3.0.0",
"pydantic>=2.0.0",
"python-dotenv>=1.0.0",

View file

@ -202,6 +202,45 @@ async def test_get_provider_passes_http_timeouts_from_settings():
assert timeout.connect == 5.0
@pytest.mark.asyncio
async def test_get_provider_passes_proxy_from_settings():
"""Provider receives configured proxy and builds a proxied HTTP client."""
with (
patch("api.dependencies.get_settings") as mock_settings,
patch("providers.openai_compat.httpx.AsyncClient") as mock_http_client,
patch("providers.openai_compat.AsyncOpenAI") as mock_openai,
):
mock_settings.return_value = _make_mock_settings(
nvidia_nim_proxy="http://proxy.example:8080"
)
provider = get_provider()
assert isinstance(provider, NvidiaNimProvider)
mock_http_client.assert_called_once()
assert mock_http_client.call_args.kwargs["proxy"] == "http://proxy.example:8080"
assert (
mock_openai.call_args.kwargs["http_client"] is mock_http_client.return_value
)
@pytest.mark.asyncio
async def test_get_provider_ignores_non_string_proxy_value():
"""Mock settings without proxy attrs should not fail provider construction."""
with (
patch("api.dependencies.get_settings") as mock_settings,
patch("providers.openai_compat.AsyncOpenAI") as mock_openai,
):
mock_settings.return_value = _make_mock_settings(
nvidia_nim_proxy=MagicMock(name="proxy")
)
provider = get_provider()
assert isinstance(provider, NvidiaNimProvider)
assert mock_openai.call_args.kwargs["http_client"] is None
@pytest.mark.asyncio
async def test_get_provider_nvidia_nim_missing_api_key():
"""NVIDIA NIM with empty API key raises HTTPException 503."""

30
uv.lock generated
View file

@ -516,7 +516,7 @@ source = { editable = "." }
dependencies = [
{ name = "discord-py" },
{ name = "fastapi", extra = ["standard"] },
{ name = "httpx" },
{ name = "httpx", extra = ["socks"] },
{ name = "loguru" },
{ name = "markdown-it-py" },
{ name = "openai" },
@ -558,7 +558,7 @@ requires-dist = [
{ name = "fastapi", extras = ["standard"], specifier = ">=0.115.11" },
{ name = "grpcio", marker = "extra == 'voice'", specifier = ">=1.78.0" },
{ name = "grpcio-tools", marker = "extra == 'voice'", specifier = ">=1.78.0" },
{ name = "httpx", specifier = ">=0.25.0" },
{ name = "httpx", extras = ["socks"], specifier = ">=0.25.0" },
{ name = "librosa", marker = "extra == 'voice-local'", specifier = ">=0.10.0" },
{ name = "loguru", specifier = ">=0.7.0" },
{ name = "markdown-it-py", specifier = ">=3.0.0" },
@ -753,6 +753,11 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" },
]
[package.optional-dependencies]
socks = [
{ name = "socksio" },
]
[[package]]
name = "huggingface-hub"
version = "1.4.1"
@ -1872,6 +1877,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" },
]
[[package]]
name = "socksio"
version = "1.0.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/5c/48a7d9495be3d1c651198fd99dbb6ce190e2274d0f28b9051307bdec6b85/socksio-1.0.0.tar.gz", hash = "sha256:f88beb3da5b5c38b9890469de67d0cb0f9d494b78b106ca1845f96c10b91c4ac", size = 19055, upload-time = "2020-04-17T15:50:34.664Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/37/c3/6eeb6034408dac0fa653d126c9204ade96b819c936e136c5e8a6897eee9c/socksio-1.0.0-py3-none-any.whl", hash = "sha256:95dc1f15f9b34e8d7b16f06d74b8ccf48f609af32ab33c608d08761c5dcbb1f3", size = 12763, upload-time = "2020-04-17T15:50:31.878Z" },
]
[[package]]
name = "soundfile"
version = "0.13.1"
@ -2062,12 +2076,12 @@ dependencies = [
{ name = "typing-extensions" },
]
wheels = [
{ url = "https://download.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:18f87ae628c02f095f2e97756e4fa249ceef6ed6e87d5a3c79b5338abf842511" },
{ url = "https://download.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:db5a61791b7da3c1aa5a496e64cd72dbd4ef3ef2cbb69680fd45dc255b0da2f3" },
{ url = "https://download.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314-win_amd64.whl", hash = "sha256:dda35d473dd34cafa0668be176b9ad2cb69b1ff570d0336715a6541e89e27640" },
{ url = "https://download.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:7781235583ea06b214075c10fa95f83b9805f06af44efc6e9946808413cff94f" },
{ url = "https://download.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:526b737db11d632281795484ec729baae5f193a5a0d76a1f7d822f7897c8b4f5" },
{ url = "https://download.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314t-win_amd64.whl", hash = "sha256:d5ea18790a18b660d655f6e75a8ca6e8d6298b55fc338f8c921764b94c886743" },
{ url = "https://download-r2.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:18f87ae628c02f095f2e97756e4fa249ceef6ed6e87d5a3c79b5338abf842511" },
{ url = "https://download-r2.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:db5a61791b7da3c1aa5a496e64cd72dbd4ef3ef2cbb69680fd45dc255b0da2f3" },
{ url = "https://download-r2.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314-win_amd64.whl", hash = "sha256:dda35d473dd34cafa0668be176b9ad2cb69b1ff570d0336715a6541e89e27640" },
{ url = "https://download-r2.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:7781235583ea06b214075c10fa95f83b9805f06af44efc6e9946808413cff94f" },
{ url = "https://download-r2.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:526b737db11d632281795484ec729baae5f193a5a0d76a1f7d822f7897c8b4f5" },
{ url = "https://download-r2.pytorch.org/whl/cu130/torch-2.10.0%2Bcu130-cp314-cp314t-win_amd64.whl", hash = "sha256:d5ea18790a18b660d655f6e75a8ca6e8d6298b55fc338f8c921764b94c886743" },
]
[[package]]