From e5a096049dc73f3577d69fd99accc6cf82fa93c4 Mon Sep 17 00:00:00 2001
From: Alishahryar1
Date: Sun, 15 Feb 2026 10:50:53 -0800
Subject: [PATCH] feat: add OpenRouter support and configuration options

- Introduced OpenRouter as a new provider option in settings and environment configuration.
- Updated README.md to include instructions for using OpenRouter.
- Enhanced the message converter to support reasoning content for OpenRouter.
- Added tests for OpenRouter provider functionality and message conversion.
- Updated dependencies to include OpenRouterProvider.
---
 .env.example                                  |   9 +
 README.md                                     |  71 +++-
 api/dependencies.py                           |  16 +-
 config/settings.py                            |  10 +
 providers/__init__.py                         |   2 +
 .../nvidia_nim/utils/message_converter.py     |  29 +-
 providers/open_router/__init__.py             |   5 +
 providers/open_router/client.py               | 370 ++++++++++++++++++
 providers/open_router/request.py              |  80 ++++
 tests/conftest.py                             |   7 +
 tests/test_converter.py                       |  17 +
 tests/test_dependencies.py                    |  17 +
 tests/test_open_router.py                     | 179 +++++++++
 13 files changed, 788 insertions(+), 24 deletions(-)
 create mode 100644 providers/open_router/__init__.py
 create mode 100644 providers/open_router/client.py
 create mode 100644 providers/open_router/request.py
 create mode 100644 tests/test_open_router.py

diff --git a/.env.example b/.env.example
index 0400292..990c2e8 100644
--- a/.env.example
+++ b/.env.example
@@ -1,3 +1,6 @@
+# Provider: "nvidia_nim" | "open_router"
+PROVIDER_TYPE=nvidia_nim
+
 # All Claude model requests are mapped to this model
 MODEL="stepfun-ai/step-3.5-flash"
 
@@ -8,6 +11,12 @@ NVIDIA_NIM_RATE_LIMIT=40
 NVIDIA_NIM_RATE_WINDOW=60
 
+# OpenRouter Config
+OPENROUTER_API_KEY=""
+OPENROUTER_RATE_LIMIT=1
+OPENROUTER_RATE_WINDOW=1
+
+
 # Telegram Config
 TELEGRAM_BOT_TOKEN=""
 ALLOWED_TELEGRAM_USER_ID=""
diff --git a/README.md b/README.md
index afca186..e1a11d4 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 # 🚀 Free Claude Code
 
-### Use Claude Code for free with NVIDIA NIM
+### Use Claude Code for free with NVIDIA NIM or OpenRouter
 
 [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg?style=for-the-badge)](https://opensource.org/licenses/MIT)
 [![Python 3.14](https://img.shields.io/badge/python-3.14-3776ab.svg?style=for-the-badge&logo=python&logoColor=white)](https://www.python.org/downloads/)
@@ -12,10 +12,10 @@
 [![Code style: Ruff](https://img.shields.io/badge/code%20formatting-ruff-f5a623.svg?style=for-the-badge)](https://github.com/astral-sh/ruff)
 [![Logging: Loguru](https://img.shields.io/badge/logging-loguru-4ecdc4.svg?style=for-the-badge)](https://github.com/Delgan/loguru)
 
-A lightweight proxy that converts Claude Code's Anthropic API requests to NVIDIA NIM format.
-**40 reqs/min free** · **Telegram bot** · **VSCode & CLI**
+A lightweight proxy that converts Claude Code's Anthropic API requests to NVIDIA NIM or OpenRouter format.
+**40 reqs/min free** · **Provider switching** · **Telegram bot** · **VSCode & CLI**
 
-[Quick Start](#quick-start) · [Telegram Bot](#telegram-bot-integration) · [Models](#available-models) · [Configuration](#configuration)
+[Quick Start](#quick-start) · [Provider Switching](#provider-switching) · [Telegram Bot](#telegram-bot-integration) · [Models](#available-models) · [Configuration](#configuration)
 
 ---
 
@@ -27,7 +27,9 @@
 
 ### 1. Prerequisites
 
-1. Get a new API key from [build.nvidia.com/settings/api-keys](https://build.nvidia.com/settings/api-keys)
+1. Get an API key:
+   - **NVIDIA NIM**: [build.nvidia.com/settings/api-keys](https://build.nvidia.com/settings/api-keys)
+   - **OpenRouter**: [openrouter.ai/keys](https://openrouter.ai/keys)
 2. Install [claude-code](https://github.com/anthropics/claude-code)
 3. Install [uv](https://github.com/astral-sh/uv)
 
@@ -40,13 +42,22 @@ cd free-claude-code
 cp .env.example .env
 ```
 
-Edit `.env`:
+Edit `.env` for **NVIDIA NIM** (default):
 
 ```dotenv
+PROVIDER_TYPE=nvidia_nim
 NVIDIA_NIM_API_KEY=nvapi-your-key-here
 MODEL=moonshotai/kimi-k2-thinking
 ```
 
+Or for **OpenRouter**:
+
+```dotenv
+PROVIDER_TYPE=open_router
+OPENROUTER_API_KEY=sk-or-your-key-here
+MODEL=stepfun/step-3.5-flash:free
+```
+
 ---
 
 ### Claude Code CLI
@@ -63,7 +74,7 @@ uv run uvicorn server:app --host 0.0.0.0 --port 8082
 ANTHROPIC_AUTH_TOKEN=freecc ANTHROPIC_BASE_URL=http://localhost:8082 claude
 ```
 
-That's it! Claude Code now uses NVIDIA NIM for free.
+That's it! Claude Code now uses your configured provider for free.
 
 ---
 
@@ -90,7 +101,20 @@ uv run uvicorn server:app --host 0.0.0.0 --port 8082
 
 6. **If you see the login screen** ("How do you want to log in?"): Click **Anthropic Console**, then authorize. The extension will start working. You may be redirected to buy credits in the browser. Ignore that; the extension already works.
 
-That's it! The Claude Code VSCode extension now uses NVIDIA NIM for free. To go back to Anthropic models just comment out the added block and reload extensions.
+That's it! The Claude Code VSCode extension now uses your configured provider for free. To go back to Anthropic models, just comment out the added block and reload extensions.
+
+---
+
+### Provider Switching
+
+Switch between **NVIDIA NIM** and **OpenRouter** via `PROVIDER_TYPE`:
+
+| Provider      | `PROVIDER_TYPE`  | API Key Variable       | Base URL                          |
+| ------------- | ---------------- | ---------------------- | --------------------------------- |
+| NVIDIA NIM    | `nvidia_nim`     | `NVIDIA_NIM_API_KEY`   | `integrate.api.nvidia.com/v1`     |
+| OpenRouter    | `open_router`    | `OPENROUTER_API_KEY`   | `openrouter.ai/api/v1`            |
+
+OpenRouter gives access to hundreds of models (stepfun, OpenAI, Anthropic, etc.) through a single API. Set `MODEL` to any OpenRouter model ID, e.g. `stepfun/step-3.5-flash:free`.
 
 ---
 
@@ -139,9 +163,7 @@ uv run uvicorn server:app --host 0.0.0.0 --port 8082
 
 ## Available Models
 
-See [`nvidia_nim_models.json`](nvidia_nim_models.json) for the full list of supported models.
-
-Popular choices:
+**NVIDIA NIM** (`PROVIDER_TYPE=nvidia_nim`): See [`nvidia_nim_models.json`](nvidia_nim_models.json) for the full list. Popular choices:
 
 - `z-ai/glm5`
 - `stepfun-ai/step-3.5-flash`
 - `moonshotai/kimi-k2.5`
 - `minimaxai/minimax-m2.1`
 - `mistralai/devstral-2-123b-instruct-2512`
 
-Browse all models at [build.nvidia.com](https://build.nvidia.com/explore/discover)
+Browse at [build.nvidia.com](https://build.nvidia.com/explore/discover).
 
-### Updating the Model List
+### Updating the NIM Model List
 
-To update `nvidia_nim_models.json` with the latest models from NVIDIA NIM, run the following command:
+To update `nvidia_nim_models.json` with the latest models from NVIDIA NIM:
 
 ```bash
 curl "https://integrate.api.nvidia.com/v1/models" > nvidia_nim_models.json
 ```
 
+**OpenRouter** (`PROVIDER_TYPE=open_router`): Hundreds of models from stepfun, OpenAI, Anthropic, Google, etc. Examples:
+
+- `stepfun/step-3.5-flash:free`
+- `openai/gpt-4o-mini`
+- `anthropic/claude-3.5-sonnet`
+
+Browse at [openrouter.ai/models](https://openrouter.ai/models).
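+
+You can also pull the current list straight from the API in the terminal (a minimal sketch, mirroring the NIM `curl` example above; it assumes `OPENROUTER_API_KEY` is exported in your shell):
+
+```bash
+curl "https://openrouter.ai/api/v1/models" \
+  -H "Authorization: Bearer $OPENROUTER_API_KEY"
+```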
+
 ## Configuration
 
 | Variable                           | Description                      | Default                        |
 | --------------------------------- | ------------------------------- | ----------------------------- |
-| `NVIDIA_NIM_API_KEY`               | Your NVIDIA API key              | required                       |
+| `PROVIDER_TYPE`                    | Provider: `nvidia_nim` or `open_router`       | `nvidia_nim`      |
+| `NVIDIA_NIM_API_KEY`               | Your NVIDIA API key (NIM provider)            | required          |
+| `OPENROUTER_API_KEY`               | Your OpenRouter API key (OpenRouter provider) | required          |
 | `MODEL`                            | Model to use for all requests    | `stepfun-ai/step-3.5-flash`    |
 | `CLAUDE_WORKSPACE`                 | Directory for agent workspace    | `./agent_workspace`            |
 | `ALLOWED_DIR`                      | Allowed directories for agent    | `""`                           |
@@ -177,10 +209,13 @@ curl "https://integrate.api.nvidia.com/v1/models" > nvidia_nim_models.json
 | `ALLOWED_TELEGRAM_USER_ID`         | Allowed Telegram User ID         | `""`                           |
 | `MESSAGING_RATE_LIMIT`             | Telegram messages per window     | `1`                            |
 | `MESSAGING_RATE_WINDOW`            | Messaging window (seconds)       | `1`                            |
-| `NVIDIA_NIM_RATE_LIMIT`            | API requests per window          | `40`                           |
-| `NVIDIA_NIM_RATE_WINDOW`           | Rate limit window (seconds)      | `60`                           |
+| `NVIDIA_NIM_RATE_LIMIT`            | NIM API requests per window      | `40`                           |
+| `NVIDIA_NIM_RATE_WINDOW`           | NIM rate limit window (seconds)  | `60`                           |
+| `OPENROUTER_RATE_LIMIT`            | OpenRouter requests per window   | `40`                           |
+| `OPENROUTER_RATE_WINDOW`           | OpenRouter rate limit window     | `60`                           |
 
-The NVIDIA NIM base URL is fixed to `https://integrate.api.nvidia.com/v1`.
+- **NVIDIA NIM** base URL: `https://integrate.api.nvidia.com/v1`
+- **OpenRouter** base URL: `https://openrouter.ai/api/v1`
 
 **NIM Settings (prefix `NVIDIA_NIM_`)**
 
diff --git a/api/dependencies.py b/api/dependencies.py
index 0cfd464..b4f96e2 100644
--- a/api/dependencies.py
+++ b/api/dependencies.py
@@ -35,14 +35,26 @@ def get_provider() -> BaseProvider:
         )
         _provider = NvidiaNimProvider(config)
         logger.info("Provider initialized: %s", settings.provider_type)
+    elif settings.provider_type == "open_router":
+        from providers.open_router import OpenRouterProvider
+
+        config = ProviderConfig(
+            api_key=settings.open_router_api_key,
+            base_url="https://openrouter.ai/api/v1",
+            rate_limit=settings.open_router_rate_limit,
+            rate_window=settings.open_router_rate_window,
+            nim_settings=settings.nim,
+        )
+        _provider = OpenRouterProvider(config)
+        logger.info("Provider initialized: %s", settings.provider_type)
     else:
         logger.error(
-            "Unknown provider_type: '%s'. Supported: 'nvidia_nim'",
+            "Unknown provider_type: '%s'. Supported: 'nvidia_nim', 'open_router'",
             settings.provider_type,
         )
         raise ValueError(
             f"Unknown provider_type: '{settings.provider_type}'. 
" - f"Supported: 'nvidia_nim'" + f"Supported: 'nvidia_nim', 'open_router'" ) return _provider diff --git a/config/settings.py b/config/settings.py index 9068ee8..29cfd45 100644 --- a/config/settings.py +++ b/config/settings.py @@ -19,8 +19,18 @@ class Settings(BaseSettings): """Application settings loaded from environment variables.""" # ==================== Provider Selection ==================== + # Valid: "nvidia_nim" | "open_router" provider_type: str = "nvidia_nim" + # ==================== OpenRouter Config ==================== + open_router_api_key: str = Field(default="", validation_alias="OPENROUTER_API_KEY") + open_router_rate_limit: int = Field( + default=40, validation_alias="OPENROUTER_RATE_LIMIT" + ) + open_router_rate_window: int = Field( + default=60, validation_alias="OPENROUTER_RATE_WINDOW" + ) + # ==================== Messaging Platform Selection ==================== messaging_platform: str = "telegram" diff --git a/providers/__init__.py b/providers/__init__.py index 2c968cc..cd9760f 100644 --- a/providers/__init__.py +++ b/providers/__init__.py @@ -2,6 +2,7 @@ from .base import BaseProvider, ProviderConfig from .nvidia_nim import NvidiaNimProvider +from .open_router import OpenRouterProvider from .exceptions import ( ProviderError, AuthenticationError, @@ -15,6 +16,7 @@ __all__ = [ "BaseProvider", "ProviderConfig", "NvidiaNimProvider", + "OpenRouterProvider", "ProviderError", "AuthenticationError", "InvalidRequestError", diff --git a/providers/nvidia_nim/utils/message_converter.py b/providers/nvidia_nim/utils/message_converter.py index 0bb3b85..bcd7ecf 100644 --- a/providers/nvidia_nim/utils/message_converter.py +++ b/providers/nvidia_nim/utils/message_converter.py @@ -22,8 +22,17 @@ class AnthropicToOpenAIConverter: """Converts Anthropic message format to OpenAI format.""" @staticmethod - def convert_messages(messages: List[Any]) -> List[Dict[str, Any]]: - """Convert a list of Anthropic messages to OpenAI format.""" + def convert_messages( + messages: List[Any], + *, + include_reasoning_for_openrouter: bool = False, + ) -> List[Dict[str, Any]]: + """Convert a list of Anthropic messages to OpenAI format. + + When include_reasoning_for_openrouter is True, assistant messages with + thinking blocks get reasoning_content added for OpenRouter multi-turn + reasoning continuation. 
+        """
         result = []
 
         for msg in messages:
@@ -35,7 +44,10 @@ class AnthropicToOpenAIConverter:
             elif isinstance(content, list):
                 if role == "assistant":
                     result.extend(
-                        AnthropicToOpenAIConverter._convert_assistant_message(content)
+                        AnthropicToOpenAIConverter._convert_assistant_message(
+                            content,
+                            include_reasoning_for_openrouter=include_reasoning_for_openrouter,
+                        )
                     )
                 elif role == "user":
                     result.extend(
@@ -47,9 +59,14 @@ class AnthropicToOpenAIConverter:
         return result
 
     @staticmethod
-    def _convert_assistant_message(content: List[Any]) -> List[Dict[str, Any]]:
+    def _convert_assistant_message(
+        content: List[Any],
+        *,
+        include_reasoning_for_openrouter: bool = False,
+    ) -> List[Dict[str, Any]]:
         """Convert assistant message blocks, preserving interleaved thinking+text order."""
         content_parts: List[str] = []
+        thinking_parts: List[str] = []
         tool_calls: List[Dict[str, Any]] = []
 
         for block in content:
@@ -60,6 +77,8 @@ class AnthropicToOpenAIConverter:
             elif block_type == "thinking":
                 thinking = get_block_attr(block, "thinking", "")
                 content_parts.append(f"<think>\n{thinking}\n</think>")
+                if include_reasoning_for_openrouter:
+                    thinking_parts.append(thinking)
             elif block_type == "tool_use":
                 tool_input = get_block_attr(block, "input", {})
                 tool_calls.append(
@@ -88,6 +107,8 @@ class AnthropicToOpenAIConverter:
         }
         if tool_calls:
             msg["tool_calls"] = tool_calls
+        if include_reasoning_for_openrouter and thinking_parts:
+            msg["reasoning_content"] = "\n".join(thinking_parts)
 
         return [msg]
 
diff --git a/providers/open_router/__init__.py b/providers/open_router/__init__.py
new file mode 100644
index 0000000..2253357
--- /dev/null
+++ b/providers/open_router/__init__.py
@@ -0,0 +1,5 @@
+"""OpenRouter provider - OpenAI-compatible API for hundreds of models."""
+
+from .client import OpenRouterProvider
+
+__all__ = ["OpenRouterProvider"]
diff --git a/providers/open_router/client.py b/providers/open_router/client.py
new file mode 100644
index 0000000..57bb8d5
--- /dev/null
+++ b/providers/open_router/client.py
@@ -0,0 +1,370 @@
+"""OpenRouter provider implementation."""
+
+import json
+import logging
+import uuid
+from typing import Any, AsyncIterator
+
+from loguru import logger as loguru_logger
+from openai import AsyncOpenAI
+
+from providers.base import BaseProvider, ProviderConfig
+from providers.rate_limit import GlobalRateLimiter
+from providers.nvidia_nim.errors import map_error
+from providers.nvidia_nim.utils import (
+    SSEBuilder,
+    map_stop_reason,
+    ThinkTagParser,
+    HeuristicToolParser,
+    ContentType,
+)
+
+from .request import build_request_body
+
+logger = logging.getLogger(__name__)
+
+OPENROUTER_BASE_URL = "https://openrouter.ai/api/v1"
+
+
+class OpenRouterProvider(BaseProvider):
+    """OpenRouter provider using OpenAI-compatible API."""
+
+    def __init__(self, config: ProviderConfig):
+        super().__init__(config)
+        self._api_key = config.api_key
+        self._base_url = (config.base_url or OPENROUTER_BASE_URL).rstrip("/")
+        self._global_rate_limiter = GlobalRateLimiter.get_instance(
+            rate_limit=config.rate_limit,
+            rate_window=config.rate_window,
+        )
+        self._client = AsyncOpenAI(
+            api_key=self._api_key,
+            base_url=self._base_url,
+            max_retries=0,
+            timeout=300.0,
+        )
+
+    def _build_request_body(self, request: Any) -> dict:
+        """Internal helper for tests and shared building."""
+        return build_request_body(request)
+
+    async def stream_response(
+        self,
+        request: Any,
+        input_tokens: int = 0,
+        *,
+        request_id: str | None = None,
+    ) -> AsyncIterator[str]:
+        """Stream response in Anthropic SSE format."""
+        with 
loguru_logger.contextualize(request_id=request_id): + async for event in self._stream_response_impl( + request, input_tokens, request_id + ): + yield event + + async def _stream_response_impl( + self, + request: Any, + input_tokens: int, + request_id: str | None, + ) -> AsyncIterator[str]: + """Internal streaming implementation with context bound.""" + message_id = f"msg_{uuid.uuid4()}" + sse = SSEBuilder(message_id, request.model, input_tokens) + + body = self._build_request_body(request) + req_tag = f" request_id={request_id}" if request_id else "" + logger.info( + "OPENROUTER_STREAM:%s model=%s msgs=%d tools=%d", + req_tag, + body.get("model"), + len(body.get("messages", [])), + len(body.get("tools", [])), + ) + + yield sse.message_start() + + think_parser = ThinkTagParser() + heuristic_parser = HeuristicToolParser() + + finish_reason = None + usage_info = None + error_occurred = False + error_message = "" + + try: + stream = await self._global_rate_limiter.execute_with_retry( + self._client.chat.completions.create, **body, stream=True + ) + async for chunk in stream: + if getattr(chunk, "usage", None): + usage_info = chunk.usage + + if not chunk.choices: + continue + + choice = chunk.choices[0] + delta = choice.delta + if delta is None: + continue + + if choice.finish_reason: + finish_reason = choice.finish_reason + logger.debug("OPENROUTER finish_reason: %s", finish_reason) + + # Handle reasoning_content (OpenRouter/OpenAI extended format) + reasoning = getattr(delta, "reasoning_content", None) + if reasoning: + for event in sse.ensure_thinking_block(): + yield event + yield sse.emit_thinking_delta(reasoning) + + # Handle reasoning_details (e.g. stepfun models) + reasoning_details = getattr(delta, "reasoning_details", None) + if reasoning_details and isinstance(reasoning_details, list): + for item in reasoning_details: + text = item.get("text", "") if isinstance(item, dict) else "" + if text: + for event in sse.ensure_thinking_block(): + yield event + yield sse.emit_thinking_delta(text) + + # Handle text content + if delta.content: + for part in think_parser.feed(delta.content): + if part.type == ContentType.THINKING: + for event in sse.ensure_thinking_block(): + yield event + yield sse.emit_thinking_delta(part.content) + else: + filtered_text, detected_tools = heuristic_parser.feed( + part.content + ) + + if filtered_text: + for event in sse.ensure_text_block(): + yield event + yield sse.emit_text_delta(filtered_text) + + for tool_use in detected_tools: + for event in sse.close_content_blocks(): + yield event + + block_idx = sse.blocks.allocate_index() + if tool_use.get("name") == "Task" and isinstance( + tool_use.get("input"), dict + ): + tool_use["input"]["run_in_background"] = False + yield sse.content_block_start( + block_idx, + "tool_use", + id=tool_use["id"], + name=tool_use["name"], + ) + yield sse.content_block_delta( + block_idx, + "input_json_delta", + json.dumps(tool_use["input"]), + ) + yield sse.content_block_stop(block_idx) + + # Handle native tool calls + if delta.tool_calls: + for event in sse.close_content_blocks(): + yield event + for tc in delta.tool_calls: + tc_info = { + "index": tc.index, + "id": tc.id, + "function": { + "name": tc.function.name, + "arguments": tc.function.arguments, + }, + } + for event in self._process_tool_call(tc_info, sse): + yield event + + except Exception as e: + req_tag = f" request_id={request_id}" if request_id else "" + logger.error("OPENROUTER_ERROR:%s %s: %s", req_tag, type(e).__name__, e) + mapped_e = map_error(e) + error_occurred 
= True + error_message = str(mapped_e) + logger.info( + "OPENROUTER_STREAM: Emitting SSE error event for %s%s", + type(e).__name__, + req_tag, + ) + for event in sse.close_content_blocks(): + yield event + for event in sse.emit_error(error_message): + yield event + + # Flush remaining content + remaining = think_parser.flush() + if remaining: + if remaining.type == ContentType.THINKING: + for event in sse.ensure_thinking_block(): + yield event + yield sse.emit_thinking_delta(remaining.content) + else: + for event in sse.ensure_text_block(): + yield event + yield sse.emit_text_delta(remaining.content) + + for tool_use in heuristic_parser.flush(): + for event in sse.close_content_blocks(): + yield event + + block_idx = sse.blocks.allocate_index() + yield sse.content_block_start( + block_idx, + "tool_use", + id=tool_use["id"], + name=tool_use["name"], + ) + if tool_use.get("name") == "Task" and isinstance( + tool_use.get("input"), dict + ): + tool_use["input"]["run_in_background"] = False + yield sse.content_block_delta( + block_idx, + "input_json_delta", + json.dumps(tool_use["input"]), + ) + yield sse.content_block_stop(block_idx) + + if ( + not error_occurred + and sse.blocks.text_index == -1 + and not sse.blocks.tool_indices + ): + for event in sse.ensure_text_block(): + yield event + yield sse.emit_text_delta(" ") + + for event in self._flush_task_arg_buffers(sse): + yield event + + for event in sse.close_all_blocks(): + yield event + + output_tokens = ( + usage_info.completion_tokens + if usage_info and hasattr(usage_info, "completion_tokens") + else sse.estimate_output_tokens() + ) + if usage_info and hasattr(usage_info, "prompt_tokens"): + provider_input = usage_info.prompt_tokens + if isinstance(provider_input, int): + diff = provider_input - input_tokens + logger.debug( + "TOKEN_ESTIMATE: our=%d provider=%d diff=%+d", + input_tokens, + provider_input, + diff, + ) + yield sse.message_delta(map_stop_reason(finish_reason), output_tokens) + yield sse.message_stop() + yield sse.done() + + def _process_tool_call(self, tc: dict, sse: Any): + """Process a single tool call delta and yield SSE events.""" + tc_index = tc.get("index", 0) + if tc_index < 0: + tc_index = len(sse.blocks.tool_indices) + + fn_delta = tc.get("function", {}) + incoming_name = fn_delta.get("name") + if incoming_name is not None: + prev = sse.blocks.tool_names.get(tc_index, "") + if not prev: + sse.blocks.tool_names[tc_index] = incoming_name + elif prev == incoming_name: + pass + elif isinstance(prev, str) and isinstance(incoming_name, str): + if incoming_name.startswith(prev): + sse.blocks.tool_names[tc_index] = incoming_name + elif prev.startswith(incoming_name): + pass + else: + sse.blocks.tool_names[tc_index] = prev + incoming_name + else: + sse.blocks.tool_names[tc_index] = str(prev) + str(incoming_name) + + if tc_index not in sse.blocks.tool_indices: + name = sse.blocks.tool_names.get(tc_index, "") + if name or tc.get("id"): + tool_id = tc.get("id") or f"tool_{uuid.uuid4()}" + yield sse.start_tool_block(tc_index, tool_id, name) + sse.blocks.tool_started[tc_index] = True + elif not sse.blocks.tool_started.get(tc_index) and sse.blocks.tool_names.get( + tc_index + ): + tool_id = tc.get("id") or f"tool_{uuid.uuid4()}" + name = sse.blocks.tool_names[tc_index] + yield sse.start_tool_block(tc_index, tool_id, name) + sse.blocks.tool_started[tc_index] = True + + args = fn_delta.get("arguments", "") + if args: + if not sse.blocks.tool_started.get(tc_index): + tool_id = tc.get("id") or f"tool_{uuid.uuid4()}" + name = 
sse.blocks.tool_names.get(tc_index, "tool_call") or "tool_call" + yield sse.start_tool_block(tc_index, tool_id, name) + sse.blocks.tool_started[tc_index] = True + + current_name = sse.blocks.tool_names.get(tc_index, "") + if current_name == "Task": + if not sse.blocks.task_args_emitted.get(tc_index, False): + buf = sse.blocks.task_arg_buffer.get(tc_index, "") + args + sse.blocks.task_arg_buffer[tc_index] = buf + try: + args_json = json.loads(buf) + except Exception: + return + if args_json.get("run_in_background") is not False: + logger.info( + "OPENROUTER_INTERCEPT: Forcing run_in_background=False for Task %s", + tc.get("id") + or sse.blocks.tool_ids.get(tc_index, "unknown"), + ) + args_json["run_in_background"] = False + sse.blocks.task_args_emitted[tc_index] = True + sse.blocks.task_arg_buffer.pop(tc_index, None) + yield sse.emit_tool_delta(tc_index, json.dumps(args_json)) + return + + yield sse.emit_tool_delta(tc_index, args) + + def _flush_task_arg_buffers(self, sse: Any): + """Emit buffered Task args as a single JSON delta (best-effort).""" + for tool_index, buf in list(sse.blocks.task_arg_buffer.items()): + if sse.blocks.task_args_emitted.get(tool_index, False): + sse.blocks.task_arg_buffer.pop(tool_index, None) + continue + + tool_id = sse.blocks.tool_ids.get(tool_index, "unknown") + out = "{}" + try: + args_json = json.loads(buf) + if args_json.get("run_in_background") is not False: + logger.info( + "OPENROUTER_INTERCEPT: Forcing run_in_background=False for Task %s", + tool_id, + ) + args_json["run_in_background"] = False + out = json.dumps(args_json) + except Exception as e: + prefix = buf[:120] + logger.warning( + "OPENROUTER_INTERCEPT: Task args invalid JSON (id=%s len=%d prefix=%r): %s", + tool_id, + len(buf), + prefix, + e, + ) + + sse.blocks.task_args_emitted[tool_index] = True + sse.blocks.task_arg_buffer.pop(tool_index, None) + yield sse.emit_tool_delta(tool_index, out) diff --git a/providers/open_router/request.py b/providers/open_router/request.py new file mode 100644 index 0000000..035d363 --- /dev/null +++ b/providers/open_router/request.py @@ -0,0 +1,80 @@ +"""Request builder for OpenRouter provider.""" + +import logging +from typing import Any, Dict + +from providers.nvidia_nim.utils.message_converter import AnthropicToOpenAIConverter + +logger = logging.getLogger(__name__) + +OPENROUTER_DEFAULT_MAX_TOKENS = 8192 + + +def _set_if_not_none(body: Dict[str, Any], key: str, value: Any) -> None: + if value is not None: + body[key] = value + + +def build_request_body(request_data: Any) -> dict: + """Build OpenAI-format request body from Anthropic request for OpenRouter.""" + logger.debug( + "OPENROUTER_REQUEST: conversion start model=%s msgs=%d", + getattr(request_data, "model", "?"), + len(getattr(request_data, "messages", [])), + ) + messages = AnthropicToOpenAIConverter.convert_messages( + request_data.messages, include_reasoning_for_openrouter=True + ) + + # Add system prompt + system = getattr(request_data, "system", None) + if system: + system_msg = AnthropicToOpenAIConverter.convert_system_prompt(system) + if system_msg: + messages.insert(0, system_msg) + + body: Dict[str, Any] = { + "model": request_data.model, + "messages": messages, + } + + max_tokens = getattr(request_data, "max_tokens", None) + _set_if_not_none(body, "max_tokens", max_tokens or OPENROUTER_DEFAULT_MAX_TOKENS) + + _set_if_not_none(body, "temperature", getattr(request_data, "temperature", None)) + _set_if_not_none(body, "top_p", getattr(request_data, "top_p", None)) + + stop_sequences = 
getattr(request_data, "stop_sequences", None)
+    if stop_sequences:
+        body["stop"] = stop_sequences
+
+    tools = getattr(request_data, "tools", None)
+    if tools:
+        body["tools"] = AnthropicToOpenAIConverter.convert_tools(tools)
+        tool_choice = getattr(request_data, "tool_choice", None)
+        if tool_choice:
+            body["tool_choice"] = tool_choice
+
+    # OpenRouter reasoning: extra_body={"reasoning": {"enabled": True}}
+    extra_body: Dict[str, Any] = {}
+    request_extra = getattr(request_data, "extra_body", None)
+    if request_extra:
+        extra_body.update(request_extra)
+
+    thinking = getattr(request_data, "thinking", None)
+    thinking_enabled = (
+        thinking.enabled if thinking and hasattr(thinking, "enabled") else True
+    )
+    if thinking_enabled:
+        extra_body.setdefault("reasoning", {"enabled": True})
+
+    if extra_body:
+        body["extra_body"] = extra_body
+
+    logger.debug(
+        "OPENROUTER_REQUEST: conversion done model=%s msgs=%d tools=%d",
+        body.get("model"),
+        len(body.get("messages", [])),
+        len(body.get("tools", [])),
+    )
+    return body
diff --git a/tests/conftest.py b/tests/conftest.py
index cd78de1..b73845d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -37,6 +37,13 @@ def nim_provider(provider_config):
     return NvidiaNimProvider(provider_config)
 
 
+@pytest.fixture
+def open_router_provider(provider_config):
+    from providers.open_router import OpenRouterProvider
+
+    return OpenRouterProvider(provider_config)
+
+
 @pytest.fixture
 def mock_cli_session():
     session = MagicMock(spec=CLISession)
diff --git a/tests/test_converter.py b/tests/test_converter.py
index cdb493a..a6dec58 100644
--- a/tests/test_converter.py
+++ b/tests/test_converter.py
@@ -178,6 +178,23 @@ def test_convert_assistant_message_thinking():
         "<think>\nI need to calculate this.\n</think>\n\nThe answer is 4."
     )
     assert result[0]["content"] == expected_content
+    assert "reasoning_content" not in result[0]
+
+
+def test_convert_assistant_message_thinking_include_reasoning_for_openrouter():
+    """When include_reasoning_for_openrouter=True, reasoning_content is added."""
+    content = [
+        MockBlock(type="thinking", thinking="I need to calculate this."),
+        MockBlock(type="text", text="The answer is 4."),
+    ]
+    messages = [MockMessage("assistant", content)]
+    result = AnthropicToOpenAIConverter.convert_messages(
+        messages, include_reasoning_for_openrouter=True
+    )
+
+    assert len(result) == 1
+    assert result[0]["reasoning_content"] == "I need to calculate this."
+    assert "<think>" in result[0]["content"]
 
 
 def test_convert_assistant_message_tool_use():
diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py
index cdc0160..25b6599 100644
--- a/tests/test_dependencies.py
+++ b/tests/test_dependencies.py
@@ -2,6 +2,7 @@ import pytest
 from unittest.mock import AsyncMock, MagicMock, patch
 from api.dependencies import get_provider, get_settings, cleanup_provider
 from providers.nvidia_nim import NvidiaNimProvider
+from providers.open_router import OpenRouterProvider
 from config.nim import NimSettings
 
 
@@ -12,6 +13,9 @@ def _make_mock_settings(**overrides):
     mock.nvidia_nim_api_key = "test_key"
     mock.nvidia_nim_rate_limit = 40
     mock.nvidia_nim_rate_window = 60
+    mock.open_router_api_key = "test_openrouter_key"
+    mock.open_router_rate_limit = 40
+    mock.open_router_rate_window = 60
     mock.nim = NimSettings()
     for key, value in overrides.items():
         setattr(mock, key, value)
@@ -74,6 +78,19 @@ async def test_cleanup_provider_no_client():
     # Should not raise
 
 
+@pytest.mark.asyncio
+async def test_get_provider_open_router():
+    """Test that provider_type=open_router returns OpenRouterProvider."""
+    with patch("api.dependencies.get_settings") as mock_settings:
+        mock_settings.return_value = _make_mock_settings(provider_type="open_router")
+
+        provider = get_provider()
+
+        assert isinstance(provider, OpenRouterProvider)
+        assert provider._base_url == "https://openrouter.ai/api/v1"
+        assert provider._api_key == "test_openrouter_key"
+
+
 @pytest.mark.asyncio
 async def test_get_provider_unknown_type():
     """Test that unknown provider_type raises ValueError."""
diff --git a/tests/test_open_router.py b/tests/test_open_router.py
new file mode 100644
index 0000000..2c7806b
--- /dev/null
+++ b/tests/test_open_router.py
@@ -0,0 +1,179 @@
+"""Tests for OpenRouter provider."""
+
+import pytest
+import json
+from unittest.mock import MagicMock, AsyncMock, patch
+from providers.open_router import OpenRouterProvider
+from providers.base import ProviderConfig
+from config.nim import NimSettings
+
+
+class MockMessage:
+    def __init__(self, role, content):
+        self.role = role
+        self.content = content
+
+
+class MockRequest:
+    def __init__(self, **kwargs):
+        self.model = "stepfun/step-3.5-flash:free"
+        self.messages = [MockMessage("user", "Hello")]
+        self.max_tokens = 100
+        self.temperature = 0.5
+        self.top_p = 0.9
+        self.system = "System prompt"
+        self.stop_sequences = None
+        self.tools = []
+        self.extra_body = {}
+        self.thinking = MagicMock()
+        self.thinking.enabled = True
+        for k, v in kwargs.items():
+            setattr(self, k, v)
+
+
+@pytest.fixture
+def open_router_config():
+    return ProviderConfig(
+        api_key="test_openrouter_key",
+        base_url="https://openrouter.ai/api/v1",
+        rate_limit=10,
+        rate_window=60,
+        nim_settings=NimSettings(),
+    )
+
+
+@pytest.fixture(autouse=True)
+def mock_rate_limiter():
+    """Mock the global rate limiter to prevent waiting."""
+    with patch("providers.open_router.client.GlobalRateLimiter") as mock:
+        instance = mock.get_instance.return_value
+        instance.wait_if_blocked = AsyncMock(return_value=False)
+
+        async def _passthrough(fn, *args, **kwargs):
+            return await fn(*args, **kwargs)
+
+        instance.execute_with_retry = AsyncMock(side_effect=_passthrough)
+        yield instance
+
+
+@pytest.fixture
+def open_router_provider(open_router_config):
+    return OpenRouterProvider(open_router_config)
+
+
+def test_init(open_router_config):
+    """Test provider initialization."""
+    with patch("providers.open_router.client.AsyncOpenAI") as mock_openai:
+        provider = 
OpenRouterProvider(open_router_config) + assert provider._api_key == "test_openrouter_key" + assert provider._base_url == "https://openrouter.ai/api/v1" + mock_openai.assert_called_once() + + +def test_build_request_body_has_reasoning_extra(open_router_provider): + """Request body has extra_body.reasoning.enabled for thinking models.""" + req = MockRequest() + body = open_router_provider._build_request_body(req) + + assert body["model"] == "stepfun/step-3.5-flash:free" + assert body["temperature"] == 0.5 + assert len(body["messages"]) == 2 # System + User + assert body["messages"][0]["role"] == "system" + assert body["messages"][0]["content"] == "System prompt" + + assert "extra_body" in body + assert "reasoning" in body["extra_body"] + assert body["extra_body"]["reasoning"]["enabled"] is True + + +def test_build_request_body_base_url_and_model(open_router_provider): + """Base URL and model are correct in provider config.""" + assert open_router_provider._base_url == "https://openrouter.ai/api/v1" + req = MockRequest(model="stepfun/step-3.5-flash:free") + body = open_router_provider._build_request_body(req) + assert body["model"] == "stepfun/step-3.5-flash:free" + + +@pytest.mark.asyncio +async def test_stream_response_text(open_router_provider): + """Test streaming text response.""" + req = MockRequest() + + mock_chunk1 = MagicMock() + mock_chunk1.choices = [ + MagicMock( + delta=MagicMock(content="Hello", reasoning_content=None), + finish_reason=None, + ) + ] + mock_chunk1.usage = None + + mock_chunk2 = MagicMock() + mock_chunk2.choices = [ + MagicMock( + delta=MagicMock(content=" World", reasoning_content=None), + finish_reason="stop", + ) + ] + mock_chunk2.usage = MagicMock(completion_tokens=10) + + async def mock_stream(): + yield mock_chunk1 + yield mock_chunk2 + + with patch.object( + open_router_provider._client.chat.completions, "create", new_callable=AsyncMock + ) as mock_create: + mock_create.return_value = mock_stream() + + events = [] + async for event in open_router_provider.stream_response(req): + events.append(event) + + assert len(events) > 0 + assert "event: message_start" in events[0] + + text_content = "" + for e in events: + if "event: content_block_delta" in e and '"text_delta"' in e: + for line in e.splitlines(): + if line.startswith("data: "): + data = json.loads(line[6:]) + if "delta" in data and "text" in data["delta"]: + text_content += data["delta"]["text"] + + assert "Hello World" in text_content + + +@pytest.mark.asyncio +async def test_stream_response_reasoning_content(open_router_provider): + """Test streaming with reasoning_content delta.""" + req = MockRequest() + + mock_chunk = MagicMock() + mock_chunk.choices = [ + MagicMock( + delta=MagicMock(content=None, reasoning_content="Thinking..."), + finish_reason=None, + ) + ] + mock_chunk.usage = None + + async def mock_stream(): + yield mock_chunk + + with patch.object( + open_router_provider._client.chat.completions, "create", new_callable=AsyncMock + ) as mock_create: + mock_create.return_value = mock_stream() + + events = [] + async for event in open_router_provider.stream_response(req): + events.append(event) + + found_thinking = False + for e in events: + if "event: content_block_delta" in e and '"thinking_delta"' in e: + if "Thinking..." in e: + found_thinking = True + assert found_thinking
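+
+
+# A minimal extra check (sketch): per request.py above, Anthropic `stop_sequences`
+# should be forwarded as the OpenAI-style `stop` field. Reuses the MockRequest
+# helper defined at the top of this file.
+def test_build_request_body_stop_sequences(open_router_provider):
+    """stop_sequences is forwarded as `stop` in the request body."""
+    req = MockRequest(stop_sequences=["END"])
+    body = open_router_provider._build_request_body(req)
+    assert body["stop"] == ["END"]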