refactor: remove OpenRouter rollback, shims, and redundant layers

- OpenRouter: native Anthropic only; remove chat_request and OPENROUTER_TRANSPORT
- Drop OpenAICompatibleProvider alias, api.request_utils, voice_pipeline facade
- Simplify OpenRouter SSE, generic reasoning in conversion, messaging dispatch
- Shared markdown table helpers; API optimization response helper; contract guards
- Restore PLAN.md; update docs and tests
Alishahryar1 2026-04-24 21:08:38 -07:00
parent 22837720ca
commit 0e3b2c24b4
43 changed files with 356 additions and 615 deletions
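
The heart of this diff is a consolidation in the optimization handlers: five try_* functions each built the same MessagesResponse literal inline, and the commit extracts that into a shared _text_response helper. Read as plain code rather than as a diff, the consolidated pattern looks roughly like this; a sketch assembled from the hunks below, with the import layout assumed rather than shown:

    import uuid

    from .models.anthropic import MessagesRequest
    from .models.responses import MessagesResponse, Usage

    # One place to build the synthetic assistant reply every try_* handler returns.
    def _text_response(
        request_data: MessagesRequest,
        text: str,
        *,
        input_tokens: int,
        output_tokens: int,
    ) -> MessagesResponse:
        return MessagesResponse(
            id=f"msg_{uuid.uuid4()}",
            model=request_data.model,
            content=[{"type": "text", "text": text}],
            stop_reason="end_turn",
            usage=Usage(input_tokens=input_tokens, output_tokens=output_tokens),
        )

Centralizing the id, stop_reason, and usage construction means a future change to the mock-response shape touches one function instead of five. The helper also drops the role="assistant" field some of the old literals set; presumably MessagesResponse defaults it.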

View file

@@ -13,7 +13,7 @@ from providers.exceptions import ProviderError
 from .dependencies import cleanup_provider
 from .routes import router
-from .runtime import AppRuntime, warn_if_process_auth_token
+from .runtime import AppRuntime
 
 
 # Opt-in to future behavior for python-telegram-bot
 os.environ["PTB_TIMEDELTA"] = "1"
@@ -23,11 +23,6 @@ _settings = get_settings()
 configure_logging(_settings.log_file)
 
 
-def _warn_if_process_auth_token(settings) -> None:
-    """Compatibility wrapper for tests importing the old app helper."""
-    warn_if_process_auth_token(settings)
-
-
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     """Application lifespan manager."""
View file

@@ -22,6 +22,22 @@ from .models.anthropic import MessagesRequest
 from .models.responses import MessagesResponse, Usage
 
 
+def _text_response(
+    request_data: MessagesRequest,
+    text: str,
+    *,
+    input_tokens: int,
+    output_tokens: int,
+) -> MessagesResponse:
+    return MessagesResponse(
+        id=f"msg_{uuid.uuid4()}",
+        model=request_data.model,
+        content=[{"type": "text", "text": text}],
+        stop_reason="end_turn",
+        usage=Usage(input_tokens=input_tokens, output_tokens=output_tokens),
+    )
+
+
 def try_prefix_detection(
     request_data: MessagesRequest, settings: Settings
 ) -> MessagesResponse | None:
@@ -34,12 +50,11 @@ def try_prefix_detection(
         return None
 
     logger.info("Optimization: Fast prefix detection request")
-    return MessagesResponse(
-        id=f"msg_{uuid.uuid4()}",
-        model=request_data.model,
-        content=[{"type": "text", "text": extract_command_prefix(command)}],
-        stop_reason="end_turn",
-        usage=Usage(input_tokens=100, output_tokens=5),
+    return _text_response(
+        request_data,
+        extract_command_prefix(command),
+        input_tokens=100,
+        output_tokens=5,
     )
@@ -53,13 +68,11 @@ def try_quota_mock(
         return None
 
     logger.info("Optimization: Intercepted and mocked quota probe")
-    return MessagesResponse(
-        id=f"msg_{uuid.uuid4()}",
-        model=request_data.model,
-        role="assistant",
-        content=[{"type": "text", "text": "Quota check passed."}],
-        stop_reason="end_turn",
-        usage=Usage(input_tokens=10, output_tokens=5),
+    return _text_response(
+        request_data,
+        "Quota check passed.",
+        input_tokens=10,
+        output_tokens=5,
     )
@@ -73,13 +86,11 @@ def try_title_skip(
         return None
 
     logger.info("Optimization: Skipped title generation request")
-    return MessagesResponse(
-        id=f"msg_{uuid.uuid4()}",
-        model=request_data.model,
-        role="assistant",
-        content=[{"type": "text", "text": "Conversation"}],
-        stop_reason="end_turn",
-        usage=Usage(input_tokens=100, output_tokens=5),
+    return _text_response(
+        request_data,
+        "Conversation",
+        input_tokens=100,
+        output_tokens=5,
     )
@@ -93,13 +104,11 @@ def try_suggestion_skip(
         return None
 
    logger.info("Optimization: Skipped suggestion mode request")
-    return MessagesResponse(
-        id=f"msg_{uuid.uuid4()}",
-        model=request_data.model,
-        role="assistant",
-        content=[{"type": "text", "text": ""}],
-        stop_reason="end_turn",
-        usage=Usage(input_tokens=100, output_tokens=1),
+    return _text_response(
+        request_data,
+        "",
+        input_tokens=100,
+        output_tokens=1,
    )
@@ -116,13 +125,11 @@ def try_filepath_mock(
     filepaths = extract_filepaths_from_command(cmd, output)
 
     logger.info("Optimization: Mocked filepath extraction")
-    return MessagesResponse(
-        id=f"msg_{uuid.uuid4()}",
-        model=request_data.model,
-        role="assistant",
-        content=[{"type": "text", "text": filepaths}],
-        stop_reason="end_turn",
-        usage=Usage(input_tokens=100, output_tokens=10),
+    return _text_response(
+        request_data,
+        filepaths,
+        input_tokens=100,
+        output_tokens=10,
     )
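
The services diff further below imports try_optimizations from this module, but its body is not part of this commit's hunks. As a purely hypothetical illustration of the dispatch, a chain over the try_* handlers above could look like the following, assuming each handler shares the (request_data, settings) signature that try_prefix_detection shows:

    # Hypothetical sketch: try_optimizations is imported elsewhere in this
    # commit, but its body is not in the diff. Handler names are real; the
    # uniform (request_data, settings) signature is an assumption.
    def try_optimizations(
        request_data: MessagesRequest, settings: Settings
    ) -> MessagesResponse | None:
        for handler in (
            try_prefix_detection,
            try_quota_mock,
            try_title_skip,
            try_suggestion_skip,
            try_filepath_mock,
        ):
            response = handler(request_data, settings)
            if response is not None:
                return response
        return None

Each handler declines by returning None, so the first non-None response short-circuits the chain and the real provider is never called.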

View file

@@ -1,5 +0,0 @@
-"""Backward-compatible token counting import for API route handlers."""
-
-from core.anthropic import get_token_count
-
-__all__ = ["get_token_count"]

View file

@@ -4,11 +4,11 @@ from fastapi import APIRouter, Depends, HTTPException, Request, Response
 from loguru import logger
 
 from config.settings import Settings
+from core.anthropic import get_token_count
 
 from .dependencies import get_provider_for_type, get_settings, require_api_key
 from .models.anthropic import MessagesRequest, TokenCountRequest
 from .models.responses import ModelResponse, ModelsListResponse
-from .request_utils import get_token_count
 from .services import ClaudeProxyService
 
 router = APIRouter()
@@ -75,7 +75,6 @@ def _probe_response(allow: str) -> Response:
 @router.post("/v1/messages")
 async def create_message(
     request_data: MessagesRequest,
-    _raw_request: Request,
     service: ClaudeProxyService = Depends(get_proxy_service),
     _auth=Depends(require_api_key),
 ):

View file

@@ -12,7 +12,7 @@ from fastapi.responses import StreamingResponse
 from loguru import logger
 
 from config.settings import Settings
-from core.anthropic import get_user_facing_error_message
+from core.anthropic import get_token_count, get_user_facing_error_message
 from providers.base import BaseProvider
 from providers.exceptions import InvalidRequestError, ProviderError
@@ -20,7 +20,6 @@ from .model_router import ModelRouter
 from .models.anthropic import MessagesRequest, TokenCountRequest
 from .models.responses import TokenCountResponse
 from .optimization_handlers import try_optimizations
-from .request_utils import get_token_count
 
 
 TokenCounter = Callable[[list[Any], str | list[Any] | None, list[Any] | None], int]
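
The TokenCounter alias pins down the counter's shape: messages, then an optional system prompt, then optional tools, returning an int; core.anthropic.get_token_count now fills that role directly, with the request_utils shim gone. A hypothetical sketch of how the alias supports injection; the function name and the field names on the request and response models are illustrative assumptions, not taken from this diff:

    # Hypothetical sketch: a TokenCounter used as an injectable dependency.
    # count_request_tokens and the .messages/.system/.tools/input_tokens
    # names are assumptions for illustration.
    def count_request_tokens(
        request: TokenCountRequest,
        counter: TokenCounter = get_token_count,
    ) -> TokenCountResponse:
        # Positional order follows the alias: messages, system, tools -> int.
        return TokenCountResponse(
            input_tokens=counter(request.messages, request.system, request.tools)
        )

Typing the dependency this way lets tests substitute a stub counter without monkeypatching core.anthropic.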