Improved optimizations to further decrease LLM calls and increase throughput

This commit is contained in:
Alishahryar1 2026-02-18 17:54:41 -08:00
parent 593fb55954
commit e7ac85264f
5 changed files with 53 additions and 4 deletions

View file

@ -99,7 +99,18 @@ def is_filepath_extraction_request(
if "Command:" not in content or "Output:" not in content:
return False, "", ""
if "filepaths" not in content.lower() and "<filepaths>" not in content.lower():
# Match if user content OR system block indicates filepath extraction
user_has_filepaths = (
"filepaths" in content.lower() or "<filepaths>" in content.lower()
)
system_text = (
extract_text_from_content(request_data.system) if request_data.system else ""
)
system_has_extract = (
"extract any file paths" in system_text.lower()
or "file paths that this command" in system_text.lower()
)
if not user_has_filepaths and not system_has_extract:
return False, "", ""
try:

View file

@ -9,7 +9,7 @@ from loguru import logger
from config.settings import Settings
from providers.base import BaseProvider
from providers.exceptions import ProviderError
from providers.exceptions import InvalidRequestError, ProviderError
from providers.logging_utils import build_request_summary, log_request_compact
from .dependencies import get_provider, get_settings
@ -36,6 +36,9 @@ async def create_message(
"""Create a message (always streaming)."""
try:
if not request_data.messages:
raise InvalidRequestError("messages cannot be empty")
optimized = try_optimizations(request_data, settings)
if optimized is not None:
return optimized

View file

@ -81,7 +81,12 @@ def test_error_fallbacks():
RateLimitError,
)
base_payload = {"model": "test", "messages": [], "max_tokens": 10, "stream": True}
base_payload = {
"model": "test",
"messages": [{"role": "user", "content": "Hi"}],
"max_tokens": 10,
"stream": True,
}
def _raise_auth(*args, **kwargs):
raise AuthenticationError("Invalid Key")

View file

@ -10,10 +10,11 @@ from api.detection import (
from api.models.anthropic import Message, MessagesRequest
def _mk_req(messages, tools=None):
def _mk_req(messages, tools=None, system=None):
    """Build a mock request, spec'd on ``MessagesRequest``, for detection tests.

    Args:
        messages: Value exposed as the mock's ``messages`` attribute.
        tools: Value exposed as the mock's ``tools`` attribute.
        system: Value exposed as the mock's ``system`` attribute.

    Returns:
        A ``MagicMock(spec=MessagesRequest)`` with the three attributes set.
    """
    request = MagicMock(spec=MessagesRequest)
    # configure_mock assigns each keyword as an attribute on the mock.
    request.configure_mock(messages=messages, tools=tools, system=system)
    return request
@ -64,6 +65,20 @@ class TestFilepathExtractionDetection:
ok, cmd, out = is_filepath_extraction_request(req)
assert (ok, cmd, out) == (False, "", "")
def test_detects_filepath_extraction_via_system_block(self):
    """Detection succeeds when only the system text asks for file paths.

    The user message carries ``Command:`` and ``Output:`` but never mentions
    "filepaths"; the extraction instruction lives in the system text.
    """
    system_prompt = "Extract any file paths that this command reads or modifies."
    user_message = _mk_msg(
        "user", "Command: ls\nOutput: avazu-ctr\nfree-claude-code"
    )
    request = _mk_req([user_message], tools=None, system=system_prompt)

    matched, command, output = is_filepath_extraction_request(request)

    assert matched is True
    assert command == "ls"
    # Both lines of the command output must be present in the extracted output.
    for expected in ("avazu-ctr", "free-claude-code"):
        assert expected in output
def test_extracts_command_and_output_and_cleans_output(self):
msg = _mk_msg(
"user",

View file

@ -88,6 +88,21 @@ def test_create_message_title_generation_skip(client, mock_settings):
app.dependency_overrides.clear()
def test_create_message_empty_messages_returns_400(client):
    """An empty ``messages`` list posted to /v1/messages is rejected with 400.

    The response body is expected to be an error envelope whose error type is
    ``invalid_request_error`` and whose message mentions "cannot be empty".
    """
    response = client.post(
        "/v1/messages",
        json={"model": "claude-3-sonnet", "max_tokens": 100, "messages": []},
    )
    assert response.status_code == 400

    body = response.json()
    assert body.get("type") == "error"

    # Fall back to an empty dict so a missing "error" key fails the assertions
    # below instead of raising AttributeError.
    error = body.get("error", {})
    assert error.get("type") == "invalid_request_error"
    assert "cannot be empty" in error.get("message", "")
def test_count_tokens_endpoint(client):
payload = {
"model": "claude-3-sonnet",