Improved optimizations to further decrease LLM calls and increase throughput

2026-04-28 03:20:01 +00:00 · 2026-02-18 17:54:41 -08:00 · 2026-02-18 17:54:41 -08:00 · e7ac85264f
commit e7ac85264f
parent 593fb55954
5 changed files with 53 additions and 4 deletions
--- a/api/detection.py
+++ b/api/detection.py
@@ -99,7 +99,18 @@ def is_filepath_extraction_request(
    if "Command:" not in content or "Output:" not in content:
        return False, "", ""

-    if "filepaths" not in content.lower() and "<filepaths>" not in content.lower():
+    # Match if user content OR system block indicates filepath extraction
+    user_has_filepaths = (
+        "filepaths" in content.lower() or "<filepaths>" in content.lower()
+    )
+    system_text = (
+        extract_text_from_content(request_data.system) if request_data.system else ""
+    )
+    system_has_extract = (
+        "extract any file paths" in system_text.lower()
+        or "file paths that this command" in system_text.lower()
+    )
+    if not user_has_filepaths and not system_has_extract:
        return False, "", ""

    try:
--- a/api/routes.py
+++ b/api/routes.py
@@ -9,7 +9,7 @@ from loguru import logger

 from config.settings import Settings
 from providers.base import BaseProvider
-from providers.exceptions import ProviderError
+from providers.exceptions import InvalidRequestError, ProviderError
 from providers.logging_utils import build_request_summary, log_request_compact

 from .dependencies import get_provider, get_settings
@@ -36,6 +36,9 @@ async def create_message(
    """Create a message (always streaming)."""

    try:
+        if not request_data.messages:
+            raise InvalidRequestError("messages cannot be empty")
+
        optimized = try_optimizations(request_data, settings)
        if optimized is not None:
            return optimized
--- a/tests/api/test_api.py
+++ b/tests/api/test_api.py
@@ -81,7 +81,12 @@ def test_error_fallbacks():
        RateLimitError,
    )

-    base_payload = {"model": "test", "messages": [], "max_tokens": 10, "stream": True}
+    base_payload = {
+        "model": "test",
+        "messages": [{"role": "user", "content": "Hi"}],
+        "max_tokens": 10,
+        "stream": True,
+    }

    def _raise_auth(*args, **kwargs):
        raise AuthenticationError("Invalid Key")
--- a/tests/api/test_request_utils_filepaths_and_suggestions.py
+++ b/tests/api/test_request_utils_filepaths_and_suggestions.py
@@ -10,10 +10,11 @@ from api.detection import (
 from api.models.anthropic import Message, MessagesRequest


-def _mk_req(messages, tools=None):
+def _mk_req(messages, tools=None, system=None):
    req = MagicMock(spec=MessagesRequest)
    req.messages = messages
    req.tools = tools
+    req.system = system
    return req


@@ -64,6 +65,20 @@ class TestFilepathExtractionDetection:
        ok, cmd, out = is_filepath_extraction_request(req)
        assert (ok, cmd, out) == (False, "", "")

+    def test_detects_filepath_extraction_via_system_block(self):
+        """Command: + Output: in user, no filepaths in user; system has extract instructions."""
+        msg = _mk_msg("user", "Command: ls\nOutput: avazu-ctr\nfree-claude-code")
+        req = _mk_req(
+            [msg],
+            tools=None,
+            system="Extract any file paths that this command reads or modifies.",
+        )
+        ok, cmd, out = is_filepath_extraction_request(req)
+        assert ok is True
+        assert cmd == "ls"
+        assert "avazu-ctr" in out
+        assert "free-claude-code" in out
+
    def test_extracts_command_and_output_and_cleans_output(self):
        msg = _mk_msg(
            "user",
--- a/tests/api/test_routes_optimizations.py
+++ b/tests/api/test_routes_optimizations.py
@@ -88,6 +88,21 @@ def test_create_message_title_generation_skip(client, mock_settings):
    app.dependency_overrides.clear()


+def test_create_message_empty_messages_returns_400(client):
+    """POST /v1/messages with messages: [] returns 400 invalid_request_error."""
+    payload = {
+        "model": "claude-3-sonnet",
+        "max_tokens": 100,
+        "messages": [],
+    }
+    response = client.post("/v1/messages", json=payload)
+    assert response.status_code == 400
+    data = response.json()
+    assert data.get("type") == "error"
+    assert data.get("error", {}).get("type") == "invalid_request_error"
+    assert "cannot be empty" in data.get("error", {}).get("message", "")
+
+
 def test_count_tokens_endpoint(client):
    payload = {
        "model": "claude-3-sonnet",