Improved optimizations to decrease LLM calls further and increase throughput

This commit is contained in:
Alishahryar1 2026-02-18 17:54:41 -08:00
parent 593fb55954
commit e7ac85264f
5 changed files with 53 additions and 4 deletions

View file

@@ -81,7 +81,12 @@ def test_error_fallbacks():
RateLimitError,
)
base_payload = {"model": "test", "messages": [], "max_tokens": 10, "stream": True}
base_payload = {
"model": "test",
"messages": [{"role": "user", "content": "Hi"}],
"max_tokens": 10,
"stream": True,
}
def _raise_auth(*args, **kwargs):
raise AuthenticationError("Invalid Key")