mirror of
https://github.com/Skyvern-AI/skyvern.git
synced 2025-09-12 00:14:34 +00:00
Implement LLMRouter (#127)
This commit is contained in:
parent
c58aaba4bb
commit
1c397a13af
3 changed files with 146 additions and 8 deletions
|
@ -1,5 +1,5 @@
|
|||
from dataclasses import dataclass
|
||||
from typing import Any, Awaitable, Protocol
|
||||
from typing import Any, Awaitable, Literal, Protocol
|
||||
|
||||
from skyvern.forge.sdk.models import Step
|
||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||
|
@ -21,6 +21,33 @@ class LLMConfig:
|
|||
return missing_env_vars
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class LLMRouterModelConfig:
|
||||
model_name: str
|
||||
# https://litellm.vercel.app/docs/routing
|
||||
litellm_params: dict[str, Any]
|
||||
tpm: int | None = None
|
||||
rpm: int | None = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class LLMRouterConfig(LLMConfig):
    """Immutable router configuration, extending ``LLMConfig``.

    Adds the routed model list, Redis connection details, model-group
    selection and retry/routing settings on top of the inherited fields.
    Field order is part of the constructor signature, so it must not change.
    """

    # Model entries available to the router.
    model_list: list[LLMRouterModelConfig]
    # Redis endpoint.
    # NOTE(review): presumably used by the router for shared usage
    # tracking — confirm where the router is constructed.
    redis_host: str
    redis_port: int
    # Name of the primary model group.
    main_model_group: str
    # Optional secondary group; None means no fallback is configured.
    fallback_model_group: str | None = None
    # One of the four strategy names below; defaults to usage-based routing.
    routing_strategy: Literal[
        "simple-shuffle",
        "least-busy",
        "usage-based-routing",
        "latency-based-routing",
    ] = "usage-based-routing"
    # Retry settings.
    # NOTE(review): exact semantics are defined by the consumer of this
    # config, not visible here.
    num_retries: int = 2
    retry_delay_seconds: int = 15
    # Verbose flag, on by default.
    # NOTE(review): presumably forwarded to the router's verbose setting —
    # confirm.
    set_verbose: bool = True
|
||||
|
||||
|
||||
class LLMAPIHandler(Protocol):
|
||||
def __call__(
|
||||
self,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue