mirror of
https://github.com/eigent-ai/eigent.git
synced 2026-05-17 04:01:18 +00:00
Co-authored-by: bytecii <bytecii@users.noreply.github.com> Co-authored-by: Wendong-Fan <w3ndong.fan@gmail.com> Co-authored-by: Wendong-Fan <133094783+Wendong-Fan@users.noreply.github.com>
108 lines
3.5 KiB
Python
108 lines
3.5 KiB
Python
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
|
|
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
from dotenv import dotenv_values, load_dotenv
|
|
from pydantic import BaseModel
|
|
|
|
from app.model.chat import Chat, McpServers
|
|
|
|
# Load benchmark env files (.env takes priority over .env.development)
|
|
_BENCHMARK_DIR = Path(__file__).resolve().parent
|
|
load_dotenv(_BENCHMARK_DIR / ".env")
|
|
load_dotenv(_BENCHMARK_DIR / ".env.development")
|
|
|
|
|
|
class Env(BaseModel):
|
|
# TODO: add more environment variables
|
|
# TODO: allow specifying files in the directory
|
|
files: list[str] = []
|
|
browser_port: int = 9222
|
|
installed_mcp: McpServers = {"mcpServers": {}}
|
|
env_file: str | None = None
|
|
|
|
|
|
class Tests(BaseModel):
|
|
grader: list[str] = []
|
|
checker: list[str] = []
|
|
|
|
|
|
class ModelKwargs(BaseModel):
|
|
model_platform: str = os.environ.get("BENCHMARK_MODEL_PLATFORM", "openai")
|
|
model_type: str = os.environ.get("BENCHMARK_MODEL_TYPE", "gpt-5.2")
|
|
api_key: str | None = os.environ.get("BENCHMARK_API_KEY")
|
|
api_url: str = os.environ.get(
|
|
"BENCHMARK_API_URL", "https://api.openai.com/v1"
|
|
)
|
|
|
|
|
|
class Metadata(BaseModel):
|
|
difficulty: str = ""
|
|
description: str = ""
|
|
tags: list[str] = []
|
|
|
|
|
|
class BenchmarkData(BaseModel):
|
|
name: str
|
|
question: str
|
|
env: Env = Env()
|
|
_chat: Chat | None = None
|
|
|
|
def to_chat(self, model_kwargs: ModelKwargs) -> Chat:
|
|
installed_mcp = self.env.installed_mcp
|
|
if self.env.env_file:
|
|
env_vars = dotenv_values(self.env.env_file)
|
|
for server_cfg in installed_mcp["mcpServers"].values():
|
|
server_env = server_cfg.get("env", {})
|
|
server_env.update(env_vars)
|
|
server_cfg["env"] = server_env
|
|
|
|
api_key = (
|
|
model_kwargs.api_key
|
|
or os.environ.get("BENCHMARK_API_KEY")
|
|
or os.environ["OPENAI_API_KEY"]
|
|
)
|
|
|
|
self._chat = Chat(
|
|
task_id=f"benchmark_{self.name}",
|
|
project_id=f"benchmark_{self.name}",
|
|
email="benchmark@eigent.ai",
|
|
question=self.question,
|
|
model_platform=model_kwargs.model_platform,
|
|
model_type=model_kwargs.model_type,
|
|
api_key=api_key,
|
|
api_url=model_kwargs.api_url,
|
|
browser_port=self.env.browser_port,
|
|
installed_mcp=installed_mcp,
|
|
)
|
|
return self._chat
|
|
|
|
def get_working_directory(self, model_kwargs: ModelKwargs) -> str:
|
|
chat = self._chat or self.to_chat(model_kwargs)
|
|
return chat.file_save_path()
|
|
|
|
|
|
class BenchmarkConfig(BaseModel):
|
|
metadata: Metadata = Metadata()
|
|
data: BenchmarkData
|
|
model_kwargs: ModelKwargs = ModelKwargs()
|
|
tests: Tests = Tests()
|
|
|
|
@classmethod
|
|
def from_json(cls, path: str | Path) -> "BenchmarkConfig":
|
|
with open(path) as f:
|
|
return cls.model_validate(json.load(f))
|