eigent/backend/benchmark/environment.py

# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ========= Copyright 2025-2026 @ Eigent.ai All Rights Reserved. =========
import json
import os
from pathlib import Path

from dotenv import dotenv_values, load_dotenv
from pydantic import BaseModel

from app.model.chat import Chat, McpServers
# Load benchmark env files. load_dotenv() does not override variables that
# are already set, so values from .env take priority over .env.development.
_BENCHMARK_DIR = Path(__file__).resolve().parent
load_dotenv(_BENCHMARK_DIR / ".env")
load_dotenv(_BENCHMARK_DIR / ".env.development")
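# A minimal sketch of what benchmark/.env might contain; the keys are the
# ones read by ModelKwargs below, the values here are hypothetical:
#
#   BENCHMARK_MODEL_PLATFORM=openai
#   BENCHMARK_MODEL_TYPE=gpt-5.2
#   BENCHMARK_API_KEY=sk-...
#   BENCHMARK_API_URL=https://api.openai.com/v1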


class Env(BaseModel):
    # TODO: add more environment variables
    # TODO: allow specifying files in the directory
    files: list[str] = []
    browser_port: int = 9222
    installed_mcp: McpServers = {"mcpServers": {}}
    env_file: str | None = None


class Tests(BaseModel):
    grader: list[str] = []
    checker: list[str] = []


class ModelKwargs(BaseModel):
    model_platform: str = os.environ.get("BENCHMARK_MODEL_PLATFORM", "openai")
    model_type: str = os.environ.get("BENCHMARK_MODEL_TYPE", "gpt-5.2")
    api_key: str | None = os.environ.get("BENCHMARK_API_KEY")
    api_url: str = os.environ.get(
        "BENCHMARK_API_URL", "https://api.openai.com/v1"
    )


class Metadata(BaseModel):
    difficulty: str = ""
    description: str = ""
    tags: list[str] = []


class BenchmarkData(BaseModel):
    name: str
    question: str
    env: Env = Env()

    _chat: Chat | None = None

    def to_chat(self, model_kwargs: ModelKwargs) -> Chat:
        installed_mcp = self.env.installed_mcp
        if self.env.env_file:
            # Merge the task's env file into every MCP server config;
            # entries without a value (dotenv yields None) are skipped.
            env_vars = {
                k: v
                for k, v in dotenv_values(self.env.env_file).items()
                if v is not None
            }
            for server_cfg in installed_mcp["mcpServers"].values():
                server_env = server_cfg.get("env", {})
                server_env.update(env_vars)
                server_cfg["env"] = server_env
        # Prefer an explicit key, then the benchmark env, then OPENAI_API_KEY.
        api_key = (
            model_kwargs.api_key
            or os.environ.get("BENCHMARK_API_KEY")
            or os.environ["OPENAI_API_KEY"]
        )
        self._chat = Chat(
            task_id=f"benchmark_{self.name}",
            project_id=f"benchmark_{self.name}",
            email="benchmark@eigent.ai",
            question=self.question,
            model_platform=model_kwargs.model_platform,
            model_type=model_kwargs.model_type,
            api_key=api_key,
            api_url=model_kwargs.api_url,
            browser_port=self.env.browser_port,
            installed_mcp=installed_mcp,
        )
        return self._chat

    def get_working_directory(self, model_kwargs: ModelKwargs) -> str:
        chat = self._chat or self.to_chat(model_kwargs)
        return chat.file_save_path()


class BenchmarkConfig(BaseModel):
    metadata: Metadata = Metadata()
    data: BenchmarkData
    model_kwargs: ModelKwargs = ModelKwargs()
    tests: Tests = Tests()

    @classmethod
    def from_json(cls, path: str | Path) -> "BenchmarkConfig":
        with open(path) as f:
            return cls.model_validate(json.load(f))
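
# Example usage, a minimal sketch: the file name and field values below are
# hypothetical; only the classes and methods come from this module.
#
#   config = BenchmarkConfig.from_json("tasks/demo.json")
#   chat = config.data.to_chat(config.model_kwargs)
#   workdir = config.data.get_working_directory(config.model_kwargs)
#
# where tasks/demo.json might look like:
#
#   {
#     "metadata": {"difficulty": "easy", "tags": ["browser"]},
#     "data": {"name": "demo", "question": "Open example.com"},
#     "model_kwargs": {"model_platform": "openai", "model_type": "gpt-5.2"},
#     "tests": {"grader": [], "checker": []}
#   }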