"""Harbor agent wrapper for Zed's eval-cli binary. Usage: # Build eval-cli locally first: cargo build --release -p eval_cli # Run via Harbor with a local binary: harbor run -d "dataset@version" \ --agent-import-path zed_eval.agent:ZedAgent \ --ae binary_path=/path/to/target/release/eval-cli \ --agent-model anthropic/claude-sonnet-4-6-latest # Or with a download URL (for CI): harbor run -d "dataset@version" \ --agent-import-path zed_eval.agent:ZedAgent \ --ae download_url=https://example.com/eval-cli \ --agent-model anthropic/claude-sonnet-4-6-latest """ import json import os import shlex from pathlib import Path from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template from harbor.environments.base import BaseEnvironment from harbor.models.agent.context import AgentContext class ZedAgent(BaseInstalledAgent): """Runs Zed's headless AI agent (eval-cli) to solve tasks. The eval-cli binary boots a headless GPUI application and uses the same NativeAgent + AcpThread pipeline as the production Zed editor, driving the full agentic loop (tool calls, subagents, retries) without a GUI. """ def __init__( self, logs_dir: Path, binary_path: str | None = None, download_url: str | None = None, *args, **kwargs, ): super().__init__(logs_dir, *args, **kwargs) self._binary_path = binary_path self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL") @staticmethod def name() -> str: return "zed" async def _detect_workdir(self, environment: BaseEnvironment) -> str: """Detect the working directory inside the container. Checks, in order: 1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override 2. Well-known dirs with a ``.git`` subdirectory (SWE-bench style) 3. First git repo found under ``/`` (max depth 3) 4. Well-known dirs that exist at all (terminal-bench style) 5. The container's default working directory (``pwd``) """ override = self._extra_env.get("EVAL_CLI_WORKDIR") if override: return override # First: try to find a git repo (SWE-bench, etc.) result = await self.exec_as_agent( environment, command=( "for d in /app /testbed /repo; do " ' if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; ' "done; " "find / -maxdepth 3 -name .git -type d 2>/dev/null " '| head -1 | sed "s|/.git$||"' ), ) workdir = (result.stdout or "").strip() if workdir: return workdir # Fallback: use the first well-known directory that exists, # even without .git (terminal-bench containers aren't git repos). result = await self.exec_as_agent( environment, command=( "for d in /app /testbed /repo /root /home; do " ' if [ -d "$d" ]; then echo "$d"; exit 0; fi; ' "done; " "pwd" ), ) workdir = (result.stdout or "").strip() if workdir: return workdir raise RuntimeError( "Could not detect a working directory in the container. " "Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo" ) async def install(self, environment: BaseEnvironment) -> None: # Detect the package manager and install base dependencies. # Supports Debian/Ubuntu (apt-get), Alpine (apk), and # Fedora/RHEL/CentOS (dnf/yum). await self.exec_as_root( environment, command=( "if command -v apt-get >/dev/null 2>&1; then " " apt-get update && " " apt-get install -y --no-install-recommends ca-certificates curl git; " "elif command -v apk >/dev/null 2>&1; then " " apk add --no-cache ca-certificates curl git bash coreutils gcompat libstdc++; " "elif command -v dnf >/dev/null 2>&1; then " " dnf install -y ca-certificates curl git; " "elif command -v yum >/dev/null 2>&1; then " " yum install -y ca-certificates curl git; " "else " " echo 'WARNING: No supported package manager found (apt-get, apk, dnf, yum)' >&2; " "fi" ), env={"DEBIAN_FRONTEND": "noninteractive"}, ) # ── Non-essential tooling ───────────────────────────────────── # Everything below here (Node.js, LSPs, uv/ruff) is nice-to-have. # If any step fails (e.g. musl incompatibility, network issues), # log a warning and continue — the agent can still work without # pre-installed language servers. await self._install_node(environment) await self._install_lsps(environment) await self._install_uv_and_ruff(environment) if self._binary_path: binary = Path(self._binary_path) if not binary.exists(): raise FileNotFoundError( f"eval-cli binary not found at {binary}. " "Build it with: cargo build --release -p eval_cli" ) await environment.upload_file( source_path=binary, target_path="/usr/local/bin/eval-cli", ) await self.exec_as_root( environment, command="chmod +x /usr/local/bin/eval-cli && eval-cli --help", ) return if self._download_url: await self.exec_as_root( environment, command=( f"curl -fsSL {shlex.quote(self._download_url)} " "-o /usr/local/bin/eval-cli && " "chmod +x /usr/local/bin/eval-cli && " "eval-cli --help" ), ) return raise ValueError( "No eval-cli binary provided. " "Either pass binary_path=/path/to/target/release/eval-cli " "or set download_url=/EVAL_CLI_DOWNLOAD_URL." ) async def _install_node(self, environment: BaseEnvironment) -> None: """Install Node.js from official binary tarballs. Uses the musl build on Alpine and the glibc build elsewhere. Skips if node is already on PATH. """ try: await self.exec_as_root( environment, command=( "if command -v node >/dev/null 2>&1; then " ' echo "Node.js already available: $(node --version)"; ' "else " " NODE_VER=v22.14.0; " " ARCH=$(uname -m); " ' case "$ARCH" in ' " x86_64) NODE_ARCH=x64 ;; " " aarch64) NODE_ARCH=arm64 ;; " ' *) echo "WARNING: unsupported arch $ARCH for Node.js" >&2; exit 0 ;; ' " esac; " " if ldd /bin/sh 2>&1 | grep -qi musl; then " ' NODE_URL="https://unofficial-builds.nodejs.org/download/release/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}-musl.tar.gz"; ' " else " ' NODE_URL="https://nodejs.org/dist/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}.tar.gz"; ' " fi; " ' echo "Downloading Node.js from $NODE_URL"; ' ' curl -fsSL "$NODE_URL" | tar -xz -C /usr/local --strip-components=1; ' ' echo "Installed Node.js $(node --version)"; ' "fi" ), ) except Exception as exc: self.logger.warning("Node.js installation failed (non-fatal): %s", exc) async def _install_lsps(self, environment: BaseEnvironment) -> None: """Pre-install language servers so Zed doesn't download them at runtime. Each LSP is installed independently so one failure doesn't block the rest. """ # npm-based LSPs — skip all if npm is not available. try: await self.exec_as_agent( environment, command="command -v npm >/dev/null 2>&1", ) except Exception: self.logger.warning("npm not available — skipping npm-based LSP installs") return lsp_installs = [ ( "basedpyright", 'DIR="$ZED_DATA_DIR/languages/basedpyright"; ' 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact basedpyright', ), ( "typescript-language-server", 'DIR="$ZED_DATA_DIR/languages/typescript-language-server"; ' 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact typescript typescript-language-server', ), ( "vtsls", 'DIR="$ZED_DATA_DIR/languages/vtsls"; ' 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @vtsls/language-server typescript', ), ( "tailwindcss-language-server", 'DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; ' 'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @tailwindcss/language-server', ), ] for name, cmd in lsp_installs: try: await self.exec_as_agent( environment, command=( 'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; ' + cmd ), ) except Exception as exc: self.logger.warning( "LSP install '%s' failed (non-fatal): %s", name, exc ) # eslint — downloaded from GitHub and compiled separately. try: await self.exec_as_agent( environment, command=( "set -euo pipefail; " 'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; ' 'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; ' 'mkdir -p "$ESLINT_DIR"; ' 'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" ' '| tar -xz -C "$ESLINT_DIR"; ' 'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; ' 'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile' ), ) except Exception as exc: self.logger.warning("eslint LSP install failed (non-fatal): %s", exc) # gopls — only when Go is present. Guarded by a 120s timeout so slow # compilation can never eat the full setup budget. gopls_script = ( "if command -v go >/dev/null 2>&1; then " "if go install golang.org/x/tools/gopls@latest 2>/dev/null; then " "echo 'Installed gopls@latest'; " "else " ' MY_GO=$(go env GOVERSION | sed "s/^go//"); ' " for v in $(curl -fsSL " "https://proxy.golang.org/golang.org/x/tools/gopls/@v/list 2>/dev/null" " | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' | sort -rV | head -5); do " " NEED=$(curl -fsSL " '"https://proxy.golang.org/golang.org/x/tools/gopls/@v/${v}.mod"' " 2>/dev/null | awk '/^go /{print $2; exit}'); " ' if [ -n "$NEED" ] ' ' && [ "$(printf \'%s\\n%s\\n\' "$NEED" "$MY_GO" ' ' | sort -V | head -1)" = "$NEED" ]; then ' ' echo "Installing gopls $v (compatible with Go $MY_GO)"; ' ' go install "golang.org/x/tools/gopls@$v" && break; ' " fi; " " done; " "fi; " "fi" ) try: await self.exec_as_agent( environment, command=( "timeout 120 bash -c " + shlex.quote(gopls_script) + " || echo 'WARNING: gopls installation timed out or failed -- skipping'" ), ) except Exception as exc: self.logger.warning("gopls install failed (non-fatal): %s", exc) async def _install_uv_and_ruff(self, environment: BaseEnvironment) -> None: """Install uv and ruff for Python tooling.""" try: await self.exec_as_agent( environment, command=( "curl -LsSf https://astral.sh/uv/install.sh | sh && " '. "$HOME/.local/bin/env"' ), ) agent_home_result = await self.exec_as_agent( environment, command='printf %s "$HOME"', ) agent_home = agent_home_result.stdout.strip() if not agent_home: self.logger.warning( "Could not determine agent home directory — skipping uv symlinks" ) return await self.exec_as_root( environment, command=( f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && " f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx" ), ) await self.exec_as_agent( environment, command='export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff', ) except Exception as exc: self.logger.warning("uv/ruff installation failed (non-fatal): %s", exc) def populate_context_post_run(self, context: AgentContext) -> None: result_data = None for json_file in self.logs_dir.rglob("result.json"): try: result_data = json.loads(json_file.read_text()) break except (json.JSONDecodeError, OSError): continue if result_data is None: self.logger.warning("Could not find or parse result.json from eval-cli") return if result_data.get("input_tokens") is not None: context.n_input_tokens = result_data["input_tokens"] if result_data.get("output_tokens") is not None: context.n_output_tokens = result_data["output_tokens"] if result_data.get("cache_read_input_tokens") is not None: context.n_cache_tokens = result_data["cache_read_input_tokens"] context.metadata = { "status": result_data.get("status"), "duration_secs": result_data.get("duration_secs"), "model": result_data.get("model"), } def _get_api_env(self) -> dict[str, str]: env: dict[str, str] = {} if not self.model_name or "/" not in self.model_name: return env provider = self.model_name.split("/", 1)[0] provider_env_map = { "anthropic": "ANTHROPIC_API_KEY", "openai": "OPENAI_API_KEY", "google": "GEMINI_API_KEY", "gemini": "GEMINI_API_KEY", "deepseek": "DEEPSEEK_API_KEY", "mistral": "MISTRAL_API_KEY", } env_var = provider_env_map.get(provider) if env_var: api_key = os.environ.get(env_var, "") if api_key: env[env_var] = api_key return env @with_prompt_template async def run( self, instruction: str, environment: BaseEnvironment, context: AgentContext ) -> None: escaped_instruction = shlex.quote(instruction) env = self._get_api_env() workdir = await self._detect_workdir(environment) parts = [ "eval-cli", f"--workdir {shlex.quote(workdir)}", "--output-dir /logs/agent", ] if self.model_name: parts.append(f"--model {shlex.quote(self.model_name)}") timeout = self._extra_env.get("EVAL_CLI_TIMEOUT") if timeout: parts.append(f"--timeout {shlex.quote(timeout)}") staff = self._extra_env.get("EVAL_CLI_STAFF") if staff and staff.lower() == "false": parts.append("--no-staff") reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT") if reasoning_effort: parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}") enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING") if enable_thinking: if enable_thinking.lower() == "true": parts.append("--enable-thinking") elif enable_thinking.lower() == "false": parts.append("--disable-thinking") parts.append(f"--instruction {escaped_instruction}") await self.exec_as_agent( environment, command=( " ".join(parts) + " 2>&1 | if command -v stdbuf >/dev/null 2>&1;" " then stdbuf -oL tee /logs/agent/eval-cli.txt;" " else tee /logs/agent/eval-cli.txt; fi" ), env=env, ) # Only generate a patch if the workdir is a git repo with a valid HEAD # (SWE-bench style). Terminal-bench containers aren't git repos, and # some harnesses mount an initialized repo before creating the first commit. await self.exec_as_agent( environment, command=( "if git rev-parse --git-dir >/dev/null 2>&1; then " "git add -A && " "if git rev-parse --verify HEAD >/dev/null 2>&1; then " "git diff --cached HEAD -- > /logs/agent/patch.diff && " 'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"; ' "else " 'echo "Git repo has no valid HEAD, skipping patch generation"; ' "fi; " "else " 'echo "No git repo found, skipping patch generation"; ' "fi" ), cwd=workdir, )