mirror of
https://github.com/zed-industries/zed.git
synced 2026-05-23 21:05:08 +00:00
465 lines
18 KiB
Python
465 lines
18 KiB
Python
"""Harbor agent wrapper for Zed's eval-cli binary.
|
|
|
|
Usage:
|
|
# Build eval-cli locally first:
|
|
cargo build --release -p eval_cli
|
|
|
|
# Run via Harbor with a local binary:
|
|
harbor run -d "dataset@version" \
|
|
--agent-import-path zed_eval.agent:ZedAgent \
|
|
--ae binary_path=/path/to/target/release/eval-cli \
|
|
--agent-model anthropic/claude-sonnet-4-6-latest
|
|
|
|
# Or with a download URL (for CI):
|
|
harbor run -d "dataset@version" \
|
|
--agent-import-path zed_eval.agent:ZedAgent \
|
|
--ae download_url=https://example.com/eval-cli \
|
|
--agent-model anthropic/claude-sonnet-4-6-latest
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import shlex
|
|
from pathlib import Path
|
|
|
|
from harbor.agents.installed.base import BaseInstalledAgent, with_prompt_template
|
|
from harbor.environments.base import BaseEnvironment
|
|
from harbor.models.agent.context import AgentContext
|
|
|
|
|
|
class ZedAgent(BaseInstalledAgent):
|
|
"""Runs Zed's headless AI agent (eval-cli) to solve tasks.
|
|
|
|
The eval-cli binary boots a headless GPUI application and uses the same
|
|
NativeAgent + AcpThread pipeline as the production Zed editor, driving
|
|
the full agentic loop (tool calls, subagents, retries) without a GUI.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
logs_dir: Path,
|
|
binary_path: str | None = None,
|
|
download_url: str | None = None,
|
|
*args,
|
|
**kwargs,
|
|
):
|
|
super().__init__(logs_dir, *args, **kwargs)
|
|
self._binary_path = binary_path
|
|
self._download_url = download_url or os.environ.get("EVAL_CLI_DOWNLOAD_URL")
|
|
|
|
@staticmethod
|
|
def name() -> str:
|
|
return "zed"
|
|
|
|
async def _detect_workdir(self, environment: BaseEnvironment) -> str:
|
|
"""Detect the working directory inside the container.
|
|
|
|
Checks, in order:
|
|
1. Explicit ``EVAL_CLI_WORKDIR`` extra-env override
|
|
2. Well-known dirs with a ``.git`` subdirectory (SWE-bench style)
|
|
3. First git repo found under ``/`` (max depth 3)
|
|
4. Well-known dirs that exist at all (terminal-bench style)
|
|
5. The container's default working directory (``pwd``)
|
|
"""
|
|
override = self._extra_env.get("EVAL_CLI_WORKDIR")
|
|
if override:
|
|
return override
|
|
|
|
# First: try to find a git repo (SWE-bench, etc.)
|
|
result = await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
"for d in /app /testbed /repo; do "
|
|
' if [ -d "$d/.git" ]; then echo "$d"; exit 0; fi; '
|
|
"done; "
|
|
"find / -maxdepth 3 -name .git -type d 2>/dev/null "
|
|
'| head -1 | sed "s|/.git$||"'
|
|
),
|
|
)
|
|
workdir = (result.stdout or "").strip()
|
|
if workdir:
|
|
return workdir
|
|
|
|
# Fallback: use the first well-known directory that exists,
|
|
# even without .git (terminal-bench containers aren't git repos).
|
|
result = await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
"for d in /app /testbed /repo /root /home; do "
|
|
' if [ -d "$d" ]; then echo "$d"; exit 0; fi; '
|
|
"done; "
|
|
"pwd"
|
|
),
|
|
)
|
|
workdir = (result.stdout or "").strip()
|
|
if workdir:
|
|
return workdir
|
|
|
|
raise RuntimeError(
|
|
"Could not detect a working directory in the container. "
|
|
"Set EVAL_CLI_WORKDIR explicitly via --ae EVAL_CLI_WORKDIR=/path/to/repo"
|
|
)
|
|
|
|
async def install(self, environment: BaseEnvironment) -> None:
|
|
# Detect the package manager and install base dependencies.
|
|
# Supports Debian/Ubuntu (apt-get), Alpine (apk), and
|
|
# Fedora/RHEL/CentOS (dnf/yum).
|
|
await self.exec_as_root(
|
|
environment,
|
|
command=(
|
|
"if command -v apt-get >/dev/null 2>&1; then "
|
|
" apt-get update && "
|
|
" apt-get install -y --no-install-recommends ca-certificates curl git; "
|
|
"elif command -v apk >/dev/null 2>&1; then "
|
|
" apk add --no-cache ca-certificates curl git bash coreutils gcompat libstdc++; "
|
|
"elif command -v dnf >/dev/null 2>&1; then "
|
|
" dnf install -y ca-certificates curl git; "
|
|
"elif command -v yum >/dev/null 2>&1; then "
|
|
" yum install -y ca-certificates curl git; "
|
|
"else "
|
|
" echo 'WARNING: No supported package manager found (apt-get, apk, dnf, yum)' >&2; "
|
|
"fi"
|
|
),
|
|
env={"DEBIAN_FRONTEND": "noninteractive"},
|
|
)
|
|
|
|
# ── Non-essential tooling ─────────────────────────────────────
|
|
# Everything below here (Node.js, LSPs, uv/ruff) is nice-to-have.
|
|
# If any step fails (e.g. musl incompatibility, network issues),
|
|
# log a warning and continue — the agent can still work without
|
|
# pre-installed language servers.
|
|
|
|
await self._install_node(environment)
|
|
await self._install_lsps(environment)
|
|
await self._install_uv_and_ruff(environment)
|
|
|
|
if self._binary_path:
|
|
binary = Path(self._binary_path)
|
|
if not binary.exists():
|
|
raise FileNotFoundError(
|
|
f"eval-cli binary not found at {binary}. "
|
|
"Build it with: cargo build --release -p eval_cli"
|
|
)
|
|
await environment.upload_file(
|
|
source_path=binary,
|
|
target_path="/usr/local/bin/eval-cli",
|
|
)
|
|
await self.exec_as_root(
|
|
environment,
|
|
command="chmod +x /usr/local/bin/eval-cli && eval-cli --help",
|
|
)
|
|
return
|
|
|
|
if self._download_url:
|
|
await self.exec_as_root(
|
|
environment,
|
|
command=(
|
|
f"curl -fsSL {shlex.quote(self._download_url)} "
|
|
"-o /usr/local/bin/eval-cli && "
|
|
"chmod +x /usr/local/bin/eval-cli && "
|
|
"eval-cli --help"
|
|
),
|
|
)
|
|
return
|
|
|
|
raise ValueError(
|
|
"No eval-cli binary provided. "
|
|
"Either pass binary_path=/path/to/target/release/eval-cli "
|
|
"or set download_url=/EVAL_CLI_DOWNLOAD_URL."
|
|
)
|
|
|
|
async def _install_node(self, environment: BaseEnvironment) -> None:
|
|
"""Install Node.js from official binary tarballs.
|
|
|
|
Uses the musl build on Alpine and the glibc build elsewhere.
|
|
Skips if node is already on PATH.
|
|
"""
|
|
try:
|
|
await self.exec_as_root(
|
|
environment,
|
|
command=(
|
|
"if command -v node >/dev/null 2>&1; then "
|
|
' echo "Node.js already available: $(node --version)"; '
|
|
"else "
|
|
" NODE_VER=v22.14.0; "
|
|
" ARCH=$(uname -m); "
|
|
' case "$ARCH" in '
|
|
" x86_64) NODE_ARCH=x64 ;; "
|
|
" aarch64) NODE_ARCH=arm64 ;; "
|
|
' *) echo "WARNING: unsupported arch $ARCH for Node.js" >&2; exit 0 ;; '
|
|
" esac; "
|
|
" if ldd /bin/sh 2>&1 | grep -qi musl; then "
|
|
' NODE_URL="https://unofficial-builds.nodejs.org/download/release/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}-musl.tar.gz"; '
|
|
" else "
|
|
' NODE_URL="https://nodejs.org/dist/${NODE_VER}/node-${NODE_VER}-linux-${NODE_ARCH}.tar.gz"; '
|
|
" fi; "
|
|
' echo "Downloading Node.js from $NODE_URL"; '
|
|
' curl -fsSL "$NODE_URL" | tar -xz -C /usr/local --strip-components=1; '
|
|
' echo "Installed Node.js $(node --version)"; '
|
|
"fi"
|
|
),
|
|
)
|
|
except Exception as exc:
|
|
self.logger.warning("Node.js installation failed (non-fatal): %s", exc)
|
|
|
|
async def _install_lsps(self, environment: BaseEnvironment) -> None:
|
|
"""Pre-install language servers so Zed doesn't download them at runtime.
|
|
|
|
Each LSP is installed independently so one failure doesn't block the rest.
|
|
"""
|
|
# npm-based LSPs — skip all if npm is not available.
|
|
try:
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command="command -v npm >/dev/null 2>&1",
|
|
)
|
|
except Exception:
|
|
self.logger.warning("npm not available — skipping npm-based LSP installs")
|
|
return
|
|
|
|
lsp_installs = [
|
|
(
|
|
"basedpyright",
|
|
'DIR="$ZED_DATA_DIR/languages/basedpyright"; '
|
|
'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact basedpyright',
|
|
),
|
|
(
|
|
"typescript-language-server",
|
|
'DIR="$ZED_DATA_DIR/languages/typescript-language-server"; '
|
|
'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact typescript typescript-language-server',
|
|
),
|
|
(
|
|
"vtsls",
|
|
'DIR="$ZED_DATA_DIR/languages/vtsls"; '
|
|
'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @vtsls/language-server typescript',
|
|
),
|
|
(
|
|
"tailwindcss-language-server",
|
|
'DIR="$ZED_DATA_DIR/languages/tailwindcss-language-server"; '
|
|
'mkdir -p "$DIR" && npm install --prefix "$DIR" --save-exact @tailwindcss/language-server',
|
|
),
|
|
]
|
|
|
|
for name, cmd in lsp_installs:
|
|
try:
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
|
|
+ cmd
|
|
),
|
|
)
|
|
except Exception as exc:
|
|
self.logger.warning(
|
|
"LSP install '%s' failed (non-fatal): %s", name, exc
|
|
)
|
|
|
|
# eslint — downloaded from GitHub and compiled separately.
|
|
try:
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
"set -euo pipefail; "
|
|
'ZED_DATA_DIR="${XDG_DATA_HOME:-$HOME/.local/share}/zed"; '
|
|
'ESLINT_DIR="$ZED_DATA_DIR/languages/eslint/vscode-eslint-2.4.4"; '
|
|
'mkdir -p "$ESLINT_DIR"; '
|
|
'curl -fsSL "https://github.com/zed-industries/vscode-eslint/archive/refs/tags/release/2.4.4.tar.gz" '
|
|
'| tar -xz -C "$ESLINT_DIR"; '
|
|
'mv "$ESLINT_DIR"/vscode-eslint-release-2.4.4 "$ESLINT_DIR/vscode-eslint"; '
|
|
'cd "$ESLINT_DIR/vscode-eslint" && npm install && npm run compile'
|
|
),
|
|
)
|
|
except Exception as exc:
|
|
self.logger.warning("eslint LSP install failed (non-fatal): %s", exc)
|
|
|
|
# gopls — only when Go is present. Guarded by a 120s timeout so slow
|
|
# compilation can never eat the full setup budget.
|
|
gopls_script = (
|
|
"if command -v go >/dev/null 2>&1; then "
|
|
"if go install golang.org/x/tools/gopls@latest 2>/dev/null; then "
|
|
"echo 'Installed gopls@latest'; "
|
|
"else "
|
|
' MY_GO=$(go env GOVERSION | sed "s/^go//"); '
|
|
" for v in $(curl -fsSL "
|
|
"https://proxy.golang.org/golang.org/x/tools/gopls/@v/list 2>/dev/null"
|
|
" | grep -E '^v[0-9]+\\.[0-9]+\\.[0-9]+$' | sort -rV | head -5); do "
|
|
" NEED=$(curl -fsSL "
|
|
'"https://proxy.golang.org/golang.org/x/tools/gopls/@v/${v}.mod"'
|
|
" 2>/dev/null | awk '/^go /{print $2; exit}'); "
|
|
' if [ -n "$NEED" ] '
|
|
' && [ "$(printf \'%s\\n%s\\n\' "$NEED" "$MY_GO" '
|
|
' | sort -V | head -1)" = "$NEED" ]; then '
|
|
' echo "Installing gopls $v (compatible with Go $MY_GO)"; '
|
|
' go install "golang.org/x/tools/gopls@$v" && break; '
|
|
" fi; "
|
|
" done; "
|
|
"fi; "
|
|
"fi"
|
|
)
|
|
try:
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
"timeout 120 bash -c "
|
|
+ shlex.quote(gopls_script)
|
|
+ " || echo 'WARNING: gopls installation timed out or failed -- skipping'"
|
|
),
|
|
)
|
|
except Exception as exc:
|
|
self.logger.warning("gopls install failed (non-fatal): %s", exc)
|
|
|
|
async def _install_uv_and_ruff(self, environment: BaseEnvironment) -> None:
|
|
"""Install uv and ruff for Python tooling."""
|
|
try:
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
"curl -LsSf https://astral.sh/uv/install.sh | sh && "
|
|
'. "$HOME/.local/bin/env"'
|
|
),
|
|
)
|
|
|
|
agent_home_result = await self.exec_as_agent(
|
|
environment,
|
|
command='printf %s "$HOME"',
|
|
)
|
|
agent_home = agent_home_result.stdout.strip()
|
|
if not agent_home:
|
|
self.logger.warning(
|
|
"Could not determine agent home directory — skipping uv symlinks"
|
|
)
|
|
return
|
|
|
|
await self.exec_as_root(
|
|
environment,
|
|
command=(
|
|
f"ln -sf {shlex.quote(agent_home + '/.local/bin/uv')} /usr/local/bin/uv && "
|
|
f"ln -sf {shlex.quote(agent_home + '/.local/bin/uvx')} /usr/local/bin/uvx"
|
|
),
|
|
)
|
|
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command='export PATH="$HOME/.local/bin:$PATH" && uv tool install ruff',
|
|
)
|
|
except Exception as exc:
|
|
self.logger.warning("uv/ruff installation failed (non-fatal): %s", exc)
|
|
|
|
def populate_context_post_run(self, context: AgentContext) -> None:
|
|
result_data = None
|
|
for json_file in self.logs_dir.rglob("result.json"):
|
|
try:
|
|
result_data = json.loads(json_file.read_text())
|
|
break
|
|
except (json.JSONDecodeError, OSError):
|
|
continue
|
|
|
|
if result_data is None:
|
|
self.logger.warning("Could not find or parse result.json from eval-cli")
|
|
return
|
|
|
|
if result_data.get("input_tokens") is not None:
|
|
context.n_input_tokens = result_data["input_tokens"]
|
|
if result_data.get("output_tokens") is not None:
|
|
context.n_output_tokens = result_data["output_tokens"]
|
|
if result_data.get("cache_read_input_tokens") is not None:
|
|
context.n_cache_tokens = result_data["cache_read_input_tokens"]
|
|
|
|
context.metadata = {
|
|
"status": result_data.get("status"),
|
|
"duration_secs": result_data.get("duration_secs"),
|
|
"model": result_data.get("model"),
|
|
}
|
|
|
|
def _get_api_env(self) -> dict[str, str]:
|
|
env: dict[str, str] = {}
|
|
if not self.model_name or "/" not in self.model_name:
|
|
return env
|
|
|
|
provider = self.model_name.split("/", 1)[0]
|
|
provider_env_map = {
|
|
"anthropic": "ANTHROPIC_API_KEY",
|
|
"openai": "OPENAI_API_KEY",
|
|
"google": "GEMINI_API_KEY",
|
|
"gemini": "GEMINI_API_KEY",
|
|
"deepseek": "DEEPSEEK_API_KEY",
|
|
"mistral": "MISTRAL_API_KEY",
|
|
}
|
|
|
|
env_var = provider_env_map.get(provider)
|
|
if env_var:
|
|
api_key = os.environ.get(env_var, "")
|
|
if api_key:
|
|
env[env_var] = api_key
|
|
|
|
return env
|
|
|
|
@with_prompt_template
|
|
async def run(
|
|
self, instruction: str, environment: BaseEnvironment, context: AgentContext
|
|
) -> None:
|
|
escaped_instruction = shlex.quote(instruction)
|
|
env = self._get_api_env()
|
|
|
|
workdir = await self._detect_workdir(environment)
|
|
|
|
parts = [
|
|
"eval-cli",
|
|
f"--workdir {shlex.quote(workdir)}",
|
|
"--output-dir /logs/agent",
|
|
]
|
|
|
|
if self.model_name:
|
|
parts.append(f"--model {shlex.quote(self.model_name)}")
|
|
|
|
timeout = self._extra_env.get("EVAL_CLI_TIMEOUT")
|
|
if timeout:
|
|
parts.append(f"--timeout {shlex.quote(timeout)}")
|
|
|
|
staff = self._extra_env.get("EVAL_CLI_STAFF")
|
|
if staff and staff.lower() == "false":
|
|
parts.append("--no-staff")
|
|
|
|
reasoning_effort = self._extra_env.get("EVAL_CLI_REASONING_EFFORT")
|
|
if reasoning_effort:
|
|
parts.append(f"--reasoning-effort {shlex.quote(reasoning_effort)}")
|
|
|
|
enable_thinking = self._extra_env.get("EVAL_CLI_ENABLE_THINKING")
|
|
if enable_thinking:
|
|
if enable_thinking.lower() == "true":
|
|
parts.append("--enable-thinking")
|
|
elif enable_thinking.lower() == "false":
|
|
parts.append("--disable-thinking")
|
|
|
|
parts.append(f"--instruction {escaped_instruction}")
|
|
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
" ".join(parts) + " 2>&1 | if command -v stdbuf >/dev/null 2>&1;"
|
|
" then stdbuf -oL tee /logs/agent/eval-cli.txt;"
|
|
" else tee /logs/agent/eval-cli.txt; fi"
|
|
),
|
|
env=env,
|
|
)
|
|
|
|
# Only generate a patch if the workdir is a git repo with a valid HEAD
|
|
# (SWE-bench style). Terminal-bench containers aren't git repos, and
|
|
# some harnesses mount an initialized repo before creating the first commit.
|
|
await self.exec_as_agent(
|
|
environment,
|
|
command=(
|
|
"if git rev-parse --git-dir >/dev/null 2>&1; then "
|
|
"git add -A && "
|
|
"if git rev-parse --verify HEAD >/dev/null 2>&1; then "
|
|
"git diff --cached HEAD -- > /logs/agent/patch.diff && "
|
|
'echo "Patch size: $(wc -c < /logs/agent/patch.diff) bytes"; '
|
|
"else "
|
|
'echo "Git repo has no valid HEAD, skipping patch generation"; '
|
|
"fi; "
|
|
"else "
|
|
'echo "No git repo found, skipping patch generation"; '
|
|
"fi"
|
|
),
|
|
cwd=workdir,
|
|
)
|