Render terminal sandbox section in system prompt behind feature flag (#57430)

Stacked on top of #57429.

Adds a new `sandboxing` feature flag (off for staff by default) and a
single source-of-truth helper `sandboxing_enabled(cx) = cfg!(target_os =
"macos") && cx.has_flag::<SandboxingFeatureFlag>()`. When the helper
returns true, the agent's system prompt gains a new `## Terminal
sandbox` section that:

- Lists each worktree's absolute path as a writable directory.
- Describes the per-command `$TMPDIR` scratch directory.
- States that outbound network access is blocked.
- Documents the three per-command flags (`allow_network`,
`allow_fs_write`, `unsandboxed`) the model can request to relax the
sandbox.
- Tells the model the section is stable for the duration of the
conversation.

When the flag is off, the section is omitted entirely — no mention of
sandboxing at all.

No behavior change to terminal execution yet; that's the next PR in the
stack.

Three new tests cover: section omitted when `sandboxing: false`, section
rendered with all worktrees + flag docs when `sandboxing: true`, and the
zero-worktrees case.

Release Notes:

- N/A

---------

Co-authored-by: MartinYe1234 <52641447+MartinYe1234@users.noreply.github.com>
Co-authored-by: Martin Ye <martin@zed.dev>
This commit is contained in:
Richard Feldman 2026-05-27 17:11:27 -04:00 committed by GitHub
parent c3b9cacc0e
commit ffbda20db1
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 147 additions and 0 deletions

View file

@ -3,6 +3,7 @@ mod legacy_thread;
mod native_agent_server;
pub mod outline;
mod pattern_extraction;
mod sandboxing;
mod templates;
#[cfg(test)]
mod tests;

View file

@ -0,0 +1,25 @@
//! Agent-side glue for the [`sandbox`] crate.
//!
//! Centralizes the "should agent-run terminal commands be sandboxed for this
//! process?" check so the system prompt, the terminal tool, and any other
//! caller see the same answer (and so the `target_os` gate lives in one
//! place instead of scattered across the agent crate).
//!
//! The current policy is: enabled iff we're on macOS *and* the user has the
//! `sandboxing` feature flag turned on. There's deliberately no settings or
//! env-var override yet — the flag is the only switch.
//!
//! On non-macOS hosts we don't have a sandbox integration today, so
//! [`sandboxing_enabled`] returns `false` there regardless of the flag.
//!
//! Naming note: this module is about agent terminal sandboxing specifically.
//! Other agent operations (e.g. file edits) are gated separately.
use feature_flags::{FeatureFlagAppExt as _, SandboxingFeatureFlag};
use gpui::App;
/// Reports whether terminal commands launched by the agent should run inside
/// an OS-level sandbox in this process.
///
/// The answer is `true` only when both conditions hold: the build targets
/// macOS, and the `SandboxingFeatureFlag` feature flag is present on `cx`.
/// See the module docs for the policy behind this.
pub(crate) fn sandboxing_enabled(cx: &App) -> bool {
    // Platform gate comes first, so the feature flag is never consulted on
    // targets other than macOS.
    if !cfg!(target_os = "macos") {
        return false;
    }
    cx.has_flag::<SandboxingFeatureFlag>()
}

View file

@ -43,6 +43,12 @@ pub struct SystemPromptTemplate<'a> {
/// Contents of the user-global `~/.config/zed/AGENTS.md` file (or the
/// platform equivalent), if present and non-empty.
pub user_agents_md: Option<SharedString>,
/// Whether agent-run terminal commands are wrapped in an OS-level
/// sandbox for this conversation. When `true`, the rendered prompt
/// describes the sandbox's read/write/network rules and the
/// per-command flags the model can request to relax them. When
/// `false`, the prompt omits the sandbox section entirely.
pub sandboxing: bool,
}
impl Template for SystemPromptTemplate<'_> {
@ -87,6 +93,7 @@ mod tests {
model_name: Some("test-model".to_string()),
date: "2026-01-01".to_string(),
user_agents_md: None,
sandboxing: false,
};
let templates = Templates::new();
let rendered = template.render(&templates).unwrap();
@ -119,6 +126,7 @@ mod tests {
model_name: Some("test-model".to_string()),
date: "2026-01-01".to_string(),
user_agents_md: Some("always be concise".into()),
sandboxing: false,
};
let templates = Templates::new();
let rendered = template.render(&templates).unwrap();
@ -136,6 +144,80 @@ mod tests {
);
}
#[test]
fn test_system_prompt_omits_sandbox_section_when_sandboxing_disabled() {
    // With `sandboxing: false` the rendered prompt must not mention the
    // sandbox at all: neither the section heading nor any per-command flag.
    let project_context = prompt_store::ProjectContext::default();
    let rendered = SystemPromptTemplate {
        project: &project_context,
        available_tools: vec!["echo".into()],
        model_name: Some("test-model".to_string()),
        date: "2026-01-01".to_string(),
        user_agents_md: None,
        sandboxing: false,
    }
    .render(&Templates::new())
    .unwrap();

    for forbidden in ["## Terminal sandbox", "allow_network"] {
        assert!(
            !rendered.contains(forbidden),
            "prompt unexpectedly contains {forbidden:?}"
        );
    }
}
#[test]
fn test_system_prompt_renders_sandbox_section_with_worktrees_when_enabled() {
    use prompt_store::{ProjectContext, WorktreeContext};

    // Two worktrees, so we can check that every root shows up as writable.
    let worktrees: Vec<WorktreeContext> = ["alpha", "beta"]
        .into_iter()
        .map(|name| WorktreeContext {
            root_name: name.to_string(),
            abs_path: std::path::Path::new(&format!("/tmp/{name}")).into(),
            rules_file: None,
        })
        .collect();
    let project_context = ProjectContext::new(worktrees);

    let rendered = SystemPromptTemplate {
        project: &project_context,
        available_tools: vec!["echo".into()],
        model_name: Some("test-model".to_string()),
        date: "2026-01-01".to_string(),
        user_agents_md: None,
        sandboxing: true,
    }
    .render(&Templates::new())
    .unwrap();

    let expected_fragments = [
        // Section heading.
        "## Terminal sandbox",
        // Each worktree root is listed as a writable directory.
        "`/tmp/alpha`",
        "`/tmp/beta`",
        // The three per-command flags the model may request.
        "allow_network: true",
        "allow_fs_write: true",
        "unsandboxed: true",
        // The model is told the section is stable so it doesn't re-check
        // sandbox state every turn.
        "remain in effect for the entire duration",
    ];
    for fragment in expected_fragments {
        assert!(
            rendered.contains(fragment),
            "prompt is missing {fragment:?}"
        );
    }
}
#[test]
fn test_system_prompt_sandbox_section_handles_zero_worktrees() {
    // A default project context is used here as the zero-worktrees case: the
    // section must still render, taking the "no writable project
    // directories" wording.
    let project_context = prompt_store::ProjectContext::default();
    let rendered = SystemPromptTemplate {
        project: &project_context,
        available_tools: vec!["echo".into()],
        model_name: Some("test-model".to_string()),
        date: "2026-01-01".to_string(),
        user_agents_md: None,
        sandboxing: true,
    }
    .render(&Templates::new())
    .unwrap();

    for fragment in [
        "## Terminal sandbox",
        "No project directories are currently writable",
    ] {
        assert!(
            rendered.contains(fragment),
            "prompt is missing {fragment:?}"
        );
    }
}
#[test]
fn test_system_prompt_omits_user_agents_md_section_when_absent() {
let project = prompt_store::ProjectContext::default();
@ -145,6 +227,7 @@ mod tests {
model_name: Some("test-model".to_string()),
date: "2026-01-01".to_string(),
user_agents_md: None,
sandboxing: false,
};
let templates = Templates::new();
let rendered = template.render(&templates).unwrap();
@ -160,6 +243,7 @@ mod tests {
model_name: Some("test-model".to_string()),
date: "2026-01-01".to_string(),
user_agents_md: None,
sandboxing: false,
};
let templates = Templates::new();
let rendered = template.render(&templates).unwrap();

View file

@ -187,6 +187,24 @@ The current project contains the following root directories:
- `{{abs_path}}`
{{/each}}
{{#if sandboxing}}
## Terminal sandbox
The `terminal` tool runs commands inside a sandbox with these permissions:
- Reads: any path on the filesystem is readable.
- Writes: a per-command temporary directory exposed via `$TMPDIR`, `$TMP`, and `$TEMP` is writable{{#if worktrees}}, along with these project directories:
{{#each worktrees}}
- `{{abs_path}}`
{{/each}}
Writes anywhere else on the filesystem are blocked.{{else}}. No project directories are currently writable.{{/if}}
- Network: outbound network access is blocked.
You can request elevated permissions on individual `terminal` calls by setting `allow_network: true`, `allow_fs_write: true`, or `unsandboxed: true`. The user will be prompted to approve before the command runs.
These sandbox settings are guaranteed to remain in effect for the entire duration of this conversation. If they ever change, you'll be told.
{{/if}}
{{#if model_name}}
## Model Information

View file

@ -3171,6 +3171,7 @@ impl Thread {
model_name: self.model.as_ref().map(|m| m.name().0.to_string()),
date: Local::now().format("%Y-%m-%d").to_string(),
user_agents_md,
sandboxing: crate::sandboxing::sandboxing_enabled(cx),
}
.render(&self.templates)
.context("failed to build system prompt")

View file

@ -372,6 +372,7 @@ impl EditToolTest {
model_name: None,
date: chrono::Local::now().format("%Y-%m-%d").to_string(),
user_agents_md: None,
sandboxing: false,
};
let templates = Templates::new();
template.render(&templates)?

View file

@ -231,6 +231,7 @@ impl TerminalToolTest {
model_name: None,
date: chrono::Local::now().format("%Y-%m-%d").to_string(),
user_agents_md: None,
sandboxing: false,
};
template.render(&Templates::new())?
};

View file

@ -202,6 +202,7 @@ impl WriteToolTest {
model_name: None,
date: chrono::Local::now().format("%Y-%m-%d").to_string(),
user_agents_md: None,
sandboxing: false,
};
let templates = Templates::new();
template.render(&templates)?

View file

@ -135,3 +135,18 @@ impl FeatureFlag for AutoWatchFeatureFlag {
type Value = PresenceFlag;
}
register_feature_flag!(AutoWatchFeatureFlag);
/// Feature flag for sandboxing agent-run terminal commands.
///
/// Wraps agent-run terminal commands in an OS-level sandbox where supported
/// (currently macOS Seatbelt only). When off, terminal commands run with the
/// agent's full ambient permissions, as they always have.
pub struct SandboxingFeatureFlag;
impl FeatureFlag for SandboxingFeatureFlag {
// Identifier string for this flag.
const NAME: &'static str = "sandboxing";
// NOTE(review): `PresenceFlag` presumably models a plain on/off flag whose
// value is just "present or not" — confirm against the `feature_flags` crate.
type Value = PresenceFlag;
// Returns `false`, so the flag stays off for staff by default.
// NOTE(review): presumably overrides a trait default — confirm.
fn enabled_for_staff() -> bool {
false
}
}
// Hands the flag type to the `register_feature_flag!` macro, as done for the
// other flags in this file.
register_feature_flag!(SandboxingFeatureFlag);