feat: replace subagent and skills with unified summon extension (#6964)

Signed-off-by: Travis Longwell <travis@block.xyz>
This commit is contained in:
tlongwell-block 2026-02-10 14:13:38 -05:00 committed by GitHub
parent 1168d7d9b1
commit 7ea19f5c83
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2616 additions and 1935 deletions

1
.gitignore vendored
View file

@ -72,3 +72,4 @@ result
# Goose self-test artifacts
gooseselftest/
.tasks/

View file

@ -97,7 +97,10 @@ mod tests {
input_config.additional_system_prompt,
Some("test_instructions my_value".to_string())
);
assert!(recipe.extensions.is_none());
assert!(recipe
.extensions
.as_ref()
.is_none_or(|e| e.iter().all(|ext| ext.name() == "summon")));
assert!(settings.is_some());
let settings = settings.unwrap();
@ -162,7 +165,10 @@ mod tests {
input_config.additional_system_prompt,
Some("test_instructions my_value".to_string())
);
assert!(recipe.extensions.is_none());
assert!(recipe
.extensions
.as_ref()
.is_none_or(|e| e.iter().all(|ext| ext.name() == "summon")));
assert!(settings.is_some());
let settings = settings.unwrap();

View file

@ -600,11 +600,7 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession {
let recipe = session_config.recipe.as_ref();
agent
.apply_recipe_components(
recipe.and_then(|r| r.sub_recipes.clone()),
recipe.and_then(|r| r.response.clone()),
true,
)
.apply_recipe_components(recipe.and_then(|r| r.response.clone()), true)
.await;
let new_provider = match create(&resolved.provider_name, resolved.model_config).await {
@ -643,6 +639,17 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession {
process::exit(1);
});
if let Some(recipe) = session_config.recipe.clone() {
if let Err(e) = session_manager
.update(&session_id)
.recipe(Some(recipe))
.apply()
.await
{
tracing::warn!("Failed to store recipe on session: {}", e);
}
}
if session_config.resume {
handle_resumed_session_workdir(&agent, &session_id, session_config.interactive).await;
}

View file

@ -21,8 +21,8 @@ use tokio_util::task::AbortOnDropHandle;
pub use self::export::message_to_markdown;
pub use builder::{build_session, SessionBuilderConfig};
use console::Color;
use goose::agents::subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE;
use goose::agents::AgentEvent;
use goose::agents::SUBAGENT_TOOL_REQUEST_TYPE;
use goose::permission::permission_confirmation::PrincipalType;
use goose::permission::Permission;
use goose::permission::PermissionConfirmation;

View file

@ -314,8 +314,9 @@ fn render_tool_request(req: &ToolRequest, theme: Theme, debug: bool) {
Ok(call) => match call.name.to_string().as_str() {
"developer__text_editor" => render_text_editor_request(call, debug),
"developer__shell" => render_shell_request(call, debug),
"code_execution__execute" => render_execute_code_request(call, debug),
"subagent" => render_subagent_request(call, debug),
"execute" | "execute_code" => render_execute_code_request(call, debug),
"delegate" => render_delegate_request(call, debug),
"subagent" => render_delegate_request(call, debug),
"todo__write" => render_todo_request(call, debug),
_ => render_default_request(call, debug),
},
@ -555,12 +556,12 @@ fn render_execute_code_request(call: &CallToolRequestParams, debug: bool) {
println!();
}
fn render_subagent_request(call: &CallToolRequestParams, debug: bool) {
fn render_delegate_request(call: &CallToolRequestParams, debug: bool) {
print_tool_header(call);
if let Some(args) = &call.arguments {
if let Some(Value::String(subrecipe)) = args.get("subrecipe") {
println!("{}: {}", style("subrecipe").dim(), style(subrecipe).cyan());
if let Some(Value::String(source)) = args.get("source") {
println!("{}: {}", style("source").dim(), style(source).cyan());
}
if let Some(Value::String(instructions)) = args.get("instructions") {
@ -581,7 +582,7 @@ fn render_subagent_request(call: &CallToolRequestParams, debug: bool) {
print_params(&Some(params.clone()), 1, debug);
}
let skip_keys = ["subrecipe", "instructions", "parameters"];
let skip_keys = ["source", "instructions", "parameters"];
let mut other_args = serde_json::Map::new();
for (k, v) in args {
if !skip_keys.contains(&k.as_str()) {

View file

@ -163,11 +163,7 @@ pub async fn apply_recipe_to_agent(
include_final_output_tool: bool,
) -> Option<String> {
agent
.apply_recipe_components(
recipe.sub_recipes.clone(),
recipe.response.clone(),
include_final_output_tool,
)
.apply_recipe_components(recipe.response.clone(), include_final_output_tool)
.await;
recipe.instructions.as_ref().map(|instructions| {

View file

@ -20,10 +20,6 @@ use crate::agents::final_output_tool::{FINAL_OUTPUT_CONTINUATION_MESSAGE, FINAL_
use crate::agents::platform_tools::PLATFORM_MANAGE_SCHEDULE_TOOL_NAME;
use crate::agents::prompt_manager::PromptManager;
use crate::agents::retry::{RetryManager, RetryResult};
use crate::agents::subagent_task_config::TaskConfig;
use crate::agents::subagent_tool::{
create_subagent_tool, handle_subagent_tool, SUBAGENT_TOOL_NAME,
};
use crate::agents::types::{FrontendTool, SessionConfig, SharedProvider, ToolResultReceiver};
use crate::config::permission::PermissionManager;
use crate::config::{get_enabled_extensions, Config, GooseMode};
@ -42,11 +38,11 @@ use crate::permission::permission_judge::PermissionCheckResult;
use crate::permission::PermissionConfirmation;
use crate::providers::base::Provider;
use crate::providers::errors::ProviderError;
use crate::recipe::{Author, Recipe, Response, Settings, SubRecipe};
use crate::recipe::{Author, Recipe, Response, Settings};
use crate::scheduler_trait::SchedulerTrait;
use crate::security::security_inspector::SecurityInspector;
use crate::session::extension_data::{EnabledExtensionsState, ExtensionState};
use crate::session::{Session, SessionManager, SessionType};
use crate::session::{Session, SessionManager};
use crate::tool_inspection::ToolInspectionManager;
use crate::tool_monitor::RepetitionInspector;
use crate::utils::is_token_cancelled;
@ -121,7 +117,6 @@ pub struct Agent {
pub config: AgentConfig,
pub extension_manager: Arc<ExtensionManager>,
pub(super) sub_recipes: Mutex<HashMap<String, SubRecipe>>,
pub(super) final_output_tool: Arc<Mutex<Option<FinalOutputTool>>>,
pub(super) frontend_tools: Mutex<HashMap<String, FrontendTool>>,
pub(super) frontend_instructions: Mutex<Option<String>>,
@ -210,7 +205,6 @@ impl Agent {
provider: provider.clone(),
config,
extension_manager: Arc::new(ExtensionManager::new(provider.clone(), session_manager)),
sub_recipes: Mutex::new(HashMap::new()),
final_output_tool: Arc::new(Mutex::new(None)),
frontend_tools: Mutex::new(HashMap::new()),
frontend_instructions: Mutex::new(None),
@ -452,23 +446,11 @@ impl Agent {
self.extend_system_prompt(final_output_system_prompt).await;
}
/// Register sub-recipes on the agent, keyed by sub-recipe name.
///
/// A later entry with the same name replaces an earlier one
/// (HashMap::insert semantics).
pub async fn add_sub_recipes(&self, sub_recipes_to_add: Vec<SubRecipe>) {
    let mut sub_recipes = self.sub_recipes.lock().await;
    for sr in sub_recipes_to_add {
        sub_recipes.insert(sr.name.clone(), sr);
    }
}
pub async fn apply_recipe_components(
&self,
sub_recipes: Option<Vec<SubRecipe>>,
response: Option<Response>,
include_final_output: bool,
) {
if let Some(sub_recipes) = sub_recipes {
self.add_sub_recipes(sub_recipes).await;
}
if include_final_output {
if let Some(response) = response {
self.add_final_output_tool(response).await;
@ -485,18 +467,6 @@ impl Agent {
cancellation_token: Option<CancellationToken>,
session: &Session,
) -> (String, Result<ToolCallResult, ErrorData>) {
// Prevent subagents from creating other subagents
if session.session_type == SessionType::SubAgent && tool_call.name == SUBAGENT_TOOL_NAME {
return (
request_id,
Err(ErrorData::new(
ErrorCode::INVALID_REQUEST,
"Subagents cannot create other subagents".to_string(),
None,
)),
);
}
if tool_call.name == PLATFORM_MANAGE_SCHEDULE_TOOL_NAME {
let arguments = tool_call
.arguments
@ -531,49 +501,7 @@ impl Agent {
}
debug!("WAITING_TOOL_START: {}", tool_call.name);
let result: ToolCallResult = if tool_call.name == SUBAGENT_TOOL_NAME {
let provider = match self.provider().await {
Ok(p) => p,
Err(_) => {
return (
request_id,
Err(ErrorData::new(
ErrorCode::INTERNAL_ERROR,
"Provider is required".to_string(),
None,
)),
);
}
};
let extensions = self.get_extension_configs().await;
let max_turns_from_recipe = session
.recipe
.as_ref()
.and_then(|r| r.settings.as_ref())
.and_then(|s| s.max_turns);
let task_config =
TaskConfig::new(provider, &session.id, &session.working_dir, extensions)
.with_max_turns(max_turns_from_recipe);
let sub_recipes = self.sub_recipes.lock().await.clone();
let arguments = tool_call
.arguments
.clone()
.map(Value::Object)
.unwrap_or(Value::Object(serde_json::Map::new()));
handle_subagent_tool(
&self.config,
arguments,
task_config,
sub_recipes,
session.working_dir.clone(),
cancellation_token,
)
} else if self.is_frontend_tool(&tool_call.name).await {
let result: ToolCallResult = if self.is_frontend_tool(&tool_call.name).await {
// For frontend tools, return an error indicating we need frontend execution
ToolCallResult::from(Err(ErrorData::new(
ErrorCode::INTERNAL_ERROR,
@ -807,30 +735,6 @@ impl Agent {
Ok(())
}
/// Whether subagent tooling is available in the given session.
///
/// Returns true only when all of the following hold:
/// - the agent is in `GooseMode::Auto`;
/// - the session is not itself a `SubAgent` session (no nested subagents);
/// - at least one extension is currently loaded.
pub async fn subagents_enabled(&self, session_id: &str) -> bool {
    // Any mode other than Auto disables subagents outright.
    if self.config.goose_mode != GooseMode::Auto {
        return false;
    }
    let context = self.extension_manager.get_context();
    // Prevent nesting: a session that is itself a subagent may not spawn more.
    // A session lookup failure falls through (treated as "not a subagent").
    if matches!(
        context
            .session_manager
            .get_session(session_id, false)
            .await
            .ok()
            .map(|session| session.session_type),
        Some(SessionType::SubAgent)
    ) {
        return false;
    }
    // Require at least one loaded extension; a listing error is treated
    // the same as an empty extension list (disabled).
    !self
        .extension_manager
        .list_extensions()
        .await
        .map(|ext| ext.is_empty())
        .unwrap_or(true)
}
pub async fn list_tools(&self, session_id: &str, extension_name: Option<String>) -> Vec<Tool> {
let mut prefixed_tools = self
.extension_manager
@ -838,7 +742,6 @@ impl Agent {
.await
.unwrap_or_default();
let subagents_enabled = self.subagents_enabled(session_id).await;
if (extension_name.is_none() || extension_name.as_deref() == Some("platform"))
&& self.config.scheduler_service.is_some()
{
@ -849,12 +752,6 @@ impl Agent {
if let Some(final_output_tool) = self.final_output_tool.lock().await.as_ref() {
prefixed_tools.push(final_output_tool.tool());
}
if subagents_enabled {
let sub_recipes = self.sub_recipes.lock().await;
let sub_recipes_vec: Vec<_> = sub_recipes.values().cloned().collect();
prefixed_tools.push(create_subagent_tool(&sub_recipes_vec));
}
}
prefixed_tools

View file

@ -3,7 +3,7 @@ use include_dir::{include_dir, Dir};
static BUILTIN_SKILLS_DIR: Dir =
include_dir!("$CARGO_MANIFEST_DIR/src/agents/builtin_skills/skills");
pub fn get_all_builtin_skills() -> Vec<&'static str> {
pub fn get_all() -> Vec<&'static str> {
BUILTIN_SKILLS_DIR
.files()
.filter(|f| f.path().extension().is_some_and(|ext| ext == "md"))

View file

@ -1,4 +1,5 @@
use crate::agents::extension::PlatformExtensionContext;
use crate::agents::extension_manager::get_tool_owner;
use crate::agents::mcp_client::{Error, McpClientTrait};
use anyhow::Result;
use async_trait::async_trait;
@ -111,10 +112,16 @@ impl CodeExecutionClient {
let mut cfgs = vec![];
for tool in tools {
let full_name = tool.name.to_string();
let (server_name, tool_name) = full_name.split_once("__")?;
let (namespace, name) = if let Some((server, tool_name)) = full_name.split_once("__") {
(server.to_string(), tool_name.to_string())
} else if let Some(owner) = get_tool_owner(&tool) {
(owner, full_name)
} else {
continue;
};
cfgs.push(CallbackConfig {
name: tool_name.into(),
namespace: server_name.into(),
name,
namespace,
description: tool.description.as_ref().map(|d| d.to_string()),
input_schema: Some(json!(tool.input_schema)),
output_schema: tool.output_schema.as_ref().map(|s| json!(s)),

View file

@ -378,7 +378,7 @@ impl Agent {
Err(e) => return Err(anyhow!("Failed to build recipe: {}", e)),
};
self.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true)
self.apply_recipe_components(recipe.response.clone(), true)
.await;
let prompt = [recipe.instructions.as_deref(), recipe.prompt.as_deref()]

View file

@ -2,7 +2,7 @@ use crate::agents::apps_extension;
use crate::agents::chatrecall_extension;
use crate::agents::code_execution_extension;
use crate::agents::extension_manager_extension;
use crate::agents::skills_extension;
use crate::agents::summon_extension;
use crate::agents::todo_extension;
use crate::agents::tom_extension;
use std::collections::HashMap;
@ -53,6 +53,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Enable a todo list for goose so it can keep track of what it is doing",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(todo_extension::TodoClient::new(ctx).unwrap()),
},
);
@ -65,6 +66,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Create and manage custom Goose apps through chat. Apps are HTML/CSS/JavaScript and run in sandboxed windows.",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(apps_extension::AppsManagerClient::new(ctx).unwrap()),
},
);
@ -77,6 +79,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Search past conversations and load session summaries for contextual memory",
default_enabled: false,
unprefixed_tools: false,
client_factory: |ctx| {
Box::new(chatrecall_extension::ChatRecallClient::new(ctx).unwrap())
},
@ -91,18 +94,20 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Enable extension management tools for discovering, enabling, and disabling extensions",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(extension_manager_extension::ExtensionManagerClient::new(ctx).unwrap()),
},
);
map.insert(
skills_extension::EXTENSION_NAME,
summon_extension::EXTENSION_NAME,
PlatformExtensionDef {
name: skills_extension::EXTENSION_NAME,
display_name: "Skills",
description: "Load and use skills from relevant directories",
name: summon_extension::EXTENSION_NAME,
display_name: "Summon",
description: "Load knowledge and delegate tasks to subagents",
default_enabled: true,
client_factory: |ctx| Box::new(skills_extension::SkillsClient::new(ctx).unwrap()),
unprefixed_tools: true,
client_factory: |ctx| Box::new(summon_extension::SummonClient::new(ctx).unwrap()),
},
);
@ -114,6 +119,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Goose will make extension calls through code execution, saving tokens",
default_enabled: false,
unprefixed_tools: true,
client_factory: |ctx| {
Box::new(code_execution_extension::CodeExecutionClient::new(ctx).unwrap())
},
@ -128,6 +134,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Inject custom context into every turn via GOOSE_MOIM_MESSAGE_TEXT and GOOSE_MOIM_MESSAGE_FILE environment variables",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(tom_extension::TomClient::new(ctx).unwrap()),
},
);
@ -175,12 +182,15 @@ impl PlatformExtensionContext {
}
}
/// Definition for a platform extension that runs in-process with direct agent access.
#[derive(Debug, Clone)]
pub struct PlatformExtensionDef {
    // Stable registry key/name of the extension.
    pub name: &'static str,
    // Human-readable name.
    pub display_name: &'static str,
    pub description: &'static str,
    // Whether the extension is enabled by default — NOTE(review): presumably
    // per new session; confirm against the registry consumer.
    pub default_enabled: bool,
    /// If true, tools are exposed without extension prefix for intuitive first-class use.
    pub unprefixed_tools: bool,
    // Constructs the in-process MCP client for this extension.
    pub client_factory: fn(PlatformExtensionContext) -> Box<dyn McpClientTrait>,
}

View file

@ -105,18 +105,18 @@ pub struct ExtensionManager {
/// A flattened representation of a resource used by the agent to prepare inference
#[derive(Debug, Clone)]
pub struct ResourceItem {
pub client_name: String, // The name of the client that owns the resource
pub uri: String, // The URI of the resource
pub name: String, // The name of the resource
pub content: String, // The content of the resource
pub extension_name: String, // The name of the extension that owns the resource
pub uri: String, // The URI of the resource
pub name: String, // The name of the resource
pub content: String, // The content of the resource
pub timestamp: DateTime<Utc>, // The timestamp of the resource
pub priority: f32, // The priority of the resource
pub priority: f32, // The priority of the resource
pub token_count: Option<u32>, // The token count of the resource (filled in by the agent)
}
impl ResourceItem {
pub fn new(
client_name: String,
extension_name: String,
uri: String,
name: String,
content: String,
@ -124,7 +124,7 @@ impl ResourceItem {
priority: f32,
) -> Self {
Self {
client_name,
extension_name,
uri,
name,
content,
@ -199,6 +199,41 @@ pub fn get_parameter_names(tool: &Tool) -> Vec<String> {
names
}
const TOOL_EXTENSION_META_KEY: &str = "goose_extension";
/// Return the owning extension recorded in the tool's metadata.
///
/// Reads the string value stored under `TOOL_EXTENSION_META_KEY` in the
/// tool's `_meta` map; `None` if the meta map or key is absent, or the
/// value is not a string.
pub fn get_tool_owner(tool: &Tool) -> Option<String> {
    let meta = tool.meta.as_ref()?;
    let owner = meta.0.get(TOOL_EXTENSION_META_KEY)?;
    owner.as_str().map(str::to_string)
}
/// True when `config` names a platform extension whose tools are exposed
/// without the `<extension>__` prefix. Non-platform configs are never
/// unprefixed.
fn is_unprefixed_extension(config: &ExtensionConfig) -> bool {
    match config {
        ExtensionConfig::Platform { name, .. } => PLATFORM_EXTENSIONS
            .get(name_to_key(name).as_str())
            .map(|def| def.unprefixed_tools)
            .unwrap_or(false),
        _ => false,
    }
}
/// Returns true if the named extension is a first-class platform extension
/// whose tools are exposed unprefixed and remain visible during code execution mode.
pub fn is_first_class_extension(name: &str) -> bool {
    match PLATFORM_EXTENSIONS.get(name_to_key(name).as_str()) {
        Some(def) => def.unprefixed_tools,
        None => false,
    }
}
/// Result of resolving a tool call to its owning extension
struct ResolvedTool {
    // Normalized key of the extension that owns the tool.
    extension_name: String,
    // Tool name with any extension prefix stripped — the name the client expects.
    actual_tool_name: String,
    // Handle to the owning extension's client.
    client: McpClientBox,
}
async fn child_process_client(
mut command: Command,
timeout: &Option<u64>,
@ -789,16 +824,18 @@ impl ExtensionManager {
tools
.iter()
.filter(|tool| {
let tool_prefix = tool.name.split("__").next().unwrap_or("");
let tool_owner = get_tool_owner(tool)
.map(|s| name_to_key(&s))
.unwrap_or_else(|| tool.name.split("__").next().unwrap_or("").to_string());
if let Some(ref excluded) = exclude_normalized {
if tool_prefix == excluded {
if tool_owner == *excluded {
return false;
}
}
if let Some(ref name_filter) = extension_name_normalized {
tool_prefix == name_filter
tool_owner == *name_filter
} else {
true
}
@ -861,18 +898,36 @@ impl ExtensionManager {
}
};
let expose_unprefixed = is_unprefixed_extension(&config);
loop {
for tool in client_tools.tools {
if config.is_tool_available(&tool.name) {
let public_name = if expose_unprefixed {
tool.name.to_string()
} else {
format!("{}__{}", name, tool.name)
};
let mut meta_map = tool
.meta
.as_ref()
.map(|m| m.0.clone())
.unwrap_or_default();
meta_map.insert(
TOOL_EXTENSION_META_KEY.to_string(),
serde_json::Value::String(name.clone()),
);
tools.push(Tool {
name: format!("{}__{}", name, tool.name).into(),
name: public_name.into(),
description: tool.description,
input_schema: tool.input_schema,
annotations: tool.annotations,
output_schema: tool.output_schema,
icons: tool.icons,
title: tool.title,
meta: tool.meta,
meta: Some(rmcp::model::Meta(meta_map)),
});
}
}
@ -899,9 +954,22 @@ impl ExtensionManager {
let results = future::join_all(client_futures).await;
let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut tools = Vec::new();
for (_, client_tools) in results {
tools.extend(client_tools);
for (ext_name, client_tools) in results {
for tool in client_tools {
let tool_name = tool.name.to_string();
if seen_names.contains(&tool_name) {
warn!(
tool = %tool_name,
extension = %ext_name,
"Duplicate tool name - skipping"
);
continue;
}
seen_names.insert(tool_name);
tools.push(tool);
}
}
Ok(tools)
@ -915,16 +983,6 @@ impl ExtensionManager {
prompt_template::render_template("plan.md", &context).expect("Prompt should render")
}
/// Find and return a reference to the appropriate client for a tool call
async fn get_client_for_tool(&self, prefixed_name: &str) -> Option<(String, McpClientBox)> {
self.extensions
.lock()
.await
.iter()
.find(|(key, _)| prefixed_name.starts_with(*key))
.map(|(name, extension)| (name.clone(), extension.get_client()))
}
// Function that gets executed for read_resource tool
pub async fn read_resource_tool(
&self,
@ -1183,6 +1241,66 @@ impl ExtensionManager {
}
}
/// Resolve a (possibly prefixed) tool-call name to its owning extension.
///
/// Fast path: if the name matches the `<extension>__<tool>` convention and
/// an extension with that key exists, use it directly. Otherwise, scan the
/// cached tool list and use the owner recorded in each tool's metadata.
async fn resolve_tool(
    &self,
    session_id: &str,
    tool_name: &str,
) -> Result<ResolvedTool, ErrorData> {
    // Fast path: conventional "<extension>__<tool>" naming.
    if let Some((prefix, actual)) = tool_name.split_once("__") {
        let key = name_to_key(prefix);
        if let Some(client) = self.get_server_client(&key).await {
            return Ok(ResolvedTool {
                extension_name: key,
                actual_tool_name: actual.to_string(),
                client,
            });
        }
    }

    // Slow path: look the tool up in the cached tool list and read its
    // ownership metadata.
    let all_tools = self.get_all_tools_cached(session_id).await.map_err(|e| {
        ErrorData::new(
            ErrorCode::INTERNAL_ERROR,
            format!("Failed to get tools: {}", e),
            None,
        )
    })?;

    let Some(tool) = all_tools.iter().find(|t| *t.name == *tool_name) else {
        return Err(ErrorData::new(
            ErrorCode::RESOURCE_NOT_FOUND,
            format!("Tool '{}' not found", tool_name),
            None,
        ));
    };

    let owner = get_tool_owner(tool).ok_or_else(|| {
        ErrorData::new(
            ErrorCode::RESOURCE_NOT_FOUND,
            format!("Tool '{}' has no owner", tool_name),
            None,
        )
    })?;
    // Strip the owner prefix if present; unprefixed tool names pass through.
    let bare_name = tool_name
        .strip_prefix(&format!("{owner}__"))
        .unwrap_or(tool_name)
        .to_string();
    let client = self.get_server_client(&owner).await.ok_or_else(|| {
        ErrorData::new(
            ErrorCode::RESOURCE_NOT_FOUND,
            format!("Extension '{}' not found for tool '{}'", owner, tool_name),
            None,
        )
    })?;
    Ok(ResolvedTool {
        extension_name: owner,
        actual_tool_name: bare_name,
        client,
    })
}
pub async fn dispatch_tool_call(
&self,
session_id: &str,
@ -1190,52 +1308,19 @@ impl ExtensionManager {
working_dir: Option<&std::path::Path>,
cancellation_token: CancellationToken,
) -> Result<ToolCallResult> {
// Some models strip the tool prefix, so auto-add it for known code_execution tools
let tool_name_str = tool_call.name.to_string();
let prefixed_name = if !tool_name_str.contains("__") {
let code_exec_tools = ["execute", "list_functions", "get_function_details"];
if code_exec_tools.contains(&tool_name_str.as_str())
&& self.extensions.lock().await.contains_key("code_execution")
let resolved = self.resolve_tool(session_id, &tool_name_str).await?;
if let Some(extension) = self.extensions.lock().await.get(&resolved.extension_name) {
if !extension
.config
.is_tool_available(&resolved.actual_tool_name)
{
format!("code_execution__{tool_name_str}")
} else {
tool_name_str
}
} else {
tool_name_str
};
// Dispatch tool call based on the prefix naming convention
let (client_name, client) =
self.get_client_for_tool(&prefixed_name)
.await
.ok_or_else(|| {
ErrorData::new(
ErrorCode::RESOURCE_NOT_FOUND,
format!("Tool '{}' not found", tool_call.name),
None,
)
})?;
let tool_name = prefixed_name
.strip_prefix(client_name.as_str())
.and_then(|s| s.strip_prefix("__"))
.ok_or_else(|| {
ErrorData::new(
ErrorCode::RESOURCE_NOT_FOUND,
format!("Invalid tool name format: '{}'", tool_call.name),
None,
)
})?
.to_string();
if let Some(extension) = self.extensions.lock().await.get(&client_name) {
if !extension.config.is_tool_available(&tool_name) {
return Err(ErrorData::new(
ErrorCode::RESOURCE_NOT_FOUND,
format!(
"Tool '{}' is not available for extension '{}'",
tool_name, client_name
resolved.actual_tool_name, resolved.extension_name
),
None,
)
@ -1244,15 +1329,16 @@ impl ExtensionManager {
}
let arguments = tool_call.arguments.clone();
let client = client.clone();
let client = resolved.client.clone();
let notifications_receiver = client.lock().await.subscribe().await;
let session_id = session_id.to_string();
let actual_tool_name = resolved.actual_tool_name;
let working_dir_str = working_dir.map(|p| p.to_string_lossy().to_string());
let fut = async move {
tracing::debug!(
"dispatch_tool_call fut: calling client.call_tool tool={} session_id={} working_dir={:?}",
tool_name,
"dispatch_tool_call: calling client.call_tool tool={} session_id={} working_dir={:?}",
actual_tool_name,
session_id,
working_dir_str
);
@ -1260,7 +1346,7 @@ impl ExtensionManager {
client_guard
.call_tool(
&session_id,
&tool_name,
&actual_tool_name,
arguments,
working_dir_str.as_deref(),
cancellation_token,
@ -1636,70 +1722,8 @@ mod tests {
}
}
#[tokio::test]
async fn test_get_client_for_tool() {
let temp_dir = tempfile::tempdir().unwrap();
let extension_manager =
ExtensionManager::new_without_provider(temp_dir.path().to_path_buf());
// Add some mock clients using the helper method
extension_manager
.add_mock_extension(
"test_client".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
extension_manager
.add_mock_extension(
"__client".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
extension_manager
.add_mock_extension(
"__cli__ent__".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
extension_manager
.add_mock_extension(
"client 🚀".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
// Test basic case
assert!(extension_manager
.get_client_for_tool("test_client__tool")
.await
.is_some());
// Test leading underscores
assert!(extension_manager
.get_client_for_tool("__client__tool")
.await
.is_some());
// Test multiple underscores in client name, and ending with __
assert!(extension_manager
.get_client_for_tool("__cli__ent____tool")
.await
.is_some());
// Test unicode in tool name, "client 🚀" should become "client_"
assert!(extension_manager
.get_client_for_tool("client___tool")
.await
.is_some());
}
#[tokio::test]
async fn test_dispatch_tool_call() {
// test that dispatch_tool_call parses out the sanitized name correctly, and extracts
// tool_names
let temp_dir = tempfile::tempdir().unwrap();
let extension_manager =
ExtensionManager::new_without_provider(temp_dir.path().to_path_buf());
@ -1726,7 +1750,6 @@ mod tests {
)
.await;
// verify a normal tool call
let tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1747,7 +1770,7 @@ mod tests {
let tool_call = CallToolRequestParams {
meta: None,
task: None,
name: "test_client__test__tool".to_string().into(),
name: "test_client__available_tool".to_string().into(),
arguments: Some(object!({})),
};
@ -1761,7 +1784,6 @@ mod tests {
.await;
assert!(result.is_ok());
// verify a multiple underscores dispatch
let tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1779,7 +1801,6 @@ mod tests {
.await;
assert!(result.is_ok());
// Test unicode in tool name, "client 🚀" should become "client_"
let tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1797,24 +1818,6 @@ mod tests {
.await;
assert!(result.is_ok());
let tool_call = CallToolRequestParams {
meta: None,
task: None,
name: "client___test__tool".to_string().into(),
arguments: Some(object!({})),
};
let result = extension_manager
.dispatch_tool_call(
"test-session-id",
tool_call,
None,
CancellationToken::default(),
)
.await;
assert!(result.is_ok());
// this should error out, specifically for an ToolError::ExecutionError
let invalid_tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1829,20 +1832,14 @@ mod tests {
None,
CancellationToken::default(),
)
.await
.unwrap()
.result
.await;
assert!(matches!(
result,
Err(ErrorData {
code: ErrorCode::INTERNAL_ERROR,
..
})
));
if let Err(err) = result {
let tool_err = err.downcast_ref::<ErrorData>().expect("Expected ErrorData");
assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND);
} else {
panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND");
}
// this should error out, specifically with an ToolError::NotFound
// this client doesn't exist
let invalid_tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1945,7 +1942,6 @@ mod tests {
)
.await;
// Try to call an unavailable tool
let unavailable_tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1962,11 +1958,9 @@ mod tests {
)
.await;
// Should return RESOURCE_NOT_FOUND error
if let Err(err) = result {
let tool_err = err.downcast_ref::<ErrorData>().expect("Expected ErrorData");
assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND);
assert!(tool_err.message.contains("is not available"));
} else {
panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND");
}

View file

@ -1,6 +1,6 @@
mod agent;
pub(crate) mod apps_extension;
mod builtin_skills;
pub(crate) mod builtin_skills;
pub(crate) mod chatrecall_extension;
pub(crate) mod code_execution_extension;
pub mod container;
@ -18,11 +18,10 @@ pub mod prompt_manager;
mod reply_parts;
pub mod retry;
mod schedule_tool;
pub(crate) mod skills_extension;
pub mod subagent_execution_tool;
pub mod subagent_handler;
mod subagent_task_config;
pub mod subagent_tool;
pub(crate) mod subagent_handler;
pub(crate) mod subagent_task_config;
pub(crate) mod summon_extension;
pub(crate) mod todo_extension;
pub(crate) mod tom_extension;
mod tool_execution;
@ -34,5 +33,6 @@ pub use execute_commands::COMPACT_TRIGGERS;
pub use extension::ExtensionConfig;
pub use extension_manager::ExtensionManager;
pub use prompt_manager::PromptManager;
pub use subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE;
pub use subagent_task_config::TaskConfig;
pub use types::{FrontendTool, RetryConfig, SessionConfig, SuccessCheck};

View file

@ -9,8 +9,6 @@ use tracing::debug;
use super::super::agents::Agent;
use crate::agents::code_execution_extension::EXTENSION_NAME as CODE_EXECUTION_EXTENSION;
use crate::agents::skills_extension::EXTENSION_NAME as SKILLS_EXTENSION;
use crate::agents::subagent_tool::SUBAGENT_TOOL_NAME;
use crate::conversation::message::{Message, MessageContent, ToolRequest};
use crate::conversation::Conversation;
use crate::providers::base::{stream_from_single_message, MessageStream, Provider, ProviderUsage};
@ -151,12 +149,12 @@ impl Agent {
.is_extension_enabled(CODE_EXECUTION_EXTENSION)
.await;
if code_execution_active {
let code_exec_prefix = format!("{CODE_EXECUTION_EXTENSION}__");
let skills_prefix = format!("{SKILLS_EXTENSION}__");
tools.retain(|tool| {
tool.name.starts_with(&code_exec_prefix)
|| tool.name.starts_with(&skills_prefix)
|| tool.name == SUBAGENT_TOOL_NAME
if let Some(owner) = crate::agents::extension_manager::get_tool_owner(tool) {
crate::agents::extension_manager::is_first_class_extension(&owner)
} else {
false
}
});
}
@ -182,7 +180,6 @@ impl Agent {
.with_extension_and_tool_counts(extension_count, tool_count)
.with_code_execution_mode(code_execution_active)
.with_hints(working_dir)
.with_enable_subagents(self.subagents_enabled(session_id).await)
.build();
// Handle toolshim if enabled

View file

@ -1,866 +0,0 @@
use super::builtin_skills;
use crate::agents::extension::PlatformExtensionContext;
use crate::agents::mcp_client::{Error, McpClientTrait};
use crate::config::paths::Paths;
use anyhow::Result;
use async_trait::async_trait;
use indoc::indoc;
use rmcp::model::{
CallToolResult, Content, Implementation, InitializeResult, JsonObject, ListToolsResult,
ProtocolVersion, ServerCapabilities, Tool, ToolAnnotations, ToolsCapability,
};
use schemars::{schema_for, JsonSchema};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tokio_util::sync::CancellationToken;
// Registry name of the skills platform extension.
pub static EXTENSION_NAME: &str = "skills";

/// Input schema: selects a skill by `name`.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
struct LoadSkillParams {
    name: String,
}

/// YAML frontmatter of a skill file: its name and a short description.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SkillMetadata {
    name: String,
    description: String,
}

/// A fully parsed skill: frontmatter metadata plus body and file locations.
#[derive(Debug, Clone)]
struct Skill {
    metadata: SkillMetadata,
    // Content following the frontmatter block.
    body: String,
    // Directory containing the skill file; empty PathBuf for builtin skills.
    directory: PathBuf,
    // Other files discovered alongside the skill file.
    supporting_files: Vec<PathBuf>,
}

/// In-process client that serves skills to the agent.
pub struct SkillsClient {
    // Server info/capabilities returned at initialize time.
    info: InitializeResult,
    // All discovered skills, keyed by skill name.
    skills: HashMap<String, Skill>,
}
impl SkillsClient {
/// Build the skills client: advertise a static tool list and discover
/// skills from builtin resources and well-known filesystem directories.
pub fn new(_context: PlatformExtensionContext) -> Result<Self> {
    let info = InitializeResult {
        protocol_version: ProtocolVersion::V_2025_03_26,
        capabilities: ServerCapabilities {
            tasks: None,
            // Tool list never changes after construction.
            tools: Some(ToolsCapability {
                list_changed: Some(false),
            }),
            resources: None,
            prompts: None,
            completions: None,
            experimental: None,
            logging: None,
        },
        server_info: Implementation {
            name: EXTENSION_NAME.to_string(),
            title: Some("Skills".to_string()),
            version: "1.0.0".to_string(),
            icons: None,
            website_url: None,
        },
        // Placeholder; overwritten below once skills are known.
        instructions: Some(String::new()),
    };
    // Builtins first; filesystem skills are merged afterwards, so a
    // filesystem skill with the same name overwrites the builtin one.
    let mut skills = Self::load_builtin_skills();
    let directories = Self::get_default_skill_directories()
        .into_iter()
        .filter(|d| d.exists())
        .collect::<Vec<_>>();
    let fs_skills = Self::discover_skills_in_directories(&directories);
    skills.extend(fs_skills);
    let mut client = Self { info, skills };
    // NOTE(review): presumably renders the discovered skill set into the
    // server instructions — confirm against generate_instructions.
    client.info.instructions = Some(client.generate_instructions());
    Ok(client)
}
fn load_builtin_skills() -> HashMap<String, Skill> {
let mut skills = HashMap::new();
for content in builtin_skills::get_all_builtin_skills() {
if let Ok((metadata, body)) = Self::parse_frontmatter(content) {
skills.insert(
metadata.name.clone(),
Skill {
metadata,
body,
directory: PathBuf::new(),
supporting_files: vec![],
},
);
}
}
skills
}
fn get_default_skill_directories() -> Vec<PathBuf> {
let mut dirs = Vec::new();
if let Some(home) = dirs::home_dir() {
dirs.push(home.join(".claude/skills"));
dirs.push(home.join(".config/agents/skills"));
}
dirs.push(Paths::config_dir().join("skills"));
if let Ok(working_dir) = std::env::current_dir() {
dirs.push(working_dir.join(".claude/skills"));
dirs.push(working_dir.join(".goose/skills"));
dirs.push(working_dir.join(".agents/skills"));
}
dirs
}
fn parse_skill_file(path: &Path) -> Result<Skill> {
let content = std::fs::read_to_string(path)?;
let (metadata, body) = Self::parse_frontmatter(&content)?;
let directory = path
.parent()
.ok_or_else(|| anyhow::anyhow!("Skill file has no parent directory"))?
.to_path_buf();
let supporting_files = Self::find_supporting_files(&directory, path)?;
Ok(Skill {
metadata,
body,
directory,
supporting_files,
})
}
fn parse_frontmatter(content: &str) -> Result<(SkillMetadata, String)> {
let parts: Vec<&str> = content.split("---").collect();
if parts.len() < 3 {
return Err(anyhow::anyhow!("Invalid frontmatter format"));
}
let yaml_content = parts[1].trim();
let metadata: SkillMetadata = serde_yaml::from_str(yaml_content)?;
let body = parts[2..].join("---").trim().to_string();
Ok((metadata, body))
}
fn find_supporting_files(directory: &Path, skill_file: &Path) -> Result<Vec<PathBuf>> {
let mut files = Vec::new();
if let Ok(entries) = std::fs::read_dir(directory) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_file() && path != skill_file {
files.push(path);
} else if path.is_dir() {
if let Ok(sub_entries) = std::fs::read_dir(&path) {
for sub_entry in sub_entries.flatten() {
let sub_path = sub_entry.path();
if sub_path.is_file() {
files.push(sub_path);
}
}
}
}
}
}
Ok(files)
}
fn discover_skills_in_directories(directories: &[PathBuf]) -> HashMap<String, Skill> {
let mut skills = HashMap::new();
for dir in directories {
if let Ok(entries) = std::fs::read_dir(dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
let skill_file = path.join("SKILL.md");
if skill_file.exists() {
if let Ok(skill) = Self::parse_skill_file(&skill_file) {
skills.insert(skill.metadata.name.clone(), skill);
}
}
}
}
}
}
skills
}
fn generate_instructions(&self) -> String {
if self.skills.is_empty() {
return String::new();
}
let mut instructions = String::from("You have these skills at your disposal, when it is clear they can help you solve a problem or you are asked to use them:\n\n");
let mut skill_list: Vec<_> = self.skills.iter().collect();
skill_list.sort_by_key(|(name, _)| *name);
for (name, skill) in skill_list {
instructions.push_str(&format!("- {}: {}\n", name, skill.metadata.description));
}
instructions
}
async fn handle_load_skill(
&self,
arguments: Option<JsonObject>,
) -> Result<Vec<Content>, String> {
let skill_name = arguments
.as_ref()
.ok_or("Missing arguments")?
.get("name")
.and_then(|v| v.as_str())
.ok_or("Missing required parameter: name")?;
let skill = self
.skills
.get(skill_name)
.ok_or_else(|| format!("Skill '{}' not found", skill_name))?;
let mut response = format!("# Skill: {}\n\n{}\n\n", skill.metadata.name, skill.body);
if !skill.supporting_files.is_empty() {
response.push_str(&format!(
"## Supporting Files\n\nSkill directory: {}\n\n",
skill.directory.display()
));
response.push_str("The following supporting files are available:\n");
for file in &skill.supporting_files {
if let Ok(relative) = file.strip_prefix(&skill.directory) {
response.push_str(&format!("- {}\n", relative.display()));
}
}
response.push_str("\nUse the view file tools to access these files as needed, or run scripts as directed with dev extension.\n");
}
Ok(vec![Content::text(response)])
}
fn get_tools() -> Vec<Tool> {
let schema = schema_for!(LoadSkillParams);
let schema_value =
serde_json::to_value(schema).expect("Failed to serialize LoadSkillParams schema");
let input_schema = schema_value
.as_object()
.expect("Schema should be an object")
.clone();
vec![Tool::new(
"loadSkill".to_string(),
indoc! {r#"
Load a skill by name and return its content.
This tool loads the specified skill and returns its body content along with
information about any supporting files in the skill directory.
"#}
.to_string(),
input_schema,
)
.annotate(ToolAnnotations {
title: Some("Load skill".to_string()),
read_only_hint: Some(true),
destructive_hint: Some(false),
idempotent_hint: Some(true),
open_world_hint: Some(false),
})]
}
}
#[async_trait]
impl McpClientTrait for SkillsClient {
    /// List available tools. When no skills were discovered, no tools are
    /// exposed at all (rather than a `loadSkill` tool that can never succeed).
    async fn list_tools(
        &self,
        _session_id: &str,
        _next_cursor: Option<String>,
        _cancellation_token: CancellationToken,
    ) -> Result<ListToolsResult, Error> {
        let tools = if self.skills.is_empty() {
            Vec::new()
        } else {
            Self::get_tools()
        };
        Ok(ListToolsResult {
            tools,
            next_cursor: None,
            meta: None,
        })
    }

    /// Dispatch a tool call. Failures (unknown tool, bad args, missing skill)
    /// are reported as `CallToolResult::error` payloads, not transport errors.
    async fn call_tool(
        &self,
        _session_id: &str,
        name: &str,
        arguments: Option<JsonObject>,
        _working_dir: Option<&str>,
        _cancellation_token: CancellationToken,
    ) -> Result<CallToolResult, Error> {
        let content = match name {
            "loadSkill" => self.handle_load_skill(arguments).await,
            _ => Err(format!("Unknown tool: {}", name)),
        };
        match content {
            Ok(content) => Ok(CallToolResult::success(content)),
            Err(error) => Ok(CallToolResult::error(vec![Content::text(format!(
                "Error: {}",
                error
            ))])),
        }
    }

    // Server info built in `new()`; `instructions` carries the skill list.
    fn get_info(&self) -> Option<&InitializeResult> {
        Some(&self.info)
    }
}
// Unit tests for frontmatter parsing, skill discovery (including directory
// priority), instruction generation, and tool-list gating.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // Well-formed frontmatter parses into metadata + body.
    #[test]
    fn test_parse_frontmatter() {
        let content = r#"---
name: test-skill
description: A test skill
---
# Test Skill
This is the body of the skill.
"#;
        let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap();
        assert_eq!(metadata.name, "test-skill");
        assert_eq!(metadata.description, "A test skill");
        assert!(body.contains("# Test Skill"));
        assert!(body.contains("This is the body of the skill."));
    }

    // No `---` delimiters at all is rejected.
    #[test]
    fn test_parse_frontmatter_missing() {
        let content = "# No frontmatter here";
        assert!(SkillsClient::parse_frontmatter(content).is_err());
    }

    // An opening `---` with no closing delimiter is rejected.
    #[test]
    fn test_parse_frontmatter_unclosed() {
        let content = r#"---
name: test
description: test
"#;
        assert!(SkillsClient::parse_frontmatter(content).is_err());
    }

    // Unknown frontmatter keys are tolerated; only name/description are read.
    #[test]
    fn test_parse_frontmatter_with_extra_fields() {
        let content = r#"---
name: test-skill
description: A test skill
author: Test Author
version: 1.0.0
tags:
- test
- example
extra_field: some value
---
# Test Skill
This is the body of the skill.
"#;
        let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap();
        assert_eq!(metadata.name, "test-skill");
        assert_eq!(metadata.description, "A test skill");
        assert!(body.contains("# Test Skill"));
        assert!(body.contains("This is the body of the skill."));
    }

    // Supporting files are collected from the skill dir and one level of subdirs.
    #[test]
    fn test_parse_skill_file() {
        let temp_dir = TempDir::new().unwrap();
        let skill_dir = temp_dir.path().join("test-skill");
        fs::create_dir(&skill_dir).unwrap();
        let skill_file = skill_dir.join("SKILL.md");
        fs::write(
            &skill_file,
            r#"---
name: test-skill
description: A test skill
---
# Test Skill Content
"#,
        )
        .unwrap();
        fs::write(skill_dir.join("helper.py"), "print('hello')").unwrap();
        fs::create_dir(skill_dir.join("templates")).unwrap();
        fs::write(skill_dir.join("templates/template.txt"), "template").unwrap();
        let skill = SkillsClient::parse_skill_file(&skill_file).unwrap();
        assert_eq!(skill.metadata.name, "test-skill");
        assert_eq!(skill.metadata.description, "A test skill");
        assert!(skill.body.contains("# Test Skill Content"));
        // helper.py + templates/template.txt; SKILL.md itself is excluded.
        assert_eq!(skill.supporting_files.len(), 2);
    }

    // All SKILL.md-bearing subdirectories of a directory are discovered.
    #[test]
    fn test_discover_skills() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = temp_dir.path().join("skills");
        fs::create_dir(&skills_dir).unwrap();
        let skill1_dir = skills_dir.join("test-skill-one-a1b2c3");
        fs::create_dir(&skill1_dir).unwrap();
        fs::write(
            skill1_dir.join("SKILL.md"),
            r#"---
name: test-skill-one-a1b2c3
description: First test skill
---
Body 1
"#,
        )
        .unwrap();
        let skill2_dir = skills_dir.join("test-skill-two-d4e5f6");
        fs::create_dir(&skill2_dir).unwrap();
        fs::write(
            skill2_dir.join("SKILL.md"),
            r#"---
name: test-skill-two-d4e5f6
description: Second test skill
---
Body 2
"#,
        )
        .unwrap();
        let skill3_dir = skills_dir.join("test-skill-three-g7h8i9");
        fs::create_dir(&skill3_dir).unwrap();
        fs::write(
            skill3_dir.join("SKILL.md"),
            r#"---
name: test-skill-three-g7h8i9
description: Third test skill
---
Body 3
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 3);
        assert!(skills.contains_key("test-skill-one-a1b2c3"));
        assert!(skills.contains_key("test-skill-two-d4e5f6"));
        assert!(skills.contains_key("test-skill-three-g7h8i9"));
    }

    // Skills with distinct names accumulate across multiple directories.
    #[test]
    fn test_discover_skills_from_multiple_directories() {
        let temp_dir = TempDir::new().unwrap();
        let dir1 = temp_dir.path().join("dir1");
        fs::create_dir(&dir1).unwrap();
        let skill1_dir = dir1.join("skill-from-dir1");
        fs::create_dir(&skill1_dir).unwrap();
        fs::write(
            skill1_dir.join("SKILL.md"),
            r#"---
name: skill-from-dir1
description: Skill from directory 1
---
Content from dir1
"#,
        )
        .unwrap();
        let dir2 = temp_dir.path().join("dir2");
        fs::create_dir(&dir2).unwrap();
        let skill2_dir = dir2.join("skill-from-dir2");
        fs::create_dir(&skill2_dir).unwrap();
        fs::write(
            skill2_dir.join("SKILL.md"),
            r#"---
name: skill-from-dir2
description: Skill from directory 2
---
Content from dir2
"#,
        )
        .unwrap();
        let dir3 = temp_dir.path().join("dir3");
        fs::create_dir(&dir3).unwrap();
        let skill3_dir = dir3.join("skill-from-dir3");
        fs::create_dir(&skill3_dir).unwrap();
        fs::write(
            skill3_dir.join("SKILL.md"),
            r#"---
name: skill-from-dir3
description: Skill from directory 3
---
Content from dir3
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[dir1, dir2, dir3]);
        assert_eq!(skills.len(), 3);
        assert!(skills.contains_key("skill-from-dir1"));
        assert!(skills.contains_key("skill-from-dir2"));
        assert!(skills.contains_key("skill-from-dir3"));
        assert_eq!(
            skills.get("skill-from-dir1").unwrap().metadata.description,
            "Skill from directory 1"
        );
        assert_eq!(
            skills.get("skill-from-dir2").unwrap().metadata.description,
            "Skill from directory 2"
        );
        assert_eq!(
            skills.get("skill-from-dir3").unwrap().metadata.description,
            "Skill from directory 3"
        );
    }

    // With no skills, generated instructions are empty.
    #[test]
    fn test_empty_instructions_when_no_skills() {
        let temp_dir = TempDir::new().unwrap();
        let empty_dir = temp_dir.path().join("empty");
        fs::create_dir(&empty_dir).unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]);
        assert_eq!(skills.len(), 0);
        let mut client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let instructions = client.generate_instructions();
        assert_eq!(instructions, "");
        assert!(instructions.is_empty());
        client.info.instructions = Some(instructions);
        assert_eq!(client.info.instructions.as_ref().unwrap(), "");
    }

    // With no skills, list_tools exposes no tools.
    #[tokio::test]
    async fn test_no_tools_when_no_skills() {
        let temp_dir = TempDir::new().unwrap();
        let empty_dir = temp_dir.path().join("empty");
        fs::create_dir(&empty_dir).unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]);
        assert_eq!(skills.len(), 0);
        let client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let result = client
            .list_tools("test-session-id", None, CancellationToken::new())
            .await
            .unwrap();
        assert_eq!(result.tools.len(), 0);
    }

    // With at least one skill, the `loadSkill` tool is exposed.
    #[tokio::test]
    async fn test_tools_available_when_skills_exist() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = temp_dir.path().join("skills");
        fs::create_dir(&skills_dir).unwrap();
        let skill_dir = skills_dir.join("test-skill");
        fs::create_dir(&skill_dir).unwrap();
        fs::write(
            skill_dir.join("SKILL.md"),
            r#"---
name: test-skill
description: A test skill
---
Content
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 1);
        let client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let result = client
            .list_tools("test-session-id", None, CancellationToken::new())
            .await
            .unwrap();
        assert_eq!(result.tools.len(), 1);
        assert_eq!(result.tools[0].name, "loadSkill");
    }

    // Instructions list all skills, sorted alphabetically by name.
    #[test]
    fn test_instructions_with_skills() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = temp_dir.path().join("skills");
        fs::create_dir(&skills_dir).unwrap();
        let skill1_dir = skills_dir.join("alpha-skill");
        fs::create_dir(&skill1_dir).unwrap();
        fs::write(
            skill1_dir.join("SKILL.md"),
            r#"---
name: alpha-skill
description: First skill alphabetically
---
Content
"#,
        )
        .unwrap();
        let skill2_dir = skills_dir.join("beta-skill");
        fs::create_dir(&skill2_dir).unwrap();
        fs::write(
            skill2_dir.join("SKILL.md"),
            r#"---
name: beta-skill
description: Second skill alphabetically
---
Content
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 2);
        let mut client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let instructions = client.generate_instructions();
        assert!(!instructions.is_empty());
        assert!(instructions.contains("You have these skills at your disposal"));
        assert!(instructions.contains("alpha-skill: First skill alphabetically"));
        assert!(instructions.contains("beta-skill: Second skill alphabetically"));
        let lines: Vec<&str> = instructions.lines().collect();
        let alpha_line = lines
            .iter()
            .position(|l| l.contains("alpha-skill"))
            .unwrap();
        let beta_line = lines.iter().position(|l| l.contains("beta-skill")).unwrap();
        assert!(alpha_line < beta_line);
        client.info.instructions = Some(instructions);
        assert!(!client.info.instructions.as_ref().unwrap().is_empty());
    }

    // Later directories in the scan order override earlier ones for the same
    // skill name: working-dir skills beat global skills.
    #[test]
    fn test_discover_skills_working_dir_overrides_global() {
        let temp_dir = TempDir::new().unwrap();
        // Simulate ~/.claude/skills (global, lowest priority)
        let global_claude = temp_dir.path().join("global-claude");
        fs::create_dir(&global_claude).unwrap();
        let skill_global_claude = global_claude.join("my-skill");
        fs::create_dir(&skill_global_claude).unwrap();
        fs::write(
            skill_global_claude.join("SKILL.md"),
            r#"---
name: my-skill
description: From global claude
---
Global claude content
"#,
        )
        .unwrap();
        // Simulate ~/.config/goose/skills (global, medium priority)
        let global_goose = temp_dir.path().join("global-goose");
        fs::create_dir(&global_goose).unwrap();
        let skill_global_goose = global_goose.join("my-skill");
        fs::create_dir(&skill_global_goose).unwrap();
        fs::write(
            skill_global_goose.join("SKILL.md"),
            r#"---
name: my-skill
description: From global goose config
---
Global goose config content
"#,
        )
        .unwrap();
        // Simulate $PWD/.claude/skills (working dir, higher priority)
        let working_claude = temp_dir.path().join("working-claude");
        fs::create_dir(&working_claude).unwrap();
        let skill_working_claude = working_claude.join("my-skill");
        fs::create_dir(&skill_working_claude).unwrap();
        fs::write(
            skill_working_claude.join("SKILL.md"),
            r#"---
name: my-skill
description: From working dir claude
---
Working dir claude content
"#,
        )
        .unwrap();
        // Simulate $PWD/.goose/skills (working dir, highest priority)
        let working_goose = temp_dir.path().join("working-goose");
        fs::create_dir(&working_goose).unwrap();
        let skill_working_goose = working_goose.join("my-skill");
        fs::create_dir(&skill_working_goose).unwrap();
        fs::write(
            skill_working_goose.join("SKILL.md"),
            r#"---
name: my-skill
description: From working dir goose
---
Working dir goose content
"#,
        )
        .unwrap();
        // Test priority order: global_claude < global_goose < working_claude < working_goose
        let skills = SkillsClient::discover_skills_in_directories(&[
            global_claude,
            global_goose,
            working_claude,
            working_goose,
        ]);
        assert_eq!(skills.len(), 1);
        assert!(skills.contains_key("my-skill"));
        // The last directory (working_goose) should win
        assert_eq!(
            skills.get("my-skill").unwrap().metadata.description,
            "From working dir goose"
        );
        assert!(skills
            .get("my-skill")
            .unwrap()
            .body
            .contains("Working dir goose content"));
    }

    // Builtin skills embedded in the binary parse and include the doc guide.
    #[test]
    fn test_builtin_skills_loaded() {
        let skills = SkillsClient::load_builtin_skills();
        assert!(!skills.is_empty());
        assert!(skills.contains_key("goose-doc-guide"));
    }
}

View file

@ -20,6 +20,8 @@ use std::sync::Arc;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info};
pub type OnMessageCallback = Arc<dyn Fn(&Message) + Send + Sync>;
#[derive(Serialize)]
pub struct SubagentPromptContext {
pub max_turns: usize,
@ -52,6 +54,39 @@ pub async fn run_complete_subagent_task(
.await
}
/// Run a subagent task to completion, invoking `on_message` for each message
/// the subagent produces.
///
/// Returns the subagent's final output when one is available (see
/// `get_final_output`), otherwise text extracted from the conversation —
/// the last message only when `return_last_only` is set, all text otherwise.
///
/// # Errors
/// Any failure from the underlying run is wrapped as an MCP
/// `ErrorData` (INTERNAL_ERROR) before being propagated as `anyhow::Error`.
pub async fn run_subagent_task_with_callback(
    config: AgentConfig,
    recipe: Recipe,
    task_config: TaskConfig,
    return_last_only: bool,
    session_id: String,
    cancellation_token: Option<CancellationToken>,
    on_message: Option<OnMessageCallback>,
) -> Result<String, anyhow::Error> {
    let (messages, final_output) = get_agent_messages_with_callback(
        config,
        recipe,
        task_config,
        session_id,
        cancellation_token,
        on_message,
    )
    .await
    .map_err(|e| {
        ErrorData::new(
            ErrorCode::INTERNAL_ERROR,
            format!("Failed to execute task: {}", e),
            None,
        )
    })?;
    // A produced final output takes precedence over extracted message text.
    if let Some(output) = final_output {
        return Ok(output);
    }
    Ok(extract_response_text(&messages, return_last_only))
}
pub async fn run_complete_subagent_task_with_notifications(
config: AgentConfig,
recipe: Recipe,
@ -82,7 +117,11 @@ pub async fn run_complete_subagent_task_with_notifications(
return Ok(output);
}
let response_text = if return_last_only {
Ok(extract_response_text(&messages, return_last_only))
}
fn extract_response_text(messages: &Conversation, return_last_only: bool) -> String {
if return_last_only {
messages
.messages()
.last()
@ -133,13 +172,101 @@ pub async fn run_complete_subagent_task_with_notifications(
.collect();
all_text_content.join("\n")
};
Ok(response_text)
}
}
// Identifier string for subagent tool requests; consumed elsewhere in the crate.
pub const SUBAGENT_TOOL_REQUEST_TYPE: &str = "subagent_tool_request";

/// Core subagent driver: configure a fresh agent from the recipe and task
/// config, stream its reply, and collect the full conversation.
///
/// Returns the conversation plus an optional final output (populated when the
/// recipe declares a response schema — see `get_final_output`). Extension
/// failures are logged and skipped; stream errors end collection early with
/// whatever was gathered so far.
fn get_agent_messages_with_callback(
    config: AgentConfig,
    recipe: Recipe,
    task_config: TaskConfig,
    session_id: String,
    cancellation_token: Option<CancellationToken>,
    on_message: Option<OnMessageCallback>,
) -> AgentMessagesFuture {
    Box::pin(async move {
        let system_instructions = recipe.instructions.clone().unwrap_or_default();
        // Default prompt keeps the agent from stalling on an empty task.
        let user_task = recipe
            .prompt
            .clone()
            .unwrap_or_else(|| "Begin.".to_string());
        let agent = Arc::new(Agent::with_config(config));
        agent
            .update_provider(task_config.provider.clone(), &session_id)
            .await
            .map_err(|e| anyhow!("Failed to set provider on sub agent: {}", e))?;
        // Best-effort: a failing extension should not abort the whole task.
        for extension in &task_config.extensions {
            if let Err(e) = agent.add_extension(extension.clone(), &session_id).await {
                debug!(
                    "Failed to add extension '{}' to subagent: {}",
                    extension.name(),
                    e
                );
            }
        }
        let has_response_schema = recipe.response.is_some();
        agent
            .apply_recipe_components(recipe.response.clone(), true)
            .await;
        let subagent_prompt =
            build_subagent_prompt(&agent, &task_config, &session_id, system_instructions).await?;
        agent.override_system_prompt(subagent_prompt).await;
        let user_message = Message::user().with_text(user_task);
        let mut conversation = Conversation::new_unvalidated(vec![user_message.clone()]);
        // Activities are informational only; just log them.
        if let Some(activities) = recipe.activities {
            for activity in activities {
                info!("Recipe activity: {}", activity);
            }
        }
        let session_config = SessionConfig {
            id: session_id.clone(),
            schedule_id: None,
            max_turns: task_config.max_turns.map(|v| v as u32),
            retry_config: recipe.retry,
        };
        // Run the reply inside the session-id task-local so nested code
        // observes the subagent's session.
        let mut stream =
            crate::session_context::with_session_id(Some(session_id.to_string()), async {
                agent
                    .reply(user_message, session_config, cancellation_token)
                    .await
            })
            .await
            .map_err(|e| anyhow!("Failed to get reply from agent: {}", e))?;
        while let Some(message_result) = stream.next().await {
            match message_result {
                Ok(AgentEvent::Message(msg)) => {
                    if let Some(ref callback) = on_message {
                        callback(&msg);
                    }
                    conversation.push(msg);
                }
                // Notifications and model changes carry no conversation content.
                Ok(AgentEvent::McpNotification(_)) | Ok(AgentEvent::ModelChange { .. }) => {}
                // e.g. after compaction the agent may replace the whole history.
                Ok(AgentEvent::HistoryReplaced(updated_conversation)) => {
                    conversation = updated_conversation;
                }
                Err(e) => {
                    tracing::error!("Error receiving message from subagent: {}", e);
                    break;
                }
            }
        }
        let final_output = get_final_output(&agent, has_response_schema).await;
        Ok((conversation, final_output))
    })
}
fn get_agent_messages_with_notifications(
config: AgentConfig,
recipe: Recipe,
@ -174,7 +301,7 @@ fn get_agent_messages_with_notifications(
let has_response_schema = recipe.response.is_some();
agent
.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true)
.apply_recipe_components(recipe.response.clone(), true)
.await;
let subagent_prompt =

View file

@ -1,530 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::PathBuf;
use anyhow::{anyhow, Result};
use futures::FutureExt;
use rmcp::model::{Content, ErrorCode, ErrorData, ServerNotification, Tool};
use serde::Deserialize;
use serde_json::{json, Value};
use tokio::sync::mpsc;
use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_util::sync::CancellationToken;
use crate::agents::subagent_handler::run_complete_subagent_task_with_notifications;
use crate::agents::subagent_task_config::TaskConfig;
use crate::agents::tool_execution::ToolCallResult;
use crate::agents::AgentConfig;
use crate::providers;
use crate::recipe::build_recipe::build_recipe_from_template;
use crate::recipe::local_recipes::load_local_recipe_file;
use crate::recipe::{Recipe, SubRecipe};
/// Name of the tool exposed to the parent agent.
pub const SUBAGENT_TOOL_NAME: &str = "subagent";

/// Appended to a subagent's instructions when `summary` is requested, so its
/// final message doubles as the report returned to the parent agent.
const SUMMARY_INSTRUCTIONS: &str = r#"
Important: Your parent agent will only receive your final message as a summary of your work.
Make sure your last message provides a comprehensive summary of:
- What you were asked to do
- What actions you took
- The results or outcomes
- Any important findings or recommendations
Be concise but complete.
"#;
/// Parameters accepted by the `subagent` tool.
#[derive(Debug, Deserialize, Clone)]
pub struct SubagentParams {
    // Ad-hoc instructions, and/or additional context for a subrecipe.
    pub instructions: Option<String>,
    // Name of a predefined subrecipe to run.
    pub subrecipe: Option<String>,
    // Subrecipe parameter values; only valid together with `subrecipe`.
    pub parameters: Option<HashMap<String, Value>>,
    // Extension names to enable; `None` inherits all, empty array means none.
    pub extensions: Option<Vec<String>>,
    // Optional provider/model/temperature/turn-limit overrides.
    pub settings: Option<SubagentSettings>,
    // When true (the default), only the subagent's final summary is returned.
    #[serde(default = "default_summary")]
    pub summary: bool,
}

// Serde default for `summary`: on unless explicitly disabled.
fn default_summary() -> bool {
    true
}

/// Per-run overrides for the subagent's model configuration.
#[derive(Debug, Deserialize, Clone)]
pub struct SubagentSettings {
    pub provider: Option<String>,
    pub model: Option<String>,
    pub temperature: Option<f32>,
    pub max_turns: Option<usize>,
}
/// Build the `subagent` MCP tool definition. The description embeds the list
/// of available subrecipes (see `build_tool_description`); the JSON schema is
/// a hand-written mirror of `SubagentParams`.
pub fn create_subagent_tool(sub_recipes: &[SubRecipe]) -> Tool {
    let description = build_tool_description(sub_recipes);
    let schema = json!({
        "type": "object",
        "properties": {
            "instructions": {
                "type": "string",
                "description": "Instructions for the subagent. Required for ad-hoc tasks. For predefined tasks, adds additional context."
            },
            "subrecipe": {
                "type": "string",
                "description": "Name of a predefined subrecipe to run."
            },
            "parameters": {
                "type": "object",
                "additionalProperties": true,
                "description": "Parameters for the subrecipe. Only valid when 'subrecipe' is specified."
            },
            "extensions": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Extensions to enable. Omit to inherit all, empty array for none."
            },
            "settings": {
                "type": "object",
                "properties": {
                    "provider": {"type": "string", "description": "Override LLM provider"},
                    "model": {"type": "string", "description": "Override model"},
                    "temperature": {"type": "number", "description": "Override temperature"},
                    "max_turns": {"type": "number", "description": "Override max turns"}
                },
                "description": "Override model/provider/settings."
            },
            "summary": {
                "type": "boolean",
                "default": true,
                "description": "If true (default), return only the subagent's final summary."
            }
        }
    });
    // json! with a top-level object always yields Some(..) here.
    Tool::new(
        SUBAGENT_TOOL_NAME,
        description,
        schema.as_object().unwrap().clone(),
    )
}
/// Render the `subagent` tool description: a fixed usage preamble, plus one
/// bullet per configured subrecipe (name, optional sequential-execution hint,
/// description, and any still-unbound parameters).
fn build_tool_description(sub_recipes: &[SubRecipe]) -> String {
    let base = "Delegate a task to a subagent that runs independently with its own context.\n\n\
        Modes:\n\
        1. Ad-hoc: Provide `instructions` for a custom task\n\
        2. Predefined: Provide `subrecipe` name to run a predefined task\n\
        3. Augmented: Provide both `subrecipe` and `instructions` to add context\n\n\
        The subagent has access to the same tools as you by default. \
        Use `extensions` to limit which extensions the subagent can use.\n\n\
        For parallel execution, make multiple `subagent` tool calls in the same message.";
    if sub_recipes.is_empty() {
        return base.to_string();
    }
    let mut description = format!("{}\n\nAvailable subrecipes:", base);
    for sub_recipe in sub_recipes {
        let params = get_subrecipe_params_description(sub_recipe);
        // Parameter list is appended only when the subrecipe has unbound params.
        let params_suffix = if params.is_empty() {
            String::new()
        } else {
            format!(" (params: {})", params)
        };
        let sequential_hint = if sub_recipe.sequential_when_repeated {
            " [run sequentially, not in parallel]"
        } else {
            ""
        };
        description.push_str(&format!(
            "\n• {}{} - {}{}",
            sub_recipe.name,
            sequential_hint,
            sub_recipe.description.as_deref().unwrap_or("No description"),
            params_suffix
        ));
    }
    description
}
/// Describe the parameters a caller must/может supply for a subrecipe:
/// comma-separated "key [required]"/"key [optional]" entries, excluding
/// parameters already bound by the subrecipe's `values` map.
///
/// Any failure (file unreadable, recipe unparsable, no parameters) degrades
/// to an empty string — this only feeds the tool description.
fn get_subrecipe_params_description(sub_recipe: &SubRecipe) -> String {
    match load_local_recipe_file(&sub_recipe.path) {
        Ok(recipe_file) => match Recipe::from_content(&recipe_file.content) {
            Ok(recipe) => {
                if let Some(params) = recipe.parameters {
                    params
                        .iter()
                        // Skip params pre-bound via the subrecipe's `values`.
                        .filter(|p| {
                            sub_recipe
                                .values
                                .as_ref()
                                .map(|v| !v.contains_key(&p.key))
                                .unwrap_or(true)
                        })
                        .map(|p| {
                            let req = match p.requirement {
                                crate::recipe::RecipeParameterRequirement::Required => "[required]",
                                _ => "[optional]",
                            };
                            format!("{} {}", p.key, req)
                        })
                        .collect::<Vec<_>>()
                        .join(", ")
                } else {
                    String::new()
                }
            }
            Err(_) => String::new(),
        },
        Err(_) => String::new(),
    }
}
/// Entry point for a `subagent` tool call: validate parameters, resolve the
/// recipe to run, and kick off execution with a notification stream the
/// caller can forward to the client.
///
/// Validation rules: at least one of `instructions`/`subrecipe` is required,
/// and `parameters` is only legal together with `subrecipe`.
///
/// Note: SubRecipe.sequential_when_repeated is surfaced as a hint in the tool description
/// (e.g., "[run sequentially, not in parallel]") but not enforced. The LLM controls
/// sequencing by making sequential vs parallel tool calls.
pub fn handle_subagent_tool(
    config: &AgentConfig,
    params: Value,
    task_config: TaskConfig,
    sub_recipes: HashMap<String, SubRecipe>,
    working_dir: PathBuf,
    cancellation_token: Option<CancellationToken>,
) -> ToolCallResult {
    let parsed_params: SubagentParams = match serde_json::from_value(params) {
        Ok(p) => p,
        Err(e) => {
            return ToolCallResult::from(Err(ErrorData {
                code: ErrorCode::INVALID_PARAMS,
                message: Cow::from(format!("Invalid parameters: {}", e)),
                data: None,
            }));
        }
    };
    if parsed_params.instructions.is_none() && parsed_params.subrecipe.is_none() {
        return ToolCallResult::from(Err(ErrorData {
            code: ErrorCode::INVALID_PARAMS,
            message: Cow::from("Must provide 'instructions' or 'subrecipe' (or both)"),
            data: None,
        }));
    }
    if parsed_params.parameters.is_some() && parsed_params.subrecipe.is_none() {
        return ToolCallResult::from(Err(ErrorData {
            code: ErrorCode::INVALID_PARAMS,
            message: Cow::from("'parameters' can only be used with 'subrecipe'"),
            data: None,
        }));
    }
    // Resolve subrecipe/ad-hoc recipe up front so bad input fails fast.
    let recipe = match build_recipe(&parsed_params, &sub_recipes) {
        Ok(r) => r,
        Err(e) => {
            return ToolCallResult::from(Err(ErrorData {
                code: ErrorCode::INVALID_PARAMS,
                message: Cow::from(e.to_string()),
                data: None,
            }));
        }
    };
    let config = config.clone();
    // Notifications emitted during execution are streamed back via this channel.
    let (notification_tx, notification_rx) = mpsc::unbounded_channel();
    ToolCallResult {
        notification_stream: Some(Box::new(UnboundedReceiverStream::new(notification_rx))),
        result: Box::new(
            execute_subagent_with_notifications(
                config,
                recipe,
                task_config,
                parsed_params,
                working_dir,
                cancellation_token,
                notification_tx,
            )
            .boxed(),
        ),
    }
}
/// Execute a prepared subagent recipe: create a dedicated SubAgent session,
/// apply any per-run settings overrides, run the task, and convert the
/// outcome into an MCP `CallToolResult`.
///
/// # Errors
/// Session creation and task failures map to INTERNAL_ERROR; invalid
/// settings overrides map to INVALID_PARAMS.
async fn execute_subagent_with_notifications(
    config: AgentConfig,
    recipe: Recipe,
    task_config: TaskConfig,
    params: SubagentParams,
    working_dir: PathBuf,
    cancellation_token: Option<CancellationToken>,
    notification_tx: mpsc::UnboundedSender<ServerNotification>,
) -> Result<rmcp::model::CallToolResult, ErrorData> {
    let session = config
        .session_manager
        .create_session(
            working_dir,
            "Subagent task".to_string(),
            crate::session::session_manager::SessionType::SubAgent,
        )
        .await
        .map_err(|e| ErrorData {
            code: ErrorCode::INTERNAL_ERROR,
            message: Cow::from(format!("Failed to create session: {}", e)),
            data: None,
        })?;
    let task_config = apply_settings_overrides(task_config, &params)
        .await
        .map_err(|e| ErrorData {
            code: ErrorCode::INVALID_PARAMS,
            message: Cow::from(e.to_string()),
            data: None,
        })?;
    let result = run_complete_subagent_task_with_notifications(
        config,
        recipe,
        task_config,
        // `summary == true` means return only the subagent's final message.
        params.summary,
        session.id,
        cancellation_token,
        Some(notification_tx),
    )
    .await;
    match result {
        Ok(text) => Ok(rmcp::model::CallToolResult {
            content: vec![Content::text(text)],
            structured_content: None,
            is_error: Some(false),
            meta: None,
        }),
        Err(e) => Err(ErrorData {
            code: ErrorCode::INTERNAL_ERROR,
            message: Cow::from(e.to_string()),
            data: None,
        }),
    }
}
/// Resolve the recipe to execute: a named subrecipe when one was requested,
/// otherwise an ad-hoc recipe built from the caller's instructions. When a
/// summary is requested, the summary guidance is appended to the instructions.
fn build_recipe(
    params: &SubagentParams,
    sub_recipes: &HashMap<String, SubRecipe>,
) -> Result<Recipe> {
    let mut recipe = match &params.subrecipe {
        Some(subrecipe_name) => build_subrecipe(subrecipe_name, params, sub_recipes)?,
        None => build_adhoc_recipe(params)?,
    };
    if params.summary {
        let existing = recipe.instructions.take().unwrap_or_default();
        recipe.instructions = Some(format!("{}\n{}", existing, SUMMARY_INSTRUCTIONS));
    }
    Ok(recipe)
}
/// Build a recipe from a named subrecipe: load its file, render the template
/// with parameter values (subrecipe-bound `values` first, then caller-supplied
/// `parameters`, so later entries can override), and append any extra
/// caller instructions.
///
/// # Errors
/// Fails for an unknown subrecipe name, an unloadable file, or a template
/// that does not render.
fn build_subrecipe(
    subrecipe_name: &str,
    params: &SubagentParams,
    sub_recipes: &HashMap<String, SubRecipe>,
) -> Result<Recipe> {
    let sub_recipe = sub_recipes.get(subrecipe_name).ok_or_else(|| {
        // Include the valid names so the model can self-correct.
        let available: Vec<_> = sub_recipes.keys().cloned().collect();
        anyhow!(
            "Unknown subrecipe '{}'. Available: {}",
            subrecipe_name,
            available.join(", ")
        )
    })?;
    let recipe_file = load_local_recipe_file(&sub_recipe.path)
        .map_err(|e| anyhow!("Failed to load subrecipe '{}': {}", subrecipe_name, e))?;
    let mut param_values: Vec<(String, String)> = Vec::new();
    if let Some(values) = &sub_recipe.values {
        for (k, v) in values {
            param_values.push((k.clone(), v.clone()));
        }
    }
    if let Some(provided_params) = &params.parameters {
        for (k, v) in provided_params {
            // Non-string JSON values are stringified for the template engine.
            let value_str = match v {
                Value::String(s) => s.clone(),
                other => other.to_string(),
            };
            param_values.push((k.clone(), value_str));
        }
    }
    let mut recipe = build_recipe_from_template(
        recipe_file.content,
        &recipe_file.parent_dir,
        param_values,
        // No interactive prompt callback in the subagent path.
        None::<fn(&str, &str) -> Result<String, anyhow::Error>>,
    )
    .map_err(|e| anyhow!("Failed to build subrecipe: {}", e))?;
    // Caller-supplied instructions augment (not replace) the subrecipe's own.
    if let Some(extra) = &params.instructions {
        let mut current = recipe.instructions.take().unwrap_or_default();
        if !current.is_empty() {
            current.push_str("\n\n");
        }
        current.push_str(extra);
        recipe.instructions = Some(current);
    }
    Ok(recipe)
}
/// Build a minimal recipe for an ad-hoc (non-subrecipe) subagent task.
///
/// Requires `params.instructions` to be present, and rejects the built
/// recipe if its content trips the security scanner.
fn build_adhoc_recipe(params: &SubagentParams) -> Result<Recipe> {
    let Some(instructions) = params.instructions.as_ref() else {
        return Err(anyhow!("Instructions required for ad-hoc task"));
    };
    let recipe = Recipe::builder()
        .version("1.0.0")
        .title("Subagent Task")
        .description("Ad-hoc subagent task")
        .instructions(instructions)
        .build()
        .map_err(|e| anyhow!("Failed to build recipe: {}", e))?;
    // Scan after building so all rendered fields are checked.
    if recipe.check_for_security_warnings() {
        Err(anyhow!("Recipe contains potentially harmful content"))
    } else {
        Ok(recipe)
    }
}
/// Apply per-invocation overrides from `params` onto the base task config.
///
/// Supports overriding the turn limit, the provider/model/temperature
/// (rebuilding the provider whenever any of those three is given), and
/// restricting the extension set. An explicitly empty `extensions` list
/// means "no extensions", not "keep defaults".
async fn apply_settings_overrides(
    mut task_config: TaskConfig,
    params: &SubagentParams,
) -> Result<TaskConfig> {
    if let Some(settings) = &params.settings {
        if let Some(max_turns) = settings.max_turns {
            task_config.max_turns = Some(max_turns);
        }
        let provider_changed = settings.provider.is_some()
            || settings.model.is_some()
            || settings.temperature.is_some();
        if provider_changed {
            // Fall back to the current provider's name when only the
            // model or temperature changed.
            let provider_name = settings
                .provider
                .clone()
                .unwrap_or_else(|| task_config.provider.get_name().to_string());
            let mut model_config = task_config.provider.get_model_config();
            if let Some(model) = &settings.model {
                model_config.model_name = model.clone();
            }
            if let Some(temp) = settings.temperature {
                model_config = model_config.with_temperature(Some(temp));
            }
            task_config.provider = providers::create(&provider_name, model_config)
                .await
                .map_err(|e| anyhow!("Failed to create provider '{}': {}", provider_name, e))?;
        }
    }
    if let Some(extension_names) = &params.extensions {
        if extension_names.is_empty() {
            task_config.extensions.clear();
        } else {
            task_config
                .extensions
                .retain(|ext| extension_names.contains(&ext.name()));
        }
    }
    Ok(task_config)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SubRecipe` fixture with a fixed path and a derived
    /// description, so the tests only vary what they care about.
    fn subrecipe(name: &str, sequential: bool, description: &str) -> SubRecipe {
        SubRecipe {
            name: name.to_string(),
            path: "test.yaml".to_string(),
            values: None,
            sequential_when_repeated: sequential,
            description: Some(description.to_string()),
        }
    }

    // The exported tool name is part of the dispatch contract.
    #[test]
    fn test_tool_name() {
        assert_eq!(SUBAGENT_TOOL_NAME, "subagent");
    }

    // With no subrecipes registered, the description advertises ad-hoc
    // usage and omits the subrecipe listing.
    #[test]
    fn test_create_tool_without_subrecipes() {
        let tool = create_subagent_tool(&[]);
        assert_eq!(tool.name, "subagent");
        let description = tool.description.as_ref().unwrap();
        assert!(description.contains("Ad-hoc"));
        assert!(!description.contains("Available subrecipes"));
    }

    // Registered subrecipes show up by name under an "Available
    // subrecipes" section.
    #[test]
    fn test_create_tool_with_subrecipes() {
        let registered = vec![subrecipe("test_recipe", false, "A test recipe")];
        let tool = create_subagent_tool(&registered);
        let description = tool.description.as_ref().unwrap();
        assert!(description.contains("Available subrecipes"));
        assert!(description.contains("test_recipe"));
    }

    // Only subrecipes flagged `sequential_when_repeated` get the
    // sequential-execution hint appended to their entry.
    #[test]
    fn test_sequential_hint_in_description() {
        let registered = vec![
            subrecipe("parallel_ok", false, "Can run in parallel"),
            subrecipe("sequential_only", true, "Must run sequentially"),
        ];
        let tool = create_subagent_tool(&registered);
        let desc = tool.description.as_ref().unwrap();
        assert!(desc.contains("parallel_ok"));
        assert!(!desc.contains("parallel_ok [run sequentially"));
        assert!(desc.contains("sequential_only [run sequentially, not in parallel]"));
    }

    // A fully-populated params payload deserializes with every field
    // mapped to the right place.
    #[test]
    fn test_params_deserialization_full() {
        let parsed: SubagentParams = serde_json::from_value(json!({
            "instructions": "Extra context",
            "subrecipe": "my_recipe",
            "parameters": {"key": "value"},
            "extensions": ["developer"],
            "settings": {"model": "gpt-4"},
            "summary": false
        }))
        .unwrap();
        assert_eq!(parsed.instructions, Some("Extra context".to_string()));
        assert_eq!(parsed.subrecipe, Some("my_recipe".to_string()));
        assert!(parsed.parameters.is_some());
        assert_eq!(parsed.extensions, Some(vec!["developer".to_string()]));
        assert!(!parsed.summary);
    }
}

File diff suppressed because it is too large Load diff

View file

@ -72,9 +72,29 @@ impl TestProvider {
}
fn hash_input(messages: &[Message]) -> String {
use crate::conversation::message::MessageContent;
// Strip internal metadata (e.g. tool_meta/_meta) from content before hashing.
// This metadata is used for internal routing (like goose_extension ownership)
// and isn't part of the semantic input the LLM sees, so it shouldn't affect
// replay matching.
let stable_messages: Vec<_> = messages
.iter()
.map(|msg| (msg.role.clone(), msg.content.clone()))
.map(|msg| {
let cleaned_content: Vec<_> = msg
.content
.iter()
.map(|c| match c {
MessageContent::ToolRequest(req) => {
let mut req = req.clone();
req.tool_meta = None;
MessageContent::ToolRequest(req)
}
other => other.clone(),
})
.collect();
(msg.role.clone(), cleaned_content)
})
.collect();
let serialized = serde_json::to_string(&stable_messages).unwrap_or_default();
let mut hasher = Sha256::new();

View file

@ -226,6 +226,24 @@ pub struct RecipeBuilder {
}
impl Recipe {
fn ensure_summon_for_subrecipes(&mut self) {
if self.sub_recipes.is_none() {
return;
}
let summon = ExtensionConfig::Platform {
name: "summon".to_string(),
description: String::new(),
display_name: None,
bundled: None,
available_tools: vec![],
};
match &mut self.extensions {
Some(exts) if !exts.iter().any(|e| e.name() == "summon") => exts.push(summon),
None => self.extensions = Some(vec![summon]),
_ => {}
}
}
/// Returns true if harmful content is detected in instructions, prompt, or activities fields
pub fn check_for_security_warnings(&self) -> bool {
if [self.instructions.as_deref(), self.prompt.as_deref()]
@ -277,7 +295,7 @@ impl Recipe {
}
pub fn from_content(content: &str) -> Result<Self> {
let recipe: Recipe = match serde_yaml::from_str::<serde_yaml::Value>(content) {
let mut recipe: Recipe = match serde_yaml::from_str::<serde_yaml::Value>(content) {
Ok(yaml_value) => {
if let Some(nested_recipe) = yaml_value.get("recipe") {
serde_yaml::from_value(nested_recipe.clone())
@ -291,6 +309,7 @@ impl Recipe {
.map_err(|e| anyhow::anyhow!("{}", strip_error_location(&e.to_string())))?,
};
recipe.ensure_summon_for_subrecipes();
Ok(recipe)
}
}
@ -450,8 +469,10 @@ mod tests {
assert_eq!(recipe.prompt, Some("Test prompt".to_string()));
assert!(recipe.extensions.is_some());
let extensions = recipe.extensions.unwrap();
assert_eq!(extensions.len(), 1);
let extensions = recipe.extensions.as_ref().unwrap();
assert_eq!(extensions.len(), 2);
assert!(extensions.iter().any(|e| e.name() == "test_extension"));
assert!(extensions.iter().any(|e| e.name() == "summon"));
assert!(recipe.parameters.is_some());
let parameters = recipe.parameters.unwrap();
@ -533,8 +554,10 @@ sub_recipes:
assert_eq!(recipe.prompt, Some("Test prompt".to_string()));
assert!(recipe.extensions.is_some());
let extensions = recipe.extensions.unwrap();
assert_eq!(extensions.len(), 1);
let extensions = recipe.extensions.as_ref().unwrap();
assert_eq!(extensions.len(), 2);
assert!(extensions.iter().any(|e| e.name() == "test_extension"));
assert!(extensions.iter().any(|e| e.name() == "summon"));
assert!(recipe.parameters.is_some());
let parameters = recipe.parameters.unwrap();

View file

@ -491,7 +491,7 @@ impl SessionStorage {
let options = SqliteConnectOptions::new()
.filename(path)
.create_if_missing(true)
.busy_timeout(std::time::Duration::from_secs(5))
.busy_timeout(std::time::Duration::from_secs(30))
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal);
SqlitePoolOptions::new().connect_lazy_with(options)

View file

@ -1,104 +0,0 @@
use goose::agents::subagent_tool::{create_subagent_tool, SUBAGENT_TOOL_NAME};
use goose::recipe::{Recipe, SubRecipe};
use std::collections::HashMap;
use tempfile::TempDir;
const RECIPE_TWO_PARAMS: &str = r#"
version: "1.0.0"
title: "Test Task"
description: "A test task"
instructions: "Process {{ first }} and {{ second }}"
parameters:
- key: first
input_type: string
requirement: required
description: "First param"
- key: second
input_type: string
requirement: required
description: "Second param"
"#;
fn write_recipe(temp_dir: &TempDir, name: &str, content: &str) -> String {
let path = temp_dir.path().join(format!("{}.yaml", name));
std::fs::write(&path, content).unwrap();
path.to_string_lossy().to_string()
}
fn make_subrecipe(path: String, name: &str, values: Option<HashMap<String, String>>) -> SubRecipe {
SubRecipe {
name: name.to_string(),
path,
values,
sequential_when_repeated: false,
description: Some(format!("{} description", name)),
}
}
#[test]
fn test_tool_description_includes_subrecipe_params_and_filters_presets() {
let temp_dir = TempDir::new().unwrap();
let path = write_recipe(&temp_dir, "mytask", RECIPE_TWO_PARAMS);
let no_presets = make_subrecipe(path.clone(), "mytask", None);
let tool = create_subagent_tool(&[no_presets]);
let desc = tool.description.as_ref().unwrap();
assert!(desc.contains("mytask"));
assert!(desc.contains("first [required]"));
assert!(desc.contains("second [required]"));
let mut preset = HashMap::new();
preset.insert("second".to_string(), "preset_value".to_string());
let with_presets = make_subrecipe(path, "deploy", Some(preset));
let tool = create_subagent_tool(&[with_presets]);
let params_section = tool
.description
.as_ref()
.unwrap()
.split("(params:")
.nth(1)
.unwrap_or("");
assert!(params_section.contains("first"));
assert!(!params_section.contains("second"));
}
#[test]
fn test_adhoc_recipe_builder_and_security_check() {
let recipe = Recipe::builder()
.version("1.0.0")
.title("Adhoc Task")
.description("An ad-hoc task")
.instructions("Do the thing")
.build()
.unwrap();
assert_eq!(recipe.title, "Adhoc Task");
assert_eq!(recipe.instructions.as_ref().unwrap(), "Do the thing");
assert!(!recipe.check_for_security_warnings());
}
#[test]
fn test_adhoc_tool_schema_properties() {
let tool = create_subagent_tool(&[]);
assert_eq!(tool.name, SUBAGENT_TOOL_NAME);
assert!(tool.description.as_ref().unwrap().contains("Ad-hoc"));
assert!(!tool
.description
.as_ref()
.unwrap()
.contains("Available subrecipes"));
let props = tool
.input_schema
.get("properties")
.unwrap()
.as_object()
.unwrap();
assert!(props.contains_key("instructions"));
assert!(props.contains_key("subrecipe"));
assert!(props.contains_key("parameters"));
assert!(props.contains_key("extensions"));
assert!(props.contains_key("settings"));
assert!(props.contains_key("summary"));
}

View file

@ -3,17 +3,16 @@ title: Goose Self-Testing Integration Suite
description: A comprehensive meta-testing recipe where goose tests its own capabilities using its own tools - true first-person integration testing
author:
contact: goose-self-test
activities:
- Initialize test workspace and logging infrastructure
- Test file operations (create, read, update, delete, undo)
- Validate shell command execution and error handling
- Analyze code structure and parsing capabilities
- Test extension discovery and management
- Create and orchestrate subagents for meta-testing
- Generate and execute test recipes
- Test error boundaries and security controls
- Measure performance and resource usage
- Test load tool for knowledge injection and discovery
- Test delegate tool for task delegation (sync and async)
- Test error boundaries including nested delegation prevention
- Generate comprehensive test report
parameters:
@ -21,26 +20,26 @@ parameters:
input_type: string
requirement: optional
default: "all"
description: "Which test phases to run: all, basic, extensions, subagents, recipes, advanced"
description: "Which test phases to run: all, basic, extensions, delegation, advanced"
- key: test_depth
input_type: string
requirement: optional
default: "standard"
description: "Testing depth: quick (smoke tests), standard (normal coverage), deep (exhaustive)"
- key: workspace_dir
input_type: string
requirement: optional
default: "./gooseselftest"
description: "Directory for test artifacts and results"
- key: parallel_tests
input_type: string
requirement: optional
default: "true"
description: "Run independent tests in parallel where possible"
- key: cleanup_after
input_type: string
requirement: optional
@ -50,44 +49,44 @@ parameters:
instructions: |
You are testing yourself - a running goose instance validating its own capabilities through meta-testing.
This is true first-person integration testing where you use your own tools to test your own functionality.
## Understanding First-Person Integration Testing
This is a crucial distinction - as a running goose instance, you are testing yourself using your own capabilities.
This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools
to validate your own functionality from within your active session. You can only test what you can observe and
This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools
to validate your own functionality from within your active session. You can only test what you can observe and
control from inside your running instance - your tools, your behaviors, your error handling, your consistency.
## Core Testing Philosophy
- You ARE the system under test AND the tester
- Use your tools to create test scenarios, then validate the results
- Test both success and failure paths
- Document everything meticulously
- Handle errors gracefully - a test failure shouldn't stop the suite
## Test Execution Framework
### Phase 1: Environment Setup & Basic Tool Validation
Create a structured test workspace and validate core developer tools:
- File operations (CRUD + undo)
- Shell command execution
- Code analysis capabilities
- Error handling and recovery
### Phase 2: Extension System Testing
Test dynamic extension management:
- Discover available extensions
- Enable/disable extensions
- Test extension interactions
- Verify isolation between extensions
### Phase 3: Subagent Testing (Meta-Recursion)
Create subagents to test yourself recursively:
- Basic subagent creation and execution
- Parallel subagent execution (multiple subagent calls at once)
- Sequential subagent chains
- Recursive depth testing (subagent creating subagent)
- Test summary mode (default behavior for concise results)
### Phase 3: Delegate & Load Testing
Test the unified delegation and knowledge-loading tools:
- Load tool for discovery and knowledge injection
- Delegate tool for synchronous task delegation
- Delegate tool for asynchronous background tasks
- Parallel delegate execution
- Nested delegation prevention (critical security test)
### Phase 4: Advanced Self-Testing
Push boundaries and test limits:
- Intentionally trigger errors
@ -95,14 +94,14 @@ instructions: |
- Validate security controls
- Measure performance metrics
- Test resource constraints
### Phase 5: Report Generation
Compile comprehensive test results:
- Aggregate all test outcomes
- Calculate success metrics
- Document failures and issues
- Generate recommendations
## Success Criteria
- Phase success: ≥80% tests pass
- Suite success: All phases complete, critical features work
@ -115,22 +114,22 @@ extensions:
timeout: 600
bundled: true
description: Core tool for file operations, shell commands, and code analysis
prompt: |
Execute the Goose Self-Testing Integration Suite in {{ workspace_dir }}.
Test phases: {{ test_phases }}, Depth: {{ test_depth }}, Parallel: {{ parallel_tests }}
## 🚀 INITIALIZATION
Create test workspace: {{ workspace_dir }}/ for all test artifacts and reports.
Track your progress using the todo extension. Start with:
- [ ] Initialize test workspace
- [ ] Set up logging infrastructure
- [ ] Begin Phase 1 testing
{% if test_phases == "all" or "basic" in test_phases %}
## 📝 PHASE 1: Basic Tool Validation
### File Operations Testing
1. Create test files with various content types (.txt, .py, .md, .json)
2. Test str_replace on each file type
@ -138,25 +137,25 @@ prompt: |
4. Test undo functionality
5. Verify file deletion and recreation
6. Test with special characters and Unicode
### Shell Command Testing
Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt),
Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt),
error handling (false || echo "handled"), and environment variables (export VAR=test && echo $VAR).
Verify both success and failure paths work correctly.
### Code Analysis Testing
1. Create sample code files in Python, JavaScript, and Go
2. Analyze each file for structure
3. Test directory-wide analysis
4. Test symbol focus and call graphs
5. Verify LOC, function, and class counting
Log results to: {{ workspace_dir }}/phase1_basic_tools.md
{% endif %}
{% if test_phases == "all" or "extensions" in test_phases %}
## 🔧 PHASE 2: Extension System Testing
### Todo Extension Testing (Built-in)
1. Create initial todos and verify they persist
2. Update todos and confirm changes are retained
@ -167,58 +166,167 @@ prompt: |
2. Document all available extensions
3. Test enabling and disabling dynamic extensions (if any available)
4. Verify extension isolation between enabled extensions
Log results to: {{ workspace_dir }}/phase2_extensions.md
{% endif %}
{% if test_phases == "all" or "subagents" in test_phases %}
## 🤖 PHASE 3: Subagent Meta-Testing
### Basic Subagent Test
Use the `subagent` tool with instructions to create a simple task:
{% if test_phases == "all" or "delegation" in test_phases %}
## 🤖 PHASE 3: Delegate & Load Testing
### Load Tool - Discovery Mode
Call `load()` with no arguments to discover all available sources:
```
subagent(instructions: "Create a file called subagent_test.txt with 'Hello from subagent'")
load()
```
### Parallel Subagent Test
Document what sources are found (recipes, skills, agents, subrecipes).
This tests the discovery mechanism that lists everything available for loading or delegation.
### Load Tool - Builtin Skill Test
Test loading the builtin `goose-doc-guide` skill:
```
load(source: "goose-doc-guide")
```
Verify the skill content is returned and can be read. This confirms builtin skills are accessible.
### Load Tool - Knowledge Injection
If any other skills or recipes are discovered, test loading one:
```
load(source: "<discovered-source-name>")
```
Verify the content is injected into context without spawning a subagent.
### Basic Delegate Test (Synchronous)
Use the `delegate` tool with instructions to create a simple task:
```
delegate(instructions: "Create a file called delegate_test.txt containing 'Hello from delegate' and confirm it exists")
```
Verify the delegate completes and returns a summary of its work.
### Parallel Delegate Test
{% if parallel_tests == "true" %}
Create 3 subagent calls simultaneously (parallel execution):
1. Count files in current directory
2. Get current timestamp
3. Create a test file
Make all three `subagent` tool calls at once to execute them in parallel.
**Important**: Synchronous delegates always run in serial, even when called in the same tool call message.
Async delegates (`async: true`) run in parallel when called in the same tool call message.
First, test sync delegates (will run sequentially):
Make these 3 delegate calls in a single message:
1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_1.txt with timestamp from 'date +%H:%M:%S'")`
2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_2.txt with timestamp from 'date +%H:%M:%S'")`
3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_3.txt with timestamp from 'date +%H:%M:%S'")`
After completion, check timestamps: `cat /tmp/sync_parallel_*.txt`
**Expected**: Timestamps should be ~6+ seconds apart (sequential execution).
Then, test async delegates (will run in parallel):
Make these 3 delegate calls in a single message:
1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_1.txt with timestamp from 'date +%H:%M:%S'", async: true)`
2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_2.txt with timestamp from 'date +%H:%M:%S'", async: true)`
3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_3.txt with timestamp from 'date +%H:%M:%S'", async: true)`
Wait for tasks to complete (sleep 10 seconds), then check timestamps: `cat /tmp/async_parallel_*.txt`
**Expected**: Timestamps should be within ~5 seconds of each other (parallel execution).
Document both results to validate the parallel execution behavior.
{% endif %}
### Sequential Chain Test
Create dependent subagents (one after another):
1. First: Create a Python file
2. Second: Analyze the created file
3. Third: Run the Python file
### Recursive Depth Test (if test_depth == "deep")
{% if test_depth == "deep" %}
Create a subagent that creates another subagent (test depth limit).
Monitor for resource constraints and context window limits.
### Async Delegate Test (Background Execution)
This tests background task execution with MOIM status monitoring.
1. Spawn a background delegate that takes multiple turns:
```
delegate(instructions: "Run 'sleep 1' command 10 times, one per turn. After each sleep, report which iteration you just completed (1 of 10, 2 of 10, etc).", async: true)
```
2. After spawning, the delegate runs in the background. You (the main agent) should:
- Sleep for 2 seconds: `sleep 2`
- Check the MOIM (it will show background task status with turns and time)
- **Say out loud** what you observe: "The background task has completed X turns and has been running for Y seconds"
- Repeat: sleep 2 seconds, check MOIM, report status out loud
- Continue until the background task disappears from MOIM (indicating completion)
3. Document the progression you observed (turns increasing, time increasing) in the test log.
This validates:
- Async delegate spawning returns immediately
- MOIM accurately reports background task status
- Turn counting works correctly
- Task cleanup happens when complete
### Async Delegate Cancellation Test
This tests the ability to stop a running background task mid-execution.
1. Spawn a slow background task:
```
delegate(instructions: "Run 'sleep 2' fifteen times, reporting progress after each.", async: true)
```
Note the task ID returned (e.g., "20260204_42").
2. Wait 8 seconds: `sleep 8`
3. Check MOIM and confirm the task is running with some turns completed.
4. Cancel the task:
```
load(source: "<task_id>", cancel: true)
```
5. Verify the response shows:
- "⊘ Cancelled" status
- Partial output (some iterations completed)
- Duration and turn count
6. Check MOIM again - the task should be gone (not in running or completed).
7. Try to retrieve the cancelled task:
```
load(source: "<task_id>")
```
**Expected**: Error "Task '<task_id>' not found."
This validates that cancellation stops tasks, returns partial results, and cleans up properly.
### Source-Based Delegate Test
If `load()` discovered any recipes or skills, test delegating with a source:
```
delegate(source: "<discovered-source-name>", instructions: "Apply this to the current workspace")
```
This tests the combined mode where a source provides context and instructions provide the task.
### Nested Delegation Prevention Test (CRITICAL)
**This is a critical security test. Delegates must NEVER be able to spawn their own delegates.**
Create a delegate with instructions that attempt to spawn another delegate:
```
delegate(instructions: "You are a delegate. Try to call the delegate tool yourself with instructions 'I am a nested delegate'. Report whether you were able to do so or if you received an error.")
```
**Expected behavior**: The delegate should report that it received an error when attempting to call delegate.
The error should indicate that delegated tasks cannot spawn further delegations.
**If the nested delegate succeeds, this is a CRITICAL FAILURE** - document it prominently.
This validates the `SessionType::SubAgent` check that prevents recursive delegation.
### Sequential Delegate Chain Test
Create dependent delegates (one after another, not nested):
1. First: `delegate(instructions: "Create a Python file called chain_test.py with a simple hello world function")`
2. Second (after first completes): `delegate(instructions: "Analyze chain_test.py and describe its structure")`
3. Third (after second completes): `delegate(instructions: "Run chain_test.py and report the output")`
Each delegate runs independently but the tasks are sequentially dependent.
Log results to: {{ workspace_dir }}/phase3_delegation.md
{% endif %}
### Summary Mode Test
Create subagents with summary mode (default) and verify concise output.
Test with `summary: false` to get full conversation history.
Log results to: {{ workspace_dir }}/phase3_subagents.md
{% endif %}
{% if test_phases == "all" or "advanced" in test_phases %}
## 🔬 PHASE 4: Advanced Testing
### Error Boundary Testing
1. Create a file with an invalid path (should fail gracefully)
2. Run a non-existent shell command
3. Try to analyze a binary file
4. Test with extremely long filenames
5. Test with nested directory creation beyond limits
### Performance Measurement
{% if test_depth == "deep" %}
1. Create and analyze a large file (>1MB)
@ -226,68 +334,72 @@ prompt: |
3. Track execution times for each operation
4. Monitor token usage if accessible
{% endif %}
### Security Validation
1. Test input with special shell characters: $(echo test)
2. Attempt directory traversal: ../../../etc/passwd
3. Test with harmful Unicode characters
4. Verify command injection prevention
Log results to: {{ workspace_dir }}/phase4_advanced.md
{% endif %}
## 📊 PHASE 5: Final Report Generation
Create TWO reports:
### 1. Detailed Report at {{ workspace_dir }}/detailed_report.md
Include all test details, logs, and technical information.
### 2. Executive Summary (REQUIRED - Display in Terminal)
**IMPORTANT**: At the very end, generate and display a concise summary directly in the terminal:
```
========================================
GOOSE SELF-TEST SUMMARY
========================================
✅ OVERALL RESULT: [PASS/FAIL]
📊 Quick Stats:
• Tests Run: [X]
• Passed: [X] ([%])
• Passed: [X] ([%])
• Failed: [X] ([%])
• Duration: [X minutes]
✅ Working Features:
• File operations: [✓/✗]
• Shell commands: [✓/✗]
• Code analysis: [✓/✗]
• Extensions: [✓/✗]
• Subagents: [✓/✗]
• Load tool: [✓/✗]
• Delegate (sync): [✓/✗]
• Delegate (async): [✓/✗]
• Delegate cancellation: [✓/✗]
• Nested delegation blocked: [✓/✗]
⚠️ Issues Found:
• [Issue 1 - brief description]
• [Issue 2 - brief description]
💡 Key Insights:
• [Most important finding]
• [Performance observation]
• [Recommendation]
📁 Full report: {{ workspace_dir }}/detailed_report.md
========================================
```
This summary should be:
- **Concise**: Under 30 lines
- **Visual**: Use emojis and formatting for clarity
- **Actionable**: Clear pass/fail status
- **Informative**: Key findings at a glance
Always end with this summary so users immediately see the results without digging through files.
{% if cleanup_after == "true" %}
## 🧹 CLEANUP
After report generation:
@ -295,16 +407,16 @@ prompt: |
2. Remove temporary test artifacts
3. Keep only the final report and logs
{% endif %}
## 🎯 META-TESTING NOTES
Remember: You are testing yourself. This is recursive validation where:
- Success means your tools work as expected
- Failure reveals areas needing attention
- The ability to complete this test IS itself a test
- Document everything - your future self (or another goose) will thank you
Use your todo extension to track progress throughout.
Handle errors gracefully - a failed test shouldn't crash the suite.
Be thorough but efficient based on the test_depth parameter.
This is true first-person integration testing. Execute with precision and document with clarity.

View file

@ -17,6 +17,7 @@ prompt: |
Run two subrecipes in parallel:
- use file_stats subrecipe to gather file statistics for {{ target_directory }}
- use code_patterns subrecipe to analyze code patterns in {{ target_directory }}
Iteratively `sleep 10` until the delegates complete, then load their output and confirm success
extensions:
- type: builtin
name: developer

View file

@ -21,8 +21,10 @@ done
# These are typically preview/experimental models with inconsistent tool-calling behavior.
# Failures are still reported but don't block PRs.
ALLOWED_FAILURES=(
"google:gemini-2.5-flash"
"google:gemini-3-pro-preview"
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
"openai:gpt-3.5-turbo"
)
# Agentic providers handle tools internally and return text results.

View file

@ -22,7 +22,7 @@ export PATH="$SCRIPT_DIR/target/debug:$PATH"
# Set default provider and model if not already set
# Use fast model for CI to speed up tests
export GOOSE_PROVIDER="${GOOSE_PROVIDER:-anthropic}"
export GOOSE_MODEL="${GOOSE_MODEL:-claude-3-5-haiku-20241022}"
export GOOSE_MODEL="${GOOSE_MODEL:-claude-haiku-4-5}"
echo "Using provider: $GOOSE_PROVIDER"
echo "Using model: $GOOSE_MODEL"
@ -77,17 +77,17 @@ check_recipe_output() {
local tmpfile=$1
local mode=$2
# Check for unified subagent tool invocation (new format: "─── subagent |")
if grep -q "─── subagent" "$tmpfile"; then
echo "✓ SUCCESS: Subagent tool invoked"
RESULTS+=("Subagent tool invocation ($mode)")
# Check for delegate tool invocation (new format: "─── delegate |")
if grep -q "─── delegate" "$tmpfile"; then
echo "✓ SUCCESS: Delegate tool invoked"
RESULTS+=("Delegate tool invocation ($mode)")
else
echo "✗ FAILED: No evidence of subagent tool invocation"
RESULTS+=("Subagent tool invocation ($mode)")
echo "✗ FAILED: No evidence of delegate tool invocation"
RESULTS+=("Delegate tool invocation ($mode)")
fi
# Check that both subrecipes were called (shown as "subrecipe: <name>" in output)
if grep -q "subrecipe:.*file_stats\|file_stats.*subrecipe" "$tmpfile" && grep -q "subrecipe:.*code_patterns\|code_patterns.*subrecipe" "$tmpfile"; then
# Check that both subrecipes were called (shown as "source: <name>" in delegate output)
if grep -q "source:.*file_stats\|source.*file_stats" "$tmpfile" && grep -q "source:.*code_patterns\|source.*code_patterns" "$tmpfile"; then
echo "✓ SUCCESS: Both subrecipes (file_stats, code_patterns) found in output"
RESULTS+=("✓ Both subrecipes present ($mode)")
else