feat: replace subagent and skills with unified summon extension (#6964)

Signed-off-by: Travis Longwell <travis@block.xyz>
This commit is contained in:
tlongwell-block 2026-02-10 14:13:38 -05:00 committed by GitHub
parent 1168d7d9b1
commit 7ea19f5c83
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
26 changed files with 2616 additions and 1935 deletions

1
.gitignore vendored
View file

@ -72,3 +72,4 @@ result
# Goose self-test artifacts
gooseselftest/
.tasks/

View file

@ -97,7 +97,10 @@ mod tests {
input_config.additional_system_prompt,
Some("test_instructions my_value".to_string())
);
assert!(recipe.extensions.is_none());
assert!(recipe
.extensions
.as_ref()
.is_none_or(|e| e.iter().all(|ext| ext.name() == "summon")));
assert!(settings.is_some());
let settings = settings.unwrap();
@ -162,7 +165,10 @@ mod tests {
input_config.additional_system_prompt,
Some("test_instructions my_value".to_string())
);
assert!(recipe.extensions.is_none());
assert!(recipe
.extensions
.as_ref()
.is_none_or(|e| e.iter().all(|ext| ext.name() == "summon")));
assert!(settings.is_some());
let settings = settings.unwrap();

View file

@ -600,11 +600,7 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession {
let recipe = session_config.recipe.as_ref();
agent
.apply_recipe_components(
recipe.and_then(|r| r.sub_recipes.clone()),
recipe.and_then(|r| r.response.clone()),
true,
)
.apply_recipe_components(recipe.and_then(|r| r.response.clone()), true)
.await;
let new_provider = match create(&resolved.provider_name, resolved.model_config).await {
@ -643,6 +639,17 @@ pub async fn build_session(session_config: SessionBuilderConfig) -> CliSession {
process::exit(1);
});
if let Some(recipe) = session_config.recipe.clone() {
if let Err(e) = session_manager
.update(&session_id)
.recipe(Some(recipe))
.apply()
.await
{
tracing::warn!("Failed to store recipe on session: {}", e);
}
}
if session_config.resume {
handle_resumed_session_workdir(&agent, &session_id, session_config.interactive).await;
}

View file

@ -21,8 +21,8 @@ use tokio_util::task::AbortOnDropHandle;
pub use self::export::message_to_markdown;
pub use builder::{build_session, SessionBuilderConfig};
use console::Color;
use goose::agents::subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE;
use goose::agents::AgentEvent;
use goose::agents::SUBAGENT_TOOL_REQUEST_TYPE;
use goose::permission::permission_confirmation::PrincipalType;
use goose::permission::Permission;
use goose::permission::PermissionConfirmation;

View file

@ -314,8 +314,9 @@ fn render_tool_request(req: &ToolRequest, theme: Theme, debug: bool) {
Ok(call) => match call.name.to_string().as_str() {
"developer__text_editor" => render_text_editor_request(call, debug),
"developer__shell" => render_shell_request(call, debug),
"code_execution__execute" => render_execute_code_request(call, debug),
"subagent" => render_subagent_request(call, debug),
"execute" | "execute_code" => render_execute_code_request(call, debug),
"delegate" => render_delegate_request(call, debug),
"subagent" => render_delegate_request(call, debug),
"todo__write" => render_todo_request(call, debug),
_ => render_default_request(call, debug),
},
@ -555,12 +556,12 @@ fn render_execute_code_request(call: &CallToolRequestParams, debug: bool) {
println!();
}
fn render_subagent_request(call: &CallToolRequestParams, debug: bool) {
fn render_delegate_request(call: &CallToolRequestParams, debug: bool) {
print_tool_header(call);
if let Some(args) = &call.arguments {
if let Some(Value::String(subrecipe)) = args.get("subrecipe") {
println!("{}: {}", style("subrecipe").dim(), style(subrecipe).cyan());
if let Some(Value::String(source)) = args.get("source") {
println!("{}: {}", style("source").dim(), style(source).cyan());
}
if let Some(Value::String(instructions)) = args.get("instructions") {
@ -581,7 +582,7 @@ fn render_subagent_request(call: &CallToolRequestParams, debug: bool) {
print_params(&Some(params.clone()), 1, debug);
}
let skip_keys = ["subrecipe", "instructions", "parameters"];
let skip_keys = ["source", "instructions", "parameters"];
let mut other_args = serde_json::Map::new();
for (k, v) in args {
if !skip_keys.contains(&k.as_str()) {

View file

@ -163,11 +163,7 @@ pub async fn apply_recipe_to_agent(
include_final_output_tool: bool,
) -> Option<String> {
agent
.apply_recipe_components(
recipe.sub_recipes.clone(),
recipe.response.clone(),
include_final_output_tool,
)
.apply_recipe_components(recipe.response.clone(), include_final_output_tool)
.await;
recipe.instructions.as_ref().map(|instructions| {

View file

@ -20,10 +20,6 @@ use crate::agents::final_output_tool::{FINAL_OUTPUT_CONTINUATION_MESSAGE, FINAL_
use crate::agents::platform_tools::PLATFORM_MANAGE_SCHEDULE_TOOL_NAME;
use crate::agents::prompt_manager::PromptManager;
use crate::agents::retry::{RetryManager, RetryResult};
use crate::agents::subagent_task_config::TaskConfig;
use crate::agents::subagent_tool::{
create_subagent_tool, handle_subagent_tool, SUBAGENT_TOOL_NAME,
};
use crate::agents::types::{FrontendTool, SessionConfig, SharedProvider, ToolResultReceiver};
use crate::config::permission::PermissionManager;
use crate::config::{get_enabled_extensions, Config, GooseMode};
@ -42,11 +38,11 @@ use crate::permission::permission_judge::PermissionCheckResult;
use crate::permission::PermissionConfirmation;
use crate::providers::base::Provider;
use crate::providers::errors::ProviderError;
use crate::recipe::{Author, Recipe, Response, Settings, SubRecipe};
use crate::recipe::{Author, Recipe, Response, Settings};
use crate::scheduler_trait::SchedulerTrait;
use crate::security::security_inspector::SecurityInspector;
use crate::session::extension_data::{EnabledExtensionsState, ExtensionState};
use crate::session::{Session, SessionManager, SessionType};
use crate::session::{Session, SessionManager};
use crate::tool_inspection::ToolInspectionManager;
use crate::tool_monitor::RepetitionInspector;
use crate::utils::is_token_cancelled;
@ -121,7 +117,6 @@ pub struct Agent {
pub config: AgentConfig,
pub extension_manager: Arc<ExtensionManager>,
pub(super) sub_recipes: Mutex<HashMap<String, SubRecipe>>,
pub(super) final_output_tool: Arc<Mutex<Option<FinalOutputTool>>>,
pub(super) frontend_tools: Mutex<HashMap<String, FrontendTool>>,
pub(super) frontend_instructions: Mutex<Option<String>>,
@ -210,7 +205,6 @@ impl Agent {
provider: provider.clone(),
config,
extension_manager: Arc::new(ExtensionManager::new(provider.clone(), session_manager)),
sub_recipes: Mutex::new(HashMap::new()),
final_output_tool: Arc::new(Mutex::new(None)),
frontend_tools: Mutex::new(HashMap::new()),
frontend_instructions: Mutex::new(None),
@ -452,23 +446,11 @@ impl Agent {
self.extend_system_prompt(final_output_system_prompt).await;
}
/// Register sub-recipes on the agent, keyed by sub-recipe name.
///
/// A later entry with the same name replaces an earlier one
/// (HashMap::insert semantics).
pub async fn add_sub_recipes(&self, sub_recipes_to_add: Vec<SubRecipe>) {
    let mut sub_recipes = self.sub_recipes.lock().await;
    for sr in sub_recipes_to_add {
        sub_recipes.insert(sr.name.clone(), sr);
    }
}
pub async fn apply_recipe_components(
&self,
sub_recipes: Option<Vec<SubRecipe>>,
response: Option<Response>,
include_final_output: bool,
) {
if let Some(sub_recipes) = sub_recipes {
self.add_sub_recipes(sub_recipes).await;
}
if include_final_output {
if let Some(response) = response {
self.add_final_output_tool(response).await;
@ -485,18 +467,6 @@ impl Agent {
cancellation_token: Option<CancellationToken>,
session: &Session,
) -> (String, Result<ToolCallResult, ErrorData>) {
// Prevent subagents from creating other subagents
if session.session_type == SessionType::SubAgent && tool_call.name == SUBAGENT_TOOL_NAME {
return (
request_id,
Err(ErrorData::new(
ErrorCode::INVALID_REQUEST,
"Subagents cannot create other subagents".to_string(),
None,
)),
);
}
if tool_call.name == PLATFORM_MANAGE_SCHEDULE_TOOL_NAME {
let arguments = tool_call
.arguments
@ -531,49 +501,7 @@ impl Agent {
}
debug!("WAITING_TOOL_START: {}", tool_call.name);
let result: ToolCallResult = if tool_call.name == SUBAGENT_TOOL_NAME {
let provider = match self.provider().await {
Ok(p) => p,
Err(_) => {
return (
request_id,
Err(ErrorData::new(
ErrorCode::INTERNAL_ERROR,
"Provider is required".to_string(),
None,
)),
);
}
};
let extensions = self.get_extension_configs().await;
let max_turns_from_recipe = session
.recipe
.as_ref()
.and_then(|r| r.settings.as_ref())
.and_then(|s| s.max_turns);
let task_config =
TaskConfig::new(provider, &session.id, &session.working_dir, extensions)
.with_max_turns(max_turns_from_recipe);
let sub_recipes = self.sub_recipes.lock().await.clone();
let arguments = tool_call
.arguments
.clone()
.map(Value::Object)
.unwrap_or(Value::Object(serde_json::Map::new()));
handle_subagent_tool(
&self.config,
arguments,
task_config,
sub_recipes,
session.working_dir.clone(),
cancellation_token,
)
} else if self.is_frontend_tool(&tool_call.name).await {
let result: ToolCallResult = if self.is_frontend_tool(&tool_call.name).await {
// For frontend tools, return an error indicating we need frontend execution
ToolCallResult::from(Err(ErrorData::new(
ErrorCode::INTERNAL_ERROR,
@ -807,30 +735,6 @@ impl Agent {
Ok(())
}
/// Whether subagent tooling is available in the given session.
///
/// Returns true only when all of the following hold:
/// - the agent is in `GooseMode::Auto`;
/// - the session is not itself a `SubAgent` session (no nested subagents);
/// - at least one extension is currently loaded.
pub async fn subagents_enabled(&self, session_id: &str) -> bool {
    // Any mode other than Auto disables subagents outright.
    if self.config.goose_mode != GooseMode::Auto {
        return false;
    }
    let context = self.extension_manager.get_context();
    // Prevent nesting: a session that is itself a subagent may not spawn more.
    // A session lookup failure falls through (treated as "not a subagent").
    if matches!(
        context
            .session_manager
            .get_session(session_id, false)
            .await
            .ok()
            .map(|session| session.session_type),
        Some(SessionType::SubAgent)
    ) {
        return false;
    }
    // Require at least one loaded extension; a listing error is treated
    // the same as an empty extension list (disabled).
    !self
        .extension_manager
        .list_extensions()
        .await
        .map(|ext| ext.is_empty())
        .unwrap_or(true)
}
pub async fn list_tools(&self, session_id: &str, extension_name: Option<String>) -> Vec<Tool> {
let mut prefixed_tools = self
.extension_manager
@ -838,7 +742,6 @@ impl Agent {
.await
.unwrap_or_default();
let subagents_enabled = self.subagents_enabled(session_id).await;
if (extension_name.is_none() || extension_name.as_deref() == Some("platform"))
&& self.config.scheduler_service.is_some()
{
@ -849,12 +752,6 @@ impl Agent {
if let Some(final_output_tool) = self.final_output_tool.lock().await.as_ref() {
prefixed_tools.push(final_output_tool.tool());
}
if subagents_enabled {
let sub_recipes = self.sub_recipes.lock().await;
let sub_recipes_vec: Vec<_> = sub_recipes.values().cloned().collect();
prefixed_tools.push(create_subagent_tool(&sub_recipes_vec));
}
}
prefixed_tools

View file

@ -3,7 +3,7 @@ use include_dir::{include_dir, Dir};
static BUILTIN_SKILLS_DIR: Dir =
include_dir!("$CARGO_MANIFEST_DIR/src/agents/builtin_skills/skills");
pub fn get_all_builtin_skills() -> Vec<&'static str> {
pub fn get_all() -> Vec<&'static str> {
BUILTIN_SKILLS_DIR
.files()
.filter(|f| f.path().extension().is_some_and(|ext| ext == "md"))

View file

@ -1,4 +1,5 @@
use crate::agents::extension::PlatformExtensionContext;
use crate::agents::extension_manager::get_tool_owner;
use crate::agents::mcp_client::{Error, McpClientTrait};
use anyhow::Result;
use async_trait::async_trait;
@ -111,10 +112,16 @@ impl CodeExecutionClient {
let mut cfgs = vec![];
for tool in tools {
let full_name = tool.name.to_string();
let (server_name, tool_name) = full_name.split_once("__")?;
let (namespace, name) = if let Some((server, tool_name)) = full_name.split_once("__") {
(server.to_string(), tool_name.to_string())
} else if let Some(owner) = get_tool_owner(&tool) {
(owner, full_name)
} else {
continue;
};
cfgs.push(CallbackConfig {
name: tool_name.into(),
namespace: server_name.into(),
name,
namespace,
description: tool.description.as_ref().map(|d| d.to_string()),
input_schema: Some(json!(tool.input_schema)),
output_schema: tool.output_schema.as_ref().map(|s| json!(s)),

View file

@ -378,7 +378,7 @@ impl Agent {
Err(e) => return Err(anyhow!("Failed to build recipe: {}", e)),
};
self.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true)
self.apply_recipe_components(recipe.response.clone(), true)
.await;
let prompt = [recipe.instructions.as_deref(), recipe.prompt.as_deref()]

View file

@ -2,7 +2,7 @@ use crate::agents::apps_extension;
use crate::agents::chatrecall_extension;
use crate::agents::code_execution_extension;
use crate::agents::extension_manager_extension;
use crate::agents::skills_extension;
use crate::agents::summon_extension;
use crate::agents::todo_extension;
use crate::agents::tom_extension;
use std::collections::HashMap;
@ -53,6 +53,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Enable a todo list for goose so it can keep track of what it is doing",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(todo_extension::TodoClient::new(ctx).unwrap()),
},
);
@ -65,6 +66,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Create and manage custom Goose apps through chat. Apps are HTML/CSS/JavaScript and run in sandboxed windows.",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(apps_extension::AppsManagerClient::new(ctx).unwrap()),
},
);
@ -77,6 +79,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Search past conversations and load session summaries for contextual memory",
default_enabled: false,
unprefixed_tools: false,
client_factory: |ctx| {
Box::new(chatrecall_extension::ChatRecallClient::new(ctx).unwrap())
},
@ -91,18 +94,20 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Enable extension management tools for discovering, enabling, and disabling extensions",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(extension_manager_extension::ExtensionManagerClient::new(ctx).unwrap()),
},
);
map.insert(
skills_extension::EXTENSION_NAME,
summon_extension::EXTENSION_NAME,
PlatformExtensionDef {
name: skills_extension::EXTENSION_NAME,
display_name: "Skills",
description: "Load and use skills from relevant directories",
name: summon_extension::EXTENSION_NAME,
display_name: "Summon",
description: "Load knowledge and delegate tasks to subagents",
default_enabled: true,
client_factory: |ctx| Box::new(skills_extension::SkillsClient::new(ctx).unwrap()),
unprefixed_tools: true,
client_factory: |ctx| Box::new(summon_extension::SummonClient::new(ctx).unwrap()),
},
);
@ -114,6 +119,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Goose will make extension calls through code execution, saving tokens",
default_enabled: false,
unprefixed_tools: true,
client_factory: |ctx| {
Box::new(code_execution_extension::CodeExecutionClient::new(ctx).unwrap())
},
@ -128,6 +134,7 @@ pub static PLATFORM_EXTENSIONS: Lazy<HashMap<&'static str, PlatformExtensionDef>
description:
"Inject custom context into every turn via GOOSE_MOIM_MESSAGE_TEXT and GOOSE_MOIM_MESSAGE_FILE environment variables",
default_enabled: true,
unprefixed_tools: false,
client_factory: |ctx| Box::new(tom_extension::TomClient::new(ctx).unwrap()),
},
);
@ -175,12 +182,15 @@ impl PlatformExtensionContext {
}
}
/// Definition for a platform extension that runs in-process with direct agent access.
#[derive(Debug, Clone)]
pub struct PlatformExtensionDef {
    // Stable registry key/name of the extension.
    pub name: &'static str,
    // Human-readable name.
    pub display_name: &'static str,
    pub description: &'static str,
    // Whether the extension is enabled by default — NOTE(review): presumably
    // per new session; confirm against the registry consumer.
    pub default_enabled: bool,
    /// If true, tools are exposed without extension prefix for intuitive first-class use.
    pub unprefixed_tools: bool,
    // Constructs the in-process MCP client for this extension.
    pub client_factory: fn(PlatformExtensionContext) -> Box<dyn McpClientTrait>,
}

View file

@ -105,18 +105,18 @@ pub struct ExtensionManager {
/// A flattened representation of a resource used by the agent to prepare inference
#[derive(Debug, Clone)]
pub struct ResourceItem {
pub client_name: String, // The name of the client that owns the resource
pub uri: String, // The URI of the resource
pub name: String, // The name of the resource
pub content: String, // The content of the resource
pub extension_name: String, // The name of the extension that owns the resource
pub uri: String, // The URI of the resource
pub name: String, // The name of the resource
pub content: String, // The content of the resource
pub timestamp: DateTime<Utc>, // The timestamp of the resource
pub priority: f32, // The priority of the resource
pub priority: f32, // The priority of the resource
pub token_count: Option<u32>, // The token count of the resource (filled in by the agent)
}
impl ResourceItem {
pub fn new(
client_name: String,
extension_name: String,
uri: String,
name: String,
content: String,
@ -124,7 +124,7 @@ impl ResourceItem {
priority: f32,
) -> Self {
Self {
client_name,
extension_name,
uri,
name,
content,
@ -199,6 +199,41 @@ pub fn get_parameter_names(tool: &Tool) -> Vec<String> {
names
}
const TOOL_EXTENSION_META_KEY: &str = "goose_extension";
/// Return the owning extension recorded in the tool's metadata.
///
/// Reads the string value stored under `TOOL_EXTENSION_META_KEY` in the
/// tool's `_meta` map; `None` if the meta map or key is absent, or the
/// value is not a string.
pub fn get_tool_owner(tool: &Tool) -> Option<String> {
    let meta = tool.meta.as_ref()?;
    let owner = meta.0.get(TOOL_EXTENSION_META_KEY)?;
    owner.as_str().map(str::to_string)
}
/// True when `config` names a platform extension whose tools are exposed
/// without the `<extension>__` prefix. Non-platform configs are never
/// unprefixed.
fn is_unprefixed_extension(config: &ExtensionConfig) -> bool {
    match config {
        ExtensionConfig::Platform { name, .. } => PLATFORM_EXTENSIONS
            .get(name_to_key(name).as_str())
            .map(|def| def.unprefixed_tools)
            .unwrap_or(false),
        _ => false,
    }
}
/// Returns true if the named extension is a first-class platform extension
/// whose tools are exposed unprefixed and remain visible during code execution mode.
pub fn is_first_class_extension(name: &str) -> bool {
    match PLATFORM_EXTENSIONS.get(name_to_key(name).as_str()) {
        Some(def) => def.unprefixed_tools,
        None => false,
    }
}
/// Result of resolving a tool call to its owning extension
struct ResolvedTool {
    // Normalized key of the extension that owns the tool.
    extension_name: String,
    // Tool name with any extension prefix stripped — the name the client expects.
    actual_tool_name: String,
    // Handle to the owning extension's client.
    client: McpClientBox,
}
async fn child_process_client(
mut command: Command,
timeout: &Option<u64>,
@ -789,16 +824,18 @@ impl ExtensionManager {
tools
.iter()
.filter(|tool| {
let tool_prefix = tool.name.split("__").next().unwrap_or("");
let tool_owner = get_tool_owner(tool)
.map(|s| name_to_key(&s))
.unwrap_or_else(|| tool.name.split("__").next().unwrap_or("").to_string());
if let Some(ref excluded) = exclude_normalized {
if tool_prefix == excluded {
if tool_owner == *excluded {
return false;
}
}
if let Some(ref name_filter) = extension_name_normalized {
tool_prefix == name_filter
tool_owner == *name_filter
} else {
true
}
@ -861,18 +898,36 @@ impl ExtensionManager {
}
};
let expose_unprefixed = is_unprefixed_extension(&config);
loop {
for tool in client_tools.tools {
if config.is_tool_available(&tool.name) {
let public_name = if expose_unprefixed {
tool.name.to_string()
} else {
format!("{}__{}", name, tool.name)
};
let mut meta_map = tool
.meta
.as_ref()
.map(|m| m.0.clone())
.unwrap_or_default();
meta_map.insert(
TOOL_EXTENSION_META_KEY.to_string(),
serde_json::Value::String(name.clone()),
);
tools.push(Tool {
name: format!("{}__{}", name, tool.name).into(),
name: public_name.into(),
description: tool.description,
input_schema: tool.input_schema,
annotations: tool.annotations,
output_schema: tool.output_schema,
icons: tool.icons,
title: tool.title,
meta: tool.meta,
meta: Some(rmcp::model::Meta(meta_map)),
});
}
}
@ -899,9 +954,22 @@ impl ExtensionManager {
let results = future::join_all(client_futures).await;
let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
let mut tools = Vec::new();
for (_, client_tools) in results {
tools.extend(client_tools);
for (ext_name, client_tools) in results {
for tool in client_tools {
let tool_name = tool.name.to_string();
if seen_names.contains(&tool_name) {
warn!(
tool = %tool_name,
extension = %ext_name,
"Duplicate tool name - skipping"
);
continue;
}
seen_names.insert(tool_name);
tools.push(tool);
}
}
Ok(tools)
@ -915,16 +983,6 @@ impl ExtensionManager {
prompt_template::render_template("plan.md", &context).expect("Prompt should render")
}
/// Find and return a reference to the appropriate client for a tool call
async fn get_client_for_tool(&self, prefixed_name: &str) -> Option<(String, McpClientBox)> {
self.extensions
.lock()
.await
.iter()
.find(|(key, _)| prefixed_name.starts_with(*key))
.map(|(name, extension)| (name.clone(), extension.get_client()))
}
// Function that gets executed for read_resource tool
pub async fn read_resource_tool(
&self,
@ -1183,6 +1241,66 @@ impl ExtensionManager {
}
}
/// Resolve a (possibly prefixed) tool-call name to its owning extension.
///
/// Fast path: if the name matches the `<extension>__<tool>` convention and
/// an extension with that key exists, use it directly. Otherwise, scan the
/// cached tool list and use the owner recorded in each tool's metadata.
async fn resolve_tool(
    &self,
    session_id: &str,
    tool_name: &str,
) -> Result<ResolvedTool, ErrorData> {
    // Fast path: conventional "<extension>__<tool>" naming.
    if let Some((prefix, actual)) = tool_name.split_once("__") {
        let key = name_to_key(prefix);
        if let Some(client) = self.get_server_client(&key).await {
            return Ok(ResolvedTool {
                extension_name: key,
                actual_tool_name: actual.to_string(),
                client,
            });
        }
    }

    // Slow path: look the tool up in the cached tool list and read its
    // ownership metadata.
    let all_tools = self.get_all_tools_cached(session_id).await.map_err(|e| {
        ErrorData::new(
            ErrorCode::INTERNAL_ERROR,
            format!("Failed to get tools: {}", e),
            None,
        )
    })?;

    let Some(tool) = all_tools.iter().find(|t| *t.name == *tool_name) else {
        return Err(ErrorData::new(
            ErrorCode::RESOURCE_NOT_FOUND,
            format!("Tool '{}' not found", tool_name),
            None,
        ));
    };

    let owner = get_tool_owner(tool).ok_or_else(|| {
        ErrorData::new(
            ErrorCode::RESOURCE_NOT_FOUND,
            format!("Tool '{}' has no owner", tool_name),
            None,
        )
    })?;
    // Strip the owner prefix if present; unprefixed tool names pass through.
    let bare_name = tool_name
        .strip_prefix(&format!("{owner}__"))
        .unwrap_or(tool_name)
        .to_string();
    let client = self.get_server_client(&owner).await.ok_or_else(|| {
        ErrorData::new(
            ErrorCode::RESOURCE_NOT_FOUND,
            format!("Extension '{}' not found for tool '{}'", owner, tool_name),
            None,
        )
    })?;
    Ok(ResolvedTool {
        extension_name: owner,
        actual_tool_name: bare_name,
        client,
    })
}
pub async fn dispatch_tool_call(
&self,
session_id: &str,
@ -1190,52 +1308,19 @@ impl ExtensionManager {
working_dir: Option<&std::path::Path>,
cancellation_token: CancellationToken,
) -> Result<ToolCallResult> {
// Some models strip the tool prefix, so auto-add it for known code_execution tools
let tool_name_str = tool_call.name.to_string();
let prefixed_name = if !tool_name_str.contains("__") {
let code_exec_tools = ["execute", "list_functions", "get_function_details"];
if code_exec_tools.contains(&tool_name_str.as_str())
&& self.extensions.lock().await.contains_key("code_execution")
let resolved = self.resolve_tool(session_id, &tool_name_str).await?;
if let Some(extension) = self.extensions.lock().await.get(&resolved.extension_name) {
if !extension
.config
.is_tool_available(&resolved.actual_tool_name)
{
format!("code_execution__{tool_name_str}")
} else {
tool_name_str
}
} else {
tool_name_str
};
// Dispatch tool call based on the prefix naming convention
let (client_name, client) =
self.get_client_for_tool(&prefixed_name)
.await
.ok_or_else(|| {
ErrorData::new(
ErrorCode::RESOURCE_NOT_FOUND,
format!("Tool '{}' not found", tool_call.name),
None,
)
})?;
let tool_name = prefixed_name
.strip_prefix(client_name.as_str())
.and_then(|s| s.strip_prefix("__"))
.ok_or_else(|| {
ErrorData::new(
ErrorCode::RESOURCE_NOT_FOUND,
format!("Invalid tool name format: '{}'", tool_call.name),
None,
)
})?
.to_string();
if let Some(extension) = self.extensions.lock().await.get(&client_name) {
if !extension.config.is_tool_available(&tool_name) {
return Err(ErrorData::new(
ErrorCode::RESOURCE_NOT_FOUND,
format!(
"Tool '{}' is not available for extension '{}'",
tool_name, client_name
resolved.actual_tool_name, resolved.extension_name
),
None,
)
@ -1244,15 +1329,16 @@ impl ExtensionManager {
}
let arguments = tool_call.arguments.clone();
let client = client.clone();
let client = resolved.client.clone();
let notifications_receiver = client.lock().await.subscribe().await;
let session_id = session_id.to_string();
let actual_tool_name = resolved.actual_tool_name;
let working_dir_str = working_dir.map(|p| p.to_string_lossy().to_string());
let fut = async move {
tracing::debug!(
"dispatch_tool_call fut: calling client.call_tool tool={} session_id={} working_dir={:?}",
tool_name,
"dispatch_tool_call: calling client.call_tool tool={} session_id={} working_dir={:?}",
actual_tool_name,
session_id,
working_dir_str
);
@ -1260,7 +1346,7 @@ impl ExtensionManager {
client_guard
.call_tool(
&session_id,
&tool_name,
&actual_tool_name,
arguments,
working_dir_str.as_deref(),
cancellation_token,
@ -1636,70 +1722,8 @@ mod tests {
}
}
#[tokio::test]
async fn test_get_client_for_tool() {
let temp_dir = tempfile::tempdir().unwrap();
let extension_manager =
ExtensionManager::new_without_provider(temp_dir.path().to_path_buf());
// Add some mock clients using the helper method
extension_manager
.add_mock_extension(
"test_client".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
extension_manager
.add_mock_extension(
"__client".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
extension_manager
.add_mock_extension(
"__cli__ent__".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
extension_manager
.add_mock_extension(
"client 🚀".to_string(),
Arc::new(Mutex::new(Box::new(MockClient {}))),
)
.await;
// Test basic case
assert!(extension_manager
.get_client_for_tool("test_client__tool")
.await
.is_some());
// Test leading underscores
assert!(extension_manager
.get_client_for_tool("__client__tool")
.await
.is_some());
// Test multiple underscores in client name, and ending with __
assert!(extension_manager
.get_client_for_tool("__cli__ent____tool")
.await
.is_some());
// Test unicode in tool name, "client 🚀" should become "client_"
assert!(extension_manager
.get_client_for_tool("client___tool")
.await
.is_some());
}
#[tokio::test]
async fn test_dispatch_tool_call() {
// test that dispatch_tool_call parses out the sanitized name correctly, and extracts
// tool_names
let temp_dir = tempfile::tempdir().unwrap();
let extension_manager =
ExtensionManager::new_without_provider(temp_dir.path().to_path_buf());
@ -1726,7 +1750,6 @@ mod tests {
)
.await;
// verify a normal tool call
let tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1747,7 +1770,7 @@ mod tests {
let tool_call = CallToolRequestParams {
meta: None,
task: None,
name: "test_client__test__tool".to_string().into(),
name: "test_client__available_tool".to_string().into(),
arguments: Some(object!({})),
};
@ -1761,7 +1784,6 @@ mod tests {
.await;
assert!(result.is_ok());
// verify a multiple underscores dispatch
let tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1779,7 +1801,6 @@ mod tests {
.await;
assert!(result.is_ok());
// Test unicode in tool name, "client 🚀" should become "client_"
let tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1797,24 +1818,6 @@ mod tests {
.await;
assert!(result.is_ok());
let tool_call = CallToolRequestParams {
meta: None,
task: None,
name: "client___test__tool".to_string().into(),
arguments: Some(object!({})),
};
let result = extension_manager
.dispatch_tool_call(
"test-session-id",
tool_call,
None,
CancellationToken::default(),
)
.await;
assert!(result.is_ok());
// this should error out, specifically for an ToolError::ExecutionError
let invalid_tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1829,20 +1832,14 @@ mod tests {
None,
CancellationToken::default(),
)
.await
.unwrap()
.result
.await;
assert!(matches!(
result,
Err(ErrorData {
code: ErrorCode::INTERNAL_ERROR,
..
})
));
if let Err(err) = result {
let tool_err = err.downcast_ref::<ErrorData>().expect("Expected ErrorData");
assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND);
} else {
panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND");
}
// this should error out, specifically with an ToolError::NotFound
// this client doesn't exist
let invalid_tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1945,7 +1942,6 @@ mod tests {
)
.await;
// Try to call an unavailable tool
let unavailable_tool_call = CallToolRequestParams {
meta: None,
task: None,
@ -1962,11 +1958,9 @@ mod tests {
)
.await;
// Should return RESOURCE_NOT_FOUND error
if let Err(err) = result {
let tool_err = err.downcast_ref::<ErrorData>().expect("Expected ErrorData");
assert_eq!(tool_err.code, ErrorCode::RESOURCE_NOT_FOUND);
assert!(tool_err.message.contains("is not available"));
} else {
panic!("Expected ErrorData with ErrorCode::RESOURCE_NOT_FOUND");
}

View file

@ -1,6 +1,6 @@
mod agent;
pub(crate) mod apps_extension;
mod builtin_skills;
pub(crate) mod builtin_skills;
pub(crate) mod chatrecall_extension;
pub(crate) mod code_execution_extension;
pub mod container;
@ -18,11 +18,10 @@ pub mod prompt_manager;
mod reply_parts;
pub mod retry;
mod schedule_tool;
pub(crate) mod skills_extension;
pub mod subagent_execution_tool;
pub mod subagent_handler;
mod subagent_task_config;
pub mod subagent_tool;
pub(crate) mod subagent_handler;
pub(crate) mod subagent_task_config;
pub(crate) mod summon_extension;
pub(crate) mod todo_extension;
pub(crate) mod tom_extension;
mod tool_execution;
@ -34,5 +33,6 @@ pub use execute_commands::COMPACT_TRIGGERS;
pub use extension::ExtensionConfig;
pub use extension_manager::ExtensionManager;
pub use prompt_manager::PromptManager;
pub use subagent_handler::SUBAGENT_TOOL_REQUEST_TYPE;
pub use subagent_task_config::TaskConfig;
pub use types::{FrontendTool, RetryConfig, SessionConfig, SuccessCheck};

View file

@ -9,8 +9,6 @@ use tracing::debug;
use super::super::agents::Agent;
use crate::agents::code_execution_extension::EXTENSION_NAME as CODE_EXECUTION_EXTENSION;
use crate::agents::skills_extension::EXTENSION_NAME as SKILLS_EXTENSION;
use crate::agents::subagent_tool::SUBAGENT_TOOL_NAME;
use crate::conversation::message::{Message, MessageContent, ToolRequest};
use crate::conversation::Conversation;
use crate::providers::base::{stream_from_single_message, MessageStream, Provider, ProviderUsage};
@ -151,12 +149,12 @@ impl Agent {
.is_extension_enabled(CODE_EXECUTION_EXTENSION)
.await;
if code_execution_active {
let code_exec_prefix = format!("{CODE_EXECUTION_EXTENSION}__");
let skills_prefix = format!("{SKILLS_EXTENSION}__");
tools.retain(|tool| {
tool.name.starts_with(&code_exec_prefix)
|| tool.name.starts_with(&skills_prefix)
|| tool.name == SUBAGENT_TOOL_NAME
if let Some(owner) = crate::agents::extension_manager::get_tool_owner(tool) {
crate::agents::extension_manager::is_first_class_extension(&owner)
} else {
false
}
});
}
@ -182,7 +180,6 @@ impl Agent {
.with_extension_and_tool_counts(extension_count, tool_count)
.with_code_execution_mode(code_execution_active)
.with_hints(working_dir)
.with_enable_subagents(self.subagents_enabled(session_id).await)
.build();
// Handle toolshim if enabled

View file

@ -1,866 +0,0 @@
use super::builtin_skills;
use crate::agents::extension::PlatformExtensionContext;
use crate::agents::mcp_client::{Error, McpClientTrait};
use crate::config::paths::Paths;
use anyhow::Result;
use async_trait::async_trait;
use indoc::indoc;
use rmcp::model::{
CallToolResult, Content, Implementation, InitializeResult, JsonObject, ListToolsResult,
ProtocolVersion, ServerCapabilities, Tool, ToolAnnotations, ToolsCapability,
};
use schemars::{schema_for, JsonSchema};
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use tokio_util::sync::CancellationToken;
// Registry name of the skills platform extension.
pub static EXTENSION_NAME: &str = "skills";

/// Input schema: selects a skill by `name`.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
struct LoadSkillParams {
    name: String,
}

/// YAML frontmatter of a skill file: its name and a short description.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct SkillMetadata {
    name: String,
    description: String,
}

/// A fully parsed skill: frontmatter metadata plus body and file locations.
#[derive(Debug, Clone)]
struct Skill {
    metadata: SkillMetadata,
    // Content following the frontmatter block.
    body: String,
    // Directory containing the skill file; empty PathBuf for builtin skills.
    directory: PathBuf,
    // Other files discovered alongside the skill file.
    supporting_files: Vec<PathBuf>,
}

/// In-process client that serves skills to the agent.
pub struct SkillsClient {
    // Server info/capabilities returned at initialize time.
    info: InitializeResult,
    // All discovered skills, keyed by skill name.
    skills: HashMap<String, Skill>,
}
impl SkillsClient {
/// Build the skills client: advertise a static tool list and discover
/// skills from builtin resources and well-known filesystem directories.
pub fn new(_context: PlatformExtensionContext) -> Result<Self> {
    let info = InitializeResult {
        protocol_version: ProtocolVersion::V_2025_03_26,
        capabilities: ServerCapabilities {
            tasks: None,
            // Tool list never changes after construction.
            tools: Some(ToolsCapability {
                list_changed: Some(false),
            }),
            resources: None,
            prompts: None,
            completions: None,
            experimental: None,
            logging: None,
        },
        server_info: Implementation {
            name: EXTENSION_NAME.to_string(),
            title: Some("Skills".to_string()),
            version: "1.0.0".to_string(),
            icons: None,
            website_url: None,
        },
        // Placeholder; overwritten below once skills are known.
        instructions: Some(String::new()),
    };
    // Builtins first; filesystem skills are merged afterwards, so a
    // filesystem skill with the same name overwrites the builtin one.
    let mut skills = Self::load_builtin_skills();
    let directories = Self::get_default_skill_directories()
        .into_iter()
        .filter(|d| d.exists())
        .collect::<Vec<_>>();
    let fs_skills = Self::discover_skills_in_directories(&directories);
    skills.extend(fs_skills);
    let mut client = Self { info, skills };
    // NOTE(review): presumably renders the discovered skill set into the
    // server instructions — confirm against generate_instructions.
    client.info.instructions = Some(client.generate_instructions());
    Ok(client)
}
fn load_builtin_skills() -> HashMap<String, Skill> {
let mut skills = HashMap::new();
for content in builtin_skills::get_all_builtin_skills() {
if let Ok((metadata, body)) = Self::parse_frontmatter(content) {
skills.insert(
metadata.name.clone(),
Skill {
metadata,
body,
directory: PathBuf::new(),
supporting_files: vec![],
},
);
}
}
skills
}
fn get_default_skill_directories() -> Vec<PathBuf> {
let mut dirs = Vec::new();
if let Some(home) = dirs::home_dir() {
dirs.push(home.join(".claude/skills"));
dirs.push(home.join(".config/agents/skills"));
}
dirs.push(Paths::config_dir().join("skills"));
if let Ok(working_dir) = std::env::current_dir() {
dirs.push(working_dir.join(".claude/skills"));
dirs.push(working_dir.join(".goose/skills"));
dirs.push(working_dir.join(".agents/skills"));
}
dirs
}
fn parse_skill_file(path: &Path) -> Result<Skill> {
let content = std::fs::read_to_string(path)?;
let (metadata, body) = Self::parse_frontmatter(&content)?;
let directory = path
.parent()
.ok_or_else(|| anyhow::anyhow!("Skill file has no parent directory"))?
.to_path_buf();
let supporting_files = Self::find_supporting_files(&directory, path)?;
Ok(Skill {
metadata,
body,
directory,
supporting_files,
})
}
fn parse_frontmatter(content: &str) -> Result<(SkillMetadata, String)> {
let parts: Vec<&str> = content.split("---").collect();
if parts.len() < 3 {
return Err(anyhow::anyhow!("Invalid frontmatter format"));
}
let yaml_content = parts[1].trim();
let metadata: SkillMetadata = serde_yaml::from_str(yaml_content)?;
let body = parts[2..].join("---").trim().to_string();
Ok((metadata, body))
}
fn find_supporting_files(directory: &Path, skill_file: &Path) -> Result<Vec<PathBuf>> {
let mut files = Vec::new();
if let Ok(entries) = std::fs::read_dir(directory) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_file() && path != skill_file {
files.push(path);
} else if path.is_dir() {
if let Ok(sub_entries) = std::fs::read_dir(&path) {
for sub_entry in sub_entries.flatten() {
let sub_path = sub_entry.path();
if sub_path.is_file() {
files.push(sub_path);
}
}
}
}
}
}
Ok(files)
}
fn discover_skills_in_directories(directories: &[PathBuf]) -> HashMap<String, Skill> {
let mut skills = HashMap::new();
for dir in directories {
if let Ok(entries) = std::fs::read_dir(dir) {
for entry in entries.flatten() {
let path = entry.path();
if path.is_dir() {
let skill_file = path.join("SKILL.md");
if skill_file.exists() {
if let Ok(skill) = Self::parse_skill_file(&skill_file) {
skills.insert(skill.metadata.name.clone(), skill);
}
}
}
}
}
}
skills
}
fn generate_instructions(&self) -> String {
if self.skills.is_empty() {
return String::new();
}
let mut instructions = String::from("You have these skills at your disposal, when it is clear they can help you solve a problem or you are asked to use them:\n\n");
let mut skill_list: Vec<_> = self.skills.iter().collect();
skill_list.sort_by_key(|(name, _)| *name);
for (name, skill) in skill_list {
instructions.push_str(&format!("- {}: {}\n", name, skill.metadata.description));
}
instructions
}
async fn handle_load_skill(
&self,
arguments: Option<JsonObject>,
) -> Result<Vec<Content>, String> {
let skill_name = arguments
.as_ref()
.ok_or("Missing arguments")?
.get("name")
.and_then(|v| v.as_str())
.ok_or("Missing required parameter: name")?;
let skill = self
.skills
.get(skill_name)
.ok_or_else(|| format!("Skill '{}' not found", skill_name))?;
let mut response = format!("# Skill: {}\n\n{}\n\n", skill.metadata.name, skill.body);
if !skill.supporting_files.is_empty() {
response.push_str(&format!(
"## Supporting Files\n\nSkill directory: {}\n\n",
skill.directory.display()
));
response.push_str("The following supporting files are available:\n");
for file in &skill.supporting_files {
if let Ok(relative) = file.strip_prefix(&skill.directory) {
response.push_str(&format!("- {}\n", relative.display()));
}
}
response.push_str("\nUse the view file tools to access these files as needed, or run scripts as directed with dev extension.\n");
}
Ok(vec![Content::text(response)])
}
fn get_tools() -> Vec<Tool> {
let schema = schema_for!(LoadSkillParams);
let schema_value =
serde_json::to_value(schema).expect("Failed to serialize LoadSkillParams schema");
let input_schema = schema_value
.as_object()
.expect("Schema should be an object")
.clone();
vec![Tool::new(
"loadSkill".to_string(),
indoc! {r#"
Load a skill by name and return its content.
This tool loads the specified skill and returns its body content along with
information about any supporting files in the skill directory.
"#}
.to_string(),
input_schema,
)
.annotate(ToolAnnotations {
title: Some("Load skill".to_string()),
read_only_hint: Some(true),
destructive_hint: Some(false),
idempotent_hint: Some(true),
open_world_hint: Some(false),
})]
}
}
#[async_trait]
impl McpClientTrait for SkillsClient {
    /// List available tools. When no skills were discovered, no tools are
    /// exposed at all (rather than a `loadSkill` tool that can never succeed).
    async fn list_tools(
        &self,
        _session_id: &str,
        _next_cursor: Option<String>,
        _cancellation_token: CancellationToken,
    ) -> Result<ListToolsResult, Error> {
        let tools = if self.skills.is_empty() {
            Vec::new()
        } else {
            Self::get_tools()
        };
        Ok(ListToolsResult {
            tools,
            next_cursor: None,
            meta: None,
        })
    }

    /// Dispatch a tool call. Failures (unknown tool, bad args, missing skill)
    /// are reported as `CallToolResult::error` payloads, not transport errors.
    async fn call_tool(
        &self,
        _session_id: &str,
        name: &str,
        arguments: Option<JsonObject>,
        _working_dir: Option<&str>,
        _cancellation_token: CancellationToken,
    ) -> Result<CallToolResult, Error> {
        let content = match name {
            "loadSkill" => self.handle_load_skill(arguments).await,
            _ => Err(format!("Unknown tool: {}", name)),
        };
        match content {
            Ok(content) => Ok(CallToolResult::success(content)),
            Err(error) => Ok(CallToolResult::error(vec![Content::text(format!(
                "Error: {}",
                error
            ))])),
        }
    }

    // Server info built in `new()`; `instructions` carries the skill list.
    fn get_info(&self) -> Option<&InitializeResult> {
        Some(&self.info)
    }
}
// Unit tests for frontmatter parsing, skill discovery (including directory
// priority), instruction generation, and tool-list gating.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    // Well-formed frontmatter parses into metadata + body.
    #[test]
    fn test_parse_frontmatter() {
        let content = r#"---
name: test-skill
description: A test skill
---
# Test Skill
This is the body of the skill.
"#;
        let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap();
        assert_eq!(metadata.name, "test-skill");
        assert_eq!(metadata.description, "A test skill");
        assert!(body.contains("# Test Skill"));
        assert!(body.contains("This is the body of the skill."));
    }

    // No `---` delimiters at all is rejected.
    #[test]
    fn test_parse_frontmatter_missing() {
        let content = "# No frontmatter here";
        assert!(SkillsClient::parse_frontmatter(content).is_err());
    }

    // An opening `---` with no closing delimiter is rejected.
    #[test]
    fn test_parse_frontmatter_unclosed() {
        let content = r#"---
name: test
description: test
"#;
        assert!(SkillsClient::parse_frontmatter(content).is_err());
    }

    // Unknown frontmatter keys are tolerated; only name/description are read.
    #[test]
    fn test_parse_frontmatter_with_extra_fields() {
        let content = r#"---
name: test-skill
description: A test skill
author: Test Author
version: 1.0.0
tags:
- test
- example
extra_field: some value
---
# Test Skill
This is the body of the skill.
"#;
        let (metadata, body) = SkillsClient::parse_frontmatter(content).unwrap();
        assert_eq!(metadata.name, "test-skill");
        assert_eq!(metadata.description, "A test skill");
        assert!(body.contains("# Test Skill"));
        assert!(body.contains("This is the body of the skill."));
    }

    // Supporting files are collected from the skill dir and one level of subdirs.
    #[test]
    fn test_parse_skill_file() {
        let temp_dir = TempDir::new().unwrap();
        let skill_dir = temp_dir.path().join("test-skill");
        fs::create_dir(&skill_dir).unwrap();
        let skill_file = skill_dir.join("SKILL.md");
        fs::write(
            &skill_file,
            r#"---
name: test-skill
description: A test skill
---
# Test Skill Content
"#,
        )
        .unwrap();
        fs::write(skill_dir.join("helper.py"), "print('hello')").unwrap();
        fs::create_dir(skill_dir.join("templates")).unwrap();
        fs::write(skill_dir.join("templates/template.txt"), "template").unwrap();
        let skill = SkillsClient::parse_skill_file(&skill_file).unwrap();
        assert_eq!(skill.metadata.name, "test-skill");
        assert_eq!(skill.metadata.description, "A test skill");
        assert!(skill.body.contains("# Test Skill Content"));
        // helper.py + templates/template.txt; SKILL.md itself is excluded.
        assert_eq!(skill.supporting_files.len(), 2);
    }

    // All SKILL.md-bearing subdirectories of a directory are discovered.
    #[test]
    fn test_discover_skills() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = temp_dir.path().join("skills");
        fs::create_dir(&skills_dir).unwrap();
        let skill1_dir = skills_dir.join("test-skill-one-a1b2c3");
        fs::create_dir(&skill1_dir).unwrap();
        fs::write(
            skill1_dir.join("SKILL.md"),
            r#"---
name: test-skill-one-a1b2c3
description: First test skill
---
Body 1
"#,
        )
        .unwrap();
        let skill2_dir = skills_dir.join("test-skill-two-d4e5f6");
        fs::create_dir(&skill2_dir).unwrap();
        fs::write(
            skill2_dir.join("SKILL.md"),
            r#"---
name: test-skill-two-d4e5f6
description: Second test skill
---
Body 2
"#,
        )
        .unwrap();
        let skill3_dir = skills_dir.join("test-skill-three-g7h8i9");
        fs::create_dir(&skill3_dir).unwrap();
        fs::write(
            skill3_dir.join("SKILL.md"),
            r#"---
name: test-skill-three-g7h8i9
description: Third test skill
---
Body 3
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 3);
        assert!(skills.contains_key("test-skill-one-a1b2c3"));
        assert!(skills.contains_key("test-skill-two-d4e5f6"));
        assert!(skills.contains_key("test-skill-three-g7h8i9"));
    }

    // Skills with distinct names accumulate across multiple directories.
    #[test]
    fn test_discover_skills_from_multiple_directories() {
        let temp_dir = TempDir::new().unwrap();
        let dir1 = temp_dir.path().join("dir1");
        fs::create_dir(&dir1).unwrap();
        let skill1_dir = dir1.join("skill-from-dir1");
        fs::create_dir(&skill1_dir).unwrap();
        fs::write(
            skill1_dir.join("SKILL.md"),
            r#"---
name: skill-from-dir1
description: Skill from directory 1
---
Content from dir1
"#,
        )
        .unwrap();
        let dir2 = temp_dir.path().join("dir2");
        fs::create_dir(&dir2).unwrap();
        let skill2_dir = dir2.join("skill-from-dir2");
        fs::create_dir(&skill2_dir).unwrap();
        fs::write(
            skill2_dir.join("SKILL.md"),
            r#"---
name: skill-from-dir2
description: Skill from directory 2
---
Content from dir2
"#,
        )
        .unwrap();
        let dir3 = temp_dir.path().join("dir3");
        fs::create_dir(&dir3).unwrap();
        let skill3_dir = dir3.join("skill-from-dir3");
        fs::create_dir(&skill3_dir).unwrap();
        fs::write(
            skill3_dir.join("SKILL.md"),
            r#"---
name: skill-from-dir3
description: Skill from directory 3
---
Content from dir3
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[dir1, dir2, dir3]);
        assert_eq!(skills.len(), 3);
        assert!(skills.contains_key("skill-from-dir1"));
        assert!(skills.contains_key("skill-from-dir2"));
        assert!(skills.contains_key("skill-from-dir3"));
        assert_eq!(
            skills.get("skill-from-dir1").unwrap().metadata.description,
            "Skill from directory 1"
        );
        assert_eq!(
            skills.get("skill-from-dir2").unwrap().metadata.description,
            "Skill from directory 2"
        );
        assert_eq!(
            skills.get("skill-from-dir3").unwrap().metadata.description,
            "Skill from directory 3"
        );
    }

    // With no skills, generated instructions are empty.
    #[test]
    fn test_empty_instructions_when_no_skills() {
        let temp_dir = TempDir::new().unwrap();
        let empty_dir = temp_dir.path().join("empty");
        fs::create_dir(&empty_dir).unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]);
        assert_eq!(skills.len(), 0);
        let mut client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let instructions = client.generate_instructions();
        assert_eq!(instructions, "");
        assert!(instructions.is_empty());
        client.info.instructions = Some(instructions);
        assert_eq!(client.info.instructions.as_ref().unwrap(), "");
    }

    // With no skills, list_tools exposes no tools.
    #[tokio::test]
    async fn test_no_tools_when_no_skills() {
        let temp_dir = TempDir::new().unwrap();
        let empty_dir = temp_dir.path().join("empty");
        fs::create_dir(&empty_dir).unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[empty_dir]);
        assert_eq!(skills.len(), 0);
        let client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let result = client
            .list_tools("test-session-id", None, CancellationToken::new())
            .await
            .unwrap();
        assert_eq!(result.tools.len(), 0);
    }

    // With at least one skill, the `loadSkill` tool is exposed.
    #[tokio::test]
    async fn test_tools_available_when_skills_exist() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = temp_dir.path().join("skills");
        fs::create_dir(&skills_dir).unwrap();
        let skill_dir = skills_dir.join("test-skill");
        fs::create_dir(&skill_dir).unwrap();
        fs::write(
            skill_dir.join("SKILL.md"),
            r#"---
name: test-skill
description: A test skill
---
Content
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 1);
        let client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let result = client
            .list_tools("test-session-id", None, CancellationToken::new())
            .await
            .unwrap();
        assert_eq!(result.tools.len(), 1);
        assert_eq!(result.tools[0].name, "loadSkill");
    }

    // Instructions list all skills, sorted alphabetically by name.
    #[test]
    fn test_instructions_with_skills() {
        let temp_dir = TempDir::new().unwrap();
        let skills_dir = temp_dir.path().join("skills");
        fs::create_dir(&skills_dir).unwrap();
        let skill1_dir = skills_dir.join("alpha-skill");
        fs::create_dir(&skill1_dir).unwrap();
        fs::write(
            skill1_dir.join("SKILL.md"),
            r#"---
name: alpha-skill
description: First skill alphabetically
---
Content
"#,
        )
        .unwrap();
        let skill2_dir = skills_dir.join("beta-skill");
        fs::create_dir(&skill2_dir).unwrap();
        fs::write(
            skill2_dir.join("SKILL.md"),
            r#"---
name: beta-skill
description: Second skill alphabetically
---
Content
"#,
        )
        .unwrap();
        let skills = SkillsClient::discover_skills_in_directories(&[skills_dir]);
        assert_eq!(skills.len(), 2);
        let mut client = SkillsClient {
            info: InitializeResult {
                protocol_version: ProtocolVersion::V_2025_03_26,
                capabilities: ServerCapabilities {
                    tasks: None,
                    tools: Some(ToolsCapability {
                        list_changed: Some(false),
                    }),
                    resources: None,
                    prompts: None,
                    completions: None,
                    experimental: None,
                    logging: None,
                },
                server_info: Implementation {
                    name: EXTENSION_NAME.to_string(),
                    title: Some("Skills".to_string()),
                    version: "1.0.0".to_string(),
                    icons: None,
                    website_url: None,
                },
                instructions: Some(String::new()),
            },
            skills,
        };
        let instructions = client.generate_instructions();
        assert!(!instructions.is_empty());
        assert!(instructions.contains("You have these skills at your disposal"));
        assert!(instructions.contains("alpha-skill: First skill alphabetically"));
        assert!(instructions.contains("beta-skill: Second skill alphabetically"));
        let lines: Vec<&str> = instructions.lines().collect();
        let alpha_line = lines
            .iter()
            .position(|l| l.contains("alpha-skill"))
            .unwrap();
        let beta_line = lines.iter().position(|l| l.contains("beta-skill")).unwrap();
        assert!(alpha_line < beta_line);
        client.info.instructions = Some(instructions);
        assert!(!client.info.instructions.as_ref().unwrap().is_empty());
    }

    // Later directories in the scan order override earlier ones for the same
    // skill name: working-dir skills beat global skills.
    #[test]
    fn test_discover_skills_working_dir_overrides_global() {
        let temp_dir = TempDir::new().unwrap();
        // Simulate ~/.claude/skills (global, lowest priority)
        let global_claude = temp_dir.path().join("global-claude");
        fs::create_dir(&global_claude).unwrap();
        let skill_global_claude = global_claude.join("my-skill");
        fs::create_dir(&skill_global_claude).unwrap();
        fs::write(
            skill_global_claude.join("SKILL.md"),
            r#"---
name: my-skill
description: From global claude
---
Global claude content
"#,
        )
        .unwrap();
        // Simulate ~/.config/goose/skills (global, medium priority)
        let global_goose = temp_dir.path().join("global-goose");
        fs::create_dir(&global_goose).unwrap();
        let skill_global_goose = global_goose.join("my-skill");
        fs::create_dir(&skill_global_goose).unwrap();
        fs::write(
            skill_global_goose.join("SKILL.md"),
            r#"---
name: my-skill
description: From global goose config
---
Global goose config content
"#,
        )
        .unwrap();
        // Simulate $PWD/.claude/skills (working dir, higher priority)
        let working_claude = temp_dir.path().join("working-claude");
        fs::create_dir(&working_claude).unwrap();
        let skill_working_claude = working_claude.join("my-skill");
        fs::create_dir(&skill_working_claude).unwrap();
        fs::write(
            skill_working_claude.join("SKILL.md"),
            r#"---
name: my-skill
description: From working dir claude
---
Working dir claude content
"#,
        )
        .unwrap();
        // Simulate $PWD/.goose/skills (working dir, highest priority)
        let working_goose = temp_dir.path().join("working-goose");
        fs::create_dir(&working_goose).unwrap();
        let skill_working_goose = working_goose.join("my-skill");
        fs::create_dir(&skill_working_goose).unwrap();
        fs::write(
            skill_working_goose.join("SKILL.md"),
            r#"---
name: my-skill
description: From working dir goose
---
Working dir goose content
"#,
        )
        .unwrap();
        // Test priority order: global_claude < global_goose < working_claude < working_goose
        let skills = SkillsClient::discover_skills_in_directories(&[
            global_claude,
            global_goose,
            working_claude,
            working_goose,
        ]);
        assert_eq!(skills.len(), 1);
        assert!(skills.contains_key("my-skill"));
        // The last directory (working_goose) should win
        assert_eq!(
            skills.get("my-skill").unwrap().metadata.description,
            "From working dir goose"
        );
        assert!(skills
            .get("my-skill")
            .unwrap()
            .body
            .contains("Working dir goose content"));
    }

    // Builtin skills embedded in the binary parse and include the doc guide.
    #[test]
    fn test_builtin_skills_loaded() {
        let skills = SkillsClient::load_builtin_skills();
        assert!(!skills.is_empty());
        assert!(skills.contains_key("goose-doc-guide"));
    }
}

View file

@ -20,6 +20,8 @@ use std::sync::Arc;
use tokio_util::sync::CancellationToken;
use tracing::{debug, info};
pub type OnMessageCallback = Arc<dyn Fn(&Message) + Send + Sync>;
#[derive(Serialize)]
pub struct SubagentPromptContext {
pub max_turns: usize,
@ -52,6 +54,39 @@ pub async fn run_complete_subagent_task(
.await
}
/// Run a subagent task to completion, invoking `on_message` for each message
/// the subagent produces.
///
/// Returns the subagent's final output when one is available (see
/// `get_final_output`), otherwise text extracted from the conversation —
/// the last message only when `return_last_only` is set, all text otherwise.
///
/// # Errors
/// Any failure from the underlying run is wrapped as an MCP
/// `ErrorData` (INTERNAL_ERROR) before being propagated as `anyhow::Error`.
pub async fn run_subagent_task_with_callback(
    config: AgentConfig,
    recipe: Recipe,
    task_config: TaskConfig,
    return_last_only: bool,
    session_id: String,
    cancellation_token: Option<CancellationToken>,
    on_message: Option<OnMessageCallback>,
) -> Result<String, anyhow::Error> {
    let (messages, final_output) = get_agent_messages_with_callback(
        config,
        recipe,
        task_config,
        session_id,
        cancellation_token,
        on_message,
    )
    .await
    .map_err(|e| {
        ErrorData::new(
            ErrorCode::INTERNAL_ERROR,
            format!("Failed to execute task: {}", e),
            None,
        )
    })?;
    // A produced final output takes precedence over extracted message text.
    if let Some(output) = final_output {
        return Ok(output);
    }
    Ok(extract_response_text(&messages, return_last_only))
}
pub async fn run_complete_subagent_task_with_notifications(
config: AgentConfig,
recipe: Recipe,
@ -82,7 +117,11 @@ pub async fn run_complete_subagent_task_with_notifications(
return Ok(output);
}
let response_text = if return_last_only {
Ok(extract_response_text(&messages, return_last_only))
}
fn extract_response_text(messages: &Conversation, return_last_only: bool) -> String {
if return_last_only {
messages
.messages()
.last()
@ -133,13 +172,101 @@ pub async fn run_complete_subagent_task_with_notifications(
.collect();
all_text_content.join("\n")
};
Ok(response_text)
}
}
// Identifier string for subagent tool requests; consumed elsewhere in the crate.
pub const SUBAGENT_TOOL_REQUEST_TYPE: &str = "subagent_tool_request";

/// Core subagent driver: configure a fresh agent from the recipe and task
/// config, stream its reply, and collect the full conversation.
///
/// Returns the conversation plus an optional final output (populated when the
/// recipe declares a response schema — see `get_final_output`). Extension
/// failures are logged and skipped; stream errors end collection early with
/// whatever was gathered so far.
fn get_agent_messages_with_callback(
    config: AgentConfig,
    recipe: Recipe,
    task_config: TaskConfig,
    session_id: String,
    cancellation_token: Option<CancellationToken>,
    on_message: Option<OnMessageCallback>,
) -> AgentMessagesFuture {
    Box::pin(async move {
        let system_instructions = recipe.instructions.clone().unwrap_or_default();
        // Default prompt keeps the agent from stalling on an empty task.
        let user_task = recipe
            .prompt
            .clone()
            .unwrap_or_else(|| "Begin.".to_string());
        let agent = Arc::new(Agent::with_config(config));
        agent
            .update_provider(task_config.provider.clone(), &session_id)
            .await
            .map_err(|e| anyhow!("Failed to set provider on sub agent: {}", e))?;
        // Best-effort: a failing extension should not abort the whole task.
        for extension in &task_config.extensions {
            if let Err(e) = agent.add_extension(extension.clone(), &session_id).await {
                debug!(
                    "Failed to add extension '{}' to subagent: {}",
                    extension.name(),
                    e
                );
            }
        }
        let has_response_schema = recipe.response.is_some();
        agent
            .apply_recipe_components(recipe.response.clone(), true)
            .await;
        let subagent_prompt =
            build_subagent_prompt(&agent, &task_config, &session_id, system_instructions).await?;
        agent.override_system_prompt(subagent_prompt).await;
        let user_message = Message::user().with_text(user_task);
        let mut conversation = Conversation::new_unvalidated(vec![user_message.clone()]);
        // Activities are informational only; just log them.
        if let Some(activities) = recipe.activities {
            for activity in activities {
                info!("Recipe activity: {}", activity);
            }
        }
        let session_config = SessionConfig {
            id: session_id.clone(),
            schedule_id: None,
            max_turns: task_config.max_turns.map(|v| v as u32),
            retry_config: recipe.retry,
        };
        // Run the reply inside the session-id task-local so nested code
        // observes the subagent's session.
        let mut stream =
            crate::session_context::with_session_id(Some(session_id.to_string()), async {
                agent
                    .reply(user_message, session_config, cancellation_token)
                    .await
            })
            .await
            .map_err(|e| anyhow!("Failed to get reply from agent: {}", e))?;
        while let Some(message_result) = stream.next().await {
            match message_result {
                Ok(AgentEvent::Message(msg)) => {
                    if let Some(ref callback) = on_message {
                        callback(&msg);
                    }
                    conversation.push(msg);
                }
                // Notifications and model changes carry no conversation content.
                Ok(AgentEvent::McpNotification(_)) | Ok(AgentEvent::ModelChange { .. }) => {}
                // e.g. after compaction the agent may replace the whole history.
                Ok(AgentEvent::HistoryReplaced(updated_conversation)) => {
                    conversation = updated_conversation;
                }
                Err(e) => {
                    tracing::error!("Error receiving message from subagent: {}", e);
                    break;
                }
            }
        }
        let final_output = get_final_output(&agent, has_response_schema).await;
        Ok((conversation, final_output))
    })
}
fn get_agent_messages_with_notifications(
config: AgentConfig,
recipe: Recipe,
@ -174,7 +301,7 @@ fn get_agent_messages_with_notifications(
let has_response_schema = recipe.response.is_some();
agent
.apply_recipe_components(recipe.sub_recipes.clone(), recipe.response.clone(), true)
.apply_recipe_components(recipe.response.clone(), true)
.await;
let subagent_prompt =

View file

@ -1,530 +0,0 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::path::PathBuf;
use anyhow::{anyhow, Result};
use futures::FutureExt;
use rmcp::model::{Content, ErrorCode, ErrorData, ServerNotification, Tool};
use serde::Deserialize;
use serde_json::{json, Value};
use tokio::sync::mpsc;
use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_util::sync::CancellationToken;
use crate::agents::subagent_handler::run_complete_subagent_task_with_notifications;
use crate::agents::subagent_task_config::TaskConfig;
use crate::agents::tool_execution::ToolCallResult;
use crate::agents::AgentConfig;
use crate::providers;
use crate::recipe::build_recipe::build_recipe_from_template;
use crate::recipe::local_recipes::load_local_recipe_file;
use crate::recipe::{Recipe, SubRecipe};
/// Name of the tool exposed to the parent agent.
pub const SUBAGENT_TOOL_NAME: &str = "subagent";

/// Appended to a subagent's instructions when `summary` is requested, so its
/// final message doubles as the report returned to the parent agent.
const SUMMARY_INSTRUCTIONS: &str = r#"
Important: Your parent agent will only receive your final message as a summary of your work.
Make sure your last message provides a comprehensive summary of:
- What you were asked to do
- What actions you took
- The results or outcomes
- Any important findings or recommendations
Be concise but complete.
"#;
/// Parameters accepted by the `subagent` tool.
#[derive(Debug, Deserialize, Clone)]
pub struct SubagentParams {
    // Ad-hoc instructions, and/or additional context for a subrecipe.
    pub instructions: Option<String>,
    // Name of a predefined subrecipe to run.
    pub subrecipe: Option<String>,
    // Subrecipe parameter values; only valid together with `subrecipe`.
    pub parameters: Option<HashMap<String, Value>>,
    // Extension names to enable; `None` inherits all, empty array means none.
    pub extensions: Option<Vec<String>>,
    // Optional provider/model/temperature/turn-limit overrides.
    pub settings: Option<SubagentSettings>,
    // When true (the default), only the subagent's final summary is returned.
    #[serde(default = "default_summary")]
    pub summary: bool,
}

// Serde default for `summary`: on unless explicitly disabled.
fn default_summary() -> bool {
    true
}

/// Per-run overrides for the subagent's model configuration.
#[derive(Debug, Deserialize, Clone)]
pub struct SubagentSettings {
    pub provider: Option<String>,
    pub model: Option<String>,
    pub temperature: Option<f32>,
    pub max_turns: Option<usize>,
}
/// Build the `subagent` MCP tool definition. The description embeds the list
/// of available subrecipes (see `build_tool_description`); the JSON schema is
/// a hand-written mirror of `SubagentParams`.
pub fn create_subagent_tool(sub_recipes: &[SubRecipe]) -> Tool {
    let description = build_tool_description(sub_recipes);
    let schema = json!({
        "type": "object",
        "properties": {
            "instructions": {
                "type": "string",
                "description": "Instructions for the subagent. Required for ad-hoc tasks. For predefined tasks, adds additional context."
            },
            "subrecipe": {
                "type": "string",
                "description": "Name of a predefined subrecipe to run."
            },
            "parameters": {
                "type": "object",
                "additionalProperties": true,
                "description": "Parameters for the subrecipe. Only valid when 'subrecipe' is specified."
            },
            "extensions": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Extensions to enable. Omit to inherit all, empty array for none."
            },
            "settings": {
                "type": "object",
                "properties": {
                    "provider": {"type": "string", "description": "Override LLM provider"},
                    "model": {"type": "string", "description": "Override model"},
                    "temperature": {"type": "number", "description": "Override temperature"},
                    "max_turns": {"type": "number", "description": "Override max turns"}
                },
                "description": "Override model/provider/settings."
            },
            "summary": {
                "type": "boolean",
                "default": true,
                "description": "If true (default), return only the subagent's final summary."
            }
        }
    });
    // json! with a top-level object always yields Some(..) here.
    Tool::new(
        SUBAGENT_TOOL_NAME,
        description,
        schema.as_object().unwrap().clone(),
    )
}
/// Render the `subagent` tool description: a fixed usage preamble, plus one
/// bullet per configured subrecipe (name, optional sequential-execution hint,
/// description, and any still-unbound parameters).
fn build_tool_description(sub_recipes: &[SubRecipe]) -> String {
    let base = "Delegate a task to a subagent that runs independently with its own context.\n\n\
        Modes:\n\
        1. Ad-hoc: Provide `instructions` for a custom task\n\
        2. Predefined: Provide `subrecipe` name to run a predefined task\n\
        3. Augmented: Provide both `subrecipe` and `instructions` to add context\n\n\
        The subagent has access to the same tools as you by default. \
        Use `extensions` to limit which extensions the subagent can use.\n\n\
        For parallel execution, make multiple `subagent` tool calls in the same message.";
    if sub_recipes.is_empty() {
        return base.to_string();
    }
    let mut description = format!("{}\n\nAvailable subrecipes:", base);
    for sub_recipe in sub_recipes {
        let params = get_subrecipe_params_description(sub_recipe);
        // Parameter list is appended only when the subrecipe has unbound params.
        let params_suffix = if params.is_empty() {
            String::new()
        } else {
            format!(" (params: {})", params)
        };
        let sequential_hint = if sub_recipe.sequential_when_repeated {
            " [run sequentially, not in parallel]"
        } else {
            ""
        };
        description.push_str(&format!(
            "\n• {}{} - {}{}",
            sub_recipe.name,
            sequential_hint,
            sub_recipe.description.as_deref().unwrap_or("No description"),
            params_suffix
        ));
    }
    description
}
/// Describe the parameters a caller must/может supply for a subrecipe:
/// comma-separated "key [required]"/"key [optional]" entries, excluding
/// parameters already bound by the subrecipe's `values` map.
///
/// Any failure (file unreadable, recipe unparsable, no parameters) degrades
/// to an empty string — this only feeds the tool description.
fn get_subrecipe_params_description(sub_recipe: &SubRecipe) -> String {
    match load_local_recipe_file(&sub_recipe.path) {
        Ok(recipe_file) => match Recipe::from_content(&recipe_file.content) {
            Ok(recipe) => {
                if let Some(params) = recipe.parameters {
                    params
                        .iter()
                        // Skip params pre-bound via the subrecipe's `values`.
                        .filter(|p| {
                            sub_recipe
                                .values
                                .as_ref()
                                .map(|v| !v.contains_key(&p.key))
                                .unwrap_or(true)
                        })
                        .map(|p| {
                            let req = match p.requirement {
                                crate::recipe::RecipeParameterRequirement::Required => "[required]",
                                _ => "[optional]",
                            };
                            format!("{} {}", p.key, req)
                        })
                        .collect::<Vec<_>>()
                        .join(", ")
                } else {
                    String::new()
                }
            }
            Err(_) => String::new(),
        },
        Err(_) => String::new(),
    }
}
/// Entry point for a `subagent` tool call: validate parameters, resolve the
/// recipe to run, and kick off execution with a notification stream the
/// caller can forward to the client.
///
/// Validation rules: at least one of `instructions`/`subrecipe` is required,
/// and `parameters` is only legal together with `subrecipe`.
///
/// Note: SubRecipe.sequential_when_repeated is surfaced as a hint in the tool description
/// (e.g., "[run sequentially, not in parallel]") but not enforced. The LLM controls
/// sequencing by making sequential vs parallel tool calls.
pub fn handle_subagent_tool(
    config: &AgentConfig,
    params: Value,
    task_config: TaskConfig,
    sub_recipes: HashMap<String, SubRecipe>,
    working_dir: PathBuf,
    cancellation_token: Option<CancellationToken>,
) -> ToolCallResult {
    let parsed_params: SubagentParams = match serde_json::from_value(params) {
        Ok(p) => p,
        Err(e) => {
            return ToolCallResult::from(Err(ErrorData {
                code: ErrorCode::INVALID_PARAMS,
                message: Cow::from(format!("Invalid parameters: {}", e)),
                data: None,
            }));
        }
    };
    if parsed_params.instructions.is_none() && parsed_params.subrecipe.is_none() {
        return ToolCallResult::from(Err(ErrorData {
            code: ErrorCode::INVALID_PARAMS,
            message: Cow::from("Must provide 'instructions' or 'subrecipe' (or both)"),
            data: None,
        }));
    }
    if parsed_params.parameters.is_some() && parsed_params.subrecipe.is_none() {
        return ToolCallResult::from(Err(ErrorData {
            code: ErrorCode::INVALID_PARAMS,
            message: Cow::from("'parameters' can only be used with 'subrecipe'"),
            data: None,
        }));
    }
    // Resolve subrecipe/ad-hoc recipe up front so bad input fails fast.
    let recipe = match build_recipe(&parsed_params, &sub_recipes) {
        Ok(r) => r,
        Err(e) => {
            return ToolCallResult::from(Err(ErrorData {
                code: ErrorCode::INVALID_PARAMS,
                message: Cow::from(e.to_string()),
                data: None,
            }));
        }
    };
    let config = config.clone();
    // Notifications emitted during execution are streamed back via this channel.
    let (notification_tx, notification_rx) = mpsc::unbounded_channel();
    ToolCallResult {
        notification_stream: Some(Box::new(UnboundedReceiverStream::new(notification_rx))),
        result: Box::new(
            execute_subagent_with_notifications(
                config,
                recipe,
                task_config,
                parsed_params,
                working_dir,
                cancellation_token,
                notification_tx,
            )
            .boxed(),
        ),
    }
}
/// Execute a prepared subagent recipe: create a dedicated SubAgent session,
/// apply any per-run settings overrides, run the task, and convert the
/// outcome into an MCP `CallToolResult`.
///
/// # Errors
/// Session creation and task failures map to INTERNAL_ERROR; invalid
/// settings overrides map to INVALID_PARAMS.
async fn execute_subagent_with_notifications(
    config: AgentConfig,
    recipe: Recipe,
    task_config: TaskConfig,
    params: SubagentParams,
    working_dir: PathBuf,
    cancellation_token: Option<CancellationToken>,
    notification_tx: mpsc::UnboundedSender<ServerNotification>,
) -> Result<rmcp::model::CallToolResult, ErrorData> {
    let session = config
        .session_manager
        .create_session(
            working_dir,
            "Subagent task".to_string(),
            crate::session::session_manager::SessionType::SubAgent,
        )
        .await
        .map_err(|e| ErrorData {
            code: ErrorCode::INTERNAL_ERROR,
            message: Cow::from(format!("Failed to create session: {}", e)),
            data: None,
        })?;
    let task_config = apply_settings_overrides(task_config, &params)
        .await
        .map_err(|e| ErrorData {
            code: ErrorCode::INVALID_PARAMS,
            message: Cow::from(e.to_string()),
            data: None,
        })?;
    let result = run_complete_subagent_task_with_notifications(
        config,
        recipe,
        task_config,
        // `summary == true` means return only the subagent's final message.
        params.summary,
        session.id,
        cancellation_token,
        Some(notification_tx),
    )
    .await;
    match result {
        Ok(text) => Ok(rmcp::model::CallToolResult {
            content: vec![Content::text(text)],
            structured_content: None,
            is_error: Some(false),
            meta: None,
        }),
        Err(e) => Err(ErrorData {
            code: ErrorCode::INTERNAL_ERROR,
            message: Cow::from(e.to_string()),
            data: None,
        }),
    }
}
/// Resolve the recipe to execute: a named subrecipe when one was requested,
/// otherwise an ad-hoc recipe built from the caller's instructions. When a
/// summary is requested, the summary guidance is appended to the instructions.
fn build_recipe(
    params: &SubagentParams,
    sub_recipes: &HashMap<String, SubRecipe>,
) -> Result<Recipe> {
    let mut recipe = match &params.subrecipe {
        Some(subrecipe_name) => build_subrecipe(subrecipe_name, params, sub_recipes)?,
        None => build_adhoc_recipe(params)?,
    };
    if params.summary {
        let existing = recipe.instructions.take().unwrap_or_default();
        recipe.instructions = Some(format!("{}\n{}", existing, SUMMARY_INSTRUCTIONS));
    }
    Ok(recipe)
}
/// Build a recipe from a named subrecipe: load its file, render the template
/// with parameter values (subrecipe-bound `values` first, then caller-supplied
/// `parameters`, so later entries can override), and append any extra
/// caller instructions.
///
/// # Errors
/// Fails for an unknown subrecipe name, an unloadable file, or a template
/// that does not render.
fn build_subrecipe(
    subrecipe_name: &str,
    params: &SubagentParams,
    sub_recipes: &HashMap<String, SubRecipe>,
) -> Result<Recipe> {
    let sub_recipe = sub_recipes.get(subrecipe_name).ok_or_else(|| {
        // Include the valid names so the model can self-correct.
        let available: Vec<_> = sub_recipes.keys().cloned().collect();
        anyhow!(
            "Unknown subrecipe '{}'. Available: {}",
            subrecipe_name,
            available.join(", ")
        )
    })?;
    let recipe_file = load_local_recipe_file(&sub_recipe.path)
        .map_err(|e| anyhow!("Failed to load subrecipe '{}': {}", subrecipe_name, e))?;
    let mut param_values: Vec<(String, String)> = Vec::new();
    if let Some(values) = &sub_recipe.values {
        for (k, v) in values {
            param_values.push((k.clone(), v.clone()));
        }
    }
    if let Some(provided_params) = &params.parameters {
        for (k, v) in provided_params {
            // Non-string JSON values are stringified for the template engine.
            let value_str = match v {
                Value::String(s) => s.clone(),
                other => other.to_string(),
            };
            param_values.push((k.clone(), value_str));
        }
    }
    let mut recipe = build_recipe_from_template(
        recipe_file.content,
        &recipe_file.parent_dir,
        param_values,
        // No interactive prompt callback in the subagent path.
        None::<fn(&str, &str) -> Result<String, anyhow::Error>>,
    )
    .map_err(|e| anyhow!("Failed to build subrecipe: {}", e))?;
    // Caller-supplied instructions augment (not replace) the subrecipe's own.
    if let Some(extra) = &params.instructions {
        let mut current = recipe.instructions.take().unwrap_or_default();
        if !current.is_empty() {
            current.push_str("\n\n");
        }
        current.push_str(extra);
        recipe.instructions = Some(current);
    }
    Ok(recipe)
}
/// Build a minimal recipe for an ad-hoc (non-subrecipe) subagent task.
///
/// Requires `params.instructions` to be present, and rejects the built
/// recipe if its content trips the security scanner.
fn build_adhoc_recipe(params: &SubagentParams) -> Result<Recipe> {
    let Some(instructions) = params.instructions.as_ref() else {
        return Err(anyhow!("Instructions required for ad-hoc task"));
    };
    let recipe = Recipe::builder()
        .version("1.0.0")
        .title("Subagent Task")
        .description("Ad-hoc subagent task")
        .instructions(instructions)
        .build()
        .map_err(|e| anyhow!("Failed to build recipe: {}", e))?;
    // Scan after building so all rendered fields are checked.
    if recipe.check_for_security_warnings() {
        Err(anyhow!("Recipe contains potentially harmful content"))
    } else {
        Ok(recipe)
    }
}
/// Apply per-invocation overrides from `params` onto the base task config.
///
/// Supports overriding the turn limit, the provider/model/temperature
/// (rebuilding the provider whenever any of those three is given), and
/// restricting the extension set. An explicitly empty `extensions` list
/// means "no extensions", not "keep defaults".
async fn apply_settings_overrides(
    mut task_config: TaskConfig,
    params: &SubagentParams,
) -> Result<TaskConfig> {
    if let Some(settings) = &params.settings {
        if let Some(max_turns) = settings.max_turns {
            task_config.max_turns = Some(max_turns);
        }
        let provider_changed = settings.provider.is_some()
            || settings.model.is_some()
            || settings.temperature.is_some();
        if provider_changed {
            // Fall back to the current provider's name when only the
            // model or temperature changed.
            let provider_name = settings
                .provider
                .clone()
                .unwrap_or_else(|| task_config.provider.get_name().to_string());
            let mut model_config = task_config.provider.get_model_config();
            if let Some(model) = &settings.model {
                model_config.model_name = model.clone();
            }
            if let Some(temp) = settings.temperature {
                model_config = model_config.with_temperature(Some(temp));
            }
            task_config.provider = providers::create(&provider_name, model_config)
                .await
                .map_err(|e| anyhow!("Failed to create provider '{}': {}", provider_name, e))?;
        }
    }
    if let Some(extension_names) = &params.extensions {
        if extension_names.is_empty() {
            task_config.extensions.clear();
        } else {
            task_config
                .extensions
                .retain(|ext| extension_names.contains(&ext.name()));
        }
    }
    Ok(task_config)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `SubRecipe` fixture with a fixed path and a derived
    /// description, so the tests only vary what they care about.
    fn subrecipe(name: &str, sequential: bool, description: &str) -> SubRecipe {
        SubRecipe {
            name: name.to_string(),
            path: "test.yaml".to_string(),
            values: None,
            sequential_when_repeated: sequential,
            description: Some(description.to_string()),
        }
    }

    // The exported tool name is part of the dispatch contract.
    #[test]
    fn test_tool_name() {
        assert_eq!(SUBAGENT_TOOL_NAME, "subagent");
    }

    // With no subrecipes registered, the description advertises ad-hoc
    // usage and omits the subrecipe listing.
    #[test]
    fn test_create_tool_without_subrecipes() {
        let tool = create_subagent_tool(&[]);
        assert_eq!(tool.name, "subagent");
        let description = tool.description.as_ref().unwrap();
        assert!(description.contains("Ad-hoc"));
        assert!(!description.contains("Available subrecipes"));
    }

    // Registered subrecipes show up by name under an "Available
    // subrecipes" section.
    #[test]
    fn test_create_tool_with_subrecipes() {
        let registered = vec![subrecipe("test_recipe", false, "A test recipe")];
        let tool = create_subagent_tool(&registered);
        let description = tool.description.as_ref().unwrap();
        assert!(description.contains("Available subrecipes"));
        assert!(description.contains("test_recipe"));
    }

    // Only subrecipes flagged `sequential_when_repeated` get the
    // sequential-execution hint appended to their entry.
    #[test]
    fn test_sequential_hint_in_description() {
        let registered = vec![
            subrecipe("parallel_ok", false, "Can run in parallel"),
            subrecipe("sequential_only", true, "Must run sequentially"),
        ];
        let tool = create_subagent_tool(&registered);
        let desc = tool.description.as_ref().unwrap();
        assert!(desc.contains("parallel_ok"));
        assert!(!desc.contains("parallel_ok [run sequentially"));
        assert!(desc.contains("sequential_only [run sequentially, not in parallel]"));
    }

    // A fully-populated params payload deserializes with every field
    // mapped to the right place.
    #[test]
    fn test_params_deserialization_full() {
        let parsed: SubagentParams = serde_json::from_value(json!({
            "instructions": "Extra context",
            "subrecipe": "my_recipe",
            "parameters": {"key": "value"},
            "extensions": ["developer"],
            "settings": {"model": "gpt-4"},
            "summary": false
        }))
        .unwrap();
        assert_eq!(parsed.instructions, Some("Extra context".to_string()));
        assert_eq!(parsed.subrecipe, Some("my_recipe".to_string()));
        assert!(parsed.parameters.is_some());
        assert_eq!(parsed.extensions, Some(vec!["developer".to_string()]));
        assert!(!parsed.summary);
    }
}

File diff suppressed because it is too large Load diff

View file

@ -72,9 +72,29 @@ impl TestProvider {
}
fn hash_input(messages: &[Message]) -> String {
use crate::conversation::message::MessageContent;
// Strip internal metadata (e.g. tool_meta/_meta) from content before hashing.
// This metadata is used for internal routing (like goose_extension ownership)
// and isn't part of the semantic input the LLM sees, so it shouldn't affect
// replay matching.
let stable_messages: Vec<_> = messages
.iter()
.map(|msg| (msg.role.clone(), msg.content.clone()))
.map(|msg| {
let cleaned_content: Vec<_> = msg
.content
.iter()
.map(|c| match c {
MessageContent::ToolRequest(req) => {
let mut req = req.clone();
req.tool_meta = None;
MessageContent::ToolRequest(req)
}
other => other.clone(),
})
.collect();
(msg.role.clone(), cleaned_content)
})
.collect();
let serialized = serde_json::to_string(&stable_messages).unwrap_or_default();
let mut hasher = Sha256::new();

View file

@ -226,6 +226,24 @@ pub struct RecipeBuilder {
}
impl Recipe {
fn ensure_summon_for_subrecipes(&mut self) {
if self.sub_recipes.is_none() {
return;
}
let summon = ExtensionConfig::Platform {
name: "summon".to_string(),
description: String::new(),
display_name: None,
bundled: None,
available_tools: vec![],
};
match &mut self.extensions {
Some(exts) if !exts.iter().any(|e| e.name() == "summon") => exts.push(summon),
None => self.extensions = Some(vec![summon]),
_ => {}
}
}
/// Returns true if harmful content is detected in instructions, prompt, or activities fields
pub fn check_for_security_warnings(&self) -> bool {
if [self.instructions.as_deref(), self.prompt.as_deref()]
@ -277,7 +295,7 @@ impl Recipe {
}
pub fn from_content(content: &str) -> Result<Self> {
let recipe: Recipe = match serde_yaml::from_str::<serde_yaml::Value>(content) {
let mut recipe: Recipe = match serde_yaml::from_str::<serde_yaml::Value>(content) {
Ok(yaml_value) => {
if let Some(nested_recipe) = yaml_value.get("recipe") {
serde_yaml::from_value(nested_recipe.clone())
@ -291,6 +309,7 @@ impl Recipe {
.map_err(|e| anyhow::anyhow!("{}", strip_error_location(&e.to_string())))?,
};
recipe.ensure_summon_for_subrecipes();
Ok(recipe)
}
}
@ -450,8 +469,10 @@ mod tests {
assert_eq!(recipe.prompt, Some("Test prompt".to_string()));
assert!(recipe.extensions.is_some());
let extensions = recipe.extensions.unwrap();
assert_eq!(extensions.len(), 1);
let extensions = recipe.extensions.as_ref().unwrap();
assert_eq!(extensions.len(), 2);
assert!(extensions.iter().any(|e| e.name() == "test_extension"));
assert!(extensions.iter().any(|e| e.name() == "summon"));
assert!(recipe.parameters.is_some());
let parameters = recipe.parameters.unwrap();
@ -533,8 +554,10 @@ sub_recipes:
assert_eq!(recipe.prompt, Some("Test prompt".to_string()));
assert!(recipe.extensions.is_some());
let extensions = recipe.extensions.unwrap();
assert_eq!(extensions.len(), 1);
let extensions = recipe.extensions.as_ref().unwrap();
assert_eq!(extensions.len(), 2);
assert!(extensions.iter().any(|e| e.name() == "test_extension"));
assert!(extensions.iter().any(|e| e.name() == "summon"));
assert!(recipe.parameters.is_some());
let parameters = recipe.parameters.unwrap();

View file

@ -491,7 +491,7 @@ impl SessionStorage {
let options = SqliteConnectOptions::new()
.filename(path)
.create_if_missing(true)
.busy_timeout(std::time::Duration::from_secs(5))
.busy_timeout(std::time::Duration::from_secs(30))
.journal_mode(sqlx::sqlite::SqliteJournalMode::Wal);
SqlitePoolOptions::new().connect_lazy_with(options)

View file

@ -1,104 +0,0 @@
use goose::agents::subagent_tool::{create_subagent_tool, SUBAGENT_TOOL_NAME};
use goose::recipe::{Recipe, SubRecipe};
use std::collections::HashMap;
use tempfile::TempDir;
const RECIPE_TWO_PARAMS: &str = r#"
version: "1.0.0"
title: "Test Task"
description: "A test task"
instructions: "Process {{ first }} and {{ second }}"
parameters:
- key: first
input_type: string
requirement: required
description: "First param"
- key: second
input_type: string
requirement: required
description: "Second param"
"#;
fn write_recipe(temp_dir: &TempDir, name: &str, content: &str) -> String {
let path = temp_dir.path().join(format!("{}.yaml", name));
std::fs::write(&path, content).unwrap();
path.to_string_lossy().to_string()
}
fn make_subrecipe(path: String, name: &str, values: Option<HashMap<String, String>>) -> SubRecipe {
SubRecipe {
name: name.to_string(),
path,
values,
sequential_when_repeated: false,
description: Some(format!("{} description", name)),
}
}
#[test]
fn test_tool_description_includes_subrecipe_params_and_filters_presets() {
let temp_dir = TempDir::new().unwrap();
let path = write_recipe(&temp_dir, "mytask", RECIPE_TWO_PARAMS);
let no_presets = make_subrecipe(path.clone(), "mytask", None);
let tool = create_subagent_tool(&[no_presets]);
let desc = tool.description.as_ref().unwrap();
assert!(desc.contains("mytask"));
assert!(desc.contains("first [required]"));
assert!(desc.contains("second [required]"));
let mut preset = HashMap::new();
preset.insert("second".to_string(), "preset_value".to_string());
let with_presets = make_subrecipe(path, "deploy", Some(preset));
let tool = create_subagent_tool(&[with_presets]);
let params_section = tool
.description
.as_ref()
.unwrap()
.split("(params:")
.nth(1)
.unwrap_or("");
assert!(params_section.contains("first"));
assert!(!params_section.contains("second"));
}
#[test]
fn test_adhoc_recipe_builder_and_security_check() {
let recipe = Recipe::builder()
.version("1.0.0")
.title("Adhoc Task")
.description("An ad-hoc task")
.instructions("Do the thing")
.build()
.unwrap();
assert_eq!(recipe.title, "Adhoc Task");
assert_eq!(recipe.instructions.as_ref().unwrap(), "Do the thing");
assert!(!recipe.check_for_security_warnings());
}
#[test]
fn test_adhoc_tool_schema_properties() {
let tool = create_subagent_tool(&[]);
assert_eq!(tool.name, SUBAGENT_TOOL_NAME);
assert!(tool.description.as_ref().unwrap().contains("Ad-hoc"));
assert!(!tool
.description
.as_ref()
.unwrap()
.contains("Available subrecipes"));
let props = tool
.input_schema
.get("properties")
.unwrap()
.as_object()
.unwrap();
assert!(props.contains_key("instructions"));
assert!(props.contains_key("subrecipe"));
assert!(props.contains_key("parameters"));
assert!(props.contains_key("extensions"));
assert!(props.contains_key("settings"));
assert!(props.contains_key("summary"));
}

View file

@ -3,17 +3,16 @@ title: Goose Self-Testing Integration Suite
description: A comprehensive meta-testing recipe where goose tests its own capabilities using its own tools - true first-person integration testing
author:
contact: goose-self-test
activities:
- Initialize test workspace and logging infrastructure
- Test file operations (create, read, update, delete, undo)
- Validate shell command execution and error handling
- Analyze code structure and parsing capabilities
- Test extension discovery and management
- Create and orchestrate subagents for meta-testing
- Generate and execute test recipes
- Test error boundaries and security controls
- Measure performance and resource usage
- Test load tool for knowledge injection and discovery
- Test delegate tool for task delegation (sync and async)
- Test error boundaries including nested delegation prevention
- Generate comprehensive test report
parameters:
@ -21,26 +20,26 @@ parameters:
input_type: string
requirement: optional
default: "all"
description: "Which test phases to run: all, basic, extensions, subagents, recipes, advanced"
description: "Which test phases to run: all, basic, extensions, delegation, advanced"
- key: test_depth
input_type: string
requirement: optional
default: "standard"
description: "Testing depth: quick (smoke tests), standard (normal coverage), deep (exhaustive)"
- key: workspace_dir
input_type: string
requirement: optional
default: "./gooseselftest"
description: "Directory for test artifacts and results"
- key: parallel_tests
input_type: string
requirement: optional
default: "true"
description: "Run independent tests in parallel where possible"
- key: cleanup_after
input_type: string
requirement: optional
@ -50,44 +49,44 @@ parameters:
instructions: |
You are testing yourself - a running goose instance validating its own capabilities through meta-testing.
This is true first-person integration testing where you use your own tools to test your own functionality.
## Understanding First-Person Integration Testing
This is a crucial distinction - as a running goose instance, you are testing yourself using your own capabilities.
This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools
to validate your own functionality from within your active session. You can only test what you can observe and
This is meta-testing in the truest sense: not unit tests or external test harnesses, but you using your tools
to validate your own functionality from within your active session. You can only test what you can observe and
control from inside your running instance - your tools, your behaviors, your error handling, your consistency.
## Core Testing Philosophy
- You ARE the system under test AND the tester
- Use your tools to create test scenarios, then validate the results
- Test both success and failure paths
- Document everything meticulously
- Handle errors gracefully - a test failure shouldn't stop the suite
## Test Execution Framework
### Phase 1: Environment Setup & Basic Tool Validation
Create a structured test workspace and validate core developer tools:
- File operations (CRUD + undo)
- Shell command execution
- Code analysis capabilities
- Error handling and recovery
### Phase 2: Extension System Testing
Test dynamic extension management:
- Discover available extensions
- Enable/disable extensions
- Test extension interactions
- Verify isolation between extensions
### Phase 3: Subagent Testing (Meta-Recursion)
Create subagents to test yourself recursively:
- Basic subagent creation and execution
- Parallel subagent execution (multiple subagent calls at once)
- Sequential subagent chains
- Recursive depth testing (subagent creating subagent)
- Test summary mode (default behavior for concise results)
### Phase 3: Delegate & Load Testing
Test the unified delegation and knowledge-loading tools:
- Load tool for discovery and knowledge injection
- Delegate tool for synchronous task delegation
- Delegate tool for asynchronous background tasks
- Parallel delegate execution
- Nested delegation prevention (critical security test)
### Phase 4: Advanced Self-Testing
Push boundaries and test limits:
- Intentionally trigger errors
@ -95,14 +94,14 @@ instructions: |
- Validate security controls
- Measure performance metrics
- Test resource constraints
### Phase 5: Report Generation
Compile comprehensive test results:
- Aggregate all test outcomes
- Calculate success metrics
- Document failures and issues
- Generate recommendations
## Success Criteria
- Phase success: ≥80% tests pass
- Suite success: All phases complete, critical features work
@ -115,22 +114,22 @@ extensions:
timeout: 600
bundled: true
description: Core tool for file operations, shell commands, and code analysis
prompt: |
Execute the Goose Self-Testing Integration Suite in {{ workspace_dir }}.
Test phases: {{ test_phases }}, Depth: {{ test_depth }}, Parallel: {{ parallel_tests }}
## 🚀 INITIALIZATION
Create test workspace: {{ workspace_dir }}/ for all test artifacts and reports.
Track your progress using the todo extension. Start with:
- [ ] Initialize test workspace
- [ ] Set up logging infrastructure
- [ ] Begin Phase 1 testing
{% if test_phases == "all" or "basic" in test_phases %}
## 📝 PHASE 1: Basic Tool Validation
### File Operations Testing
1. Create test files with various content types (.txt, .py, .md, .json)
2. Test str_replace on each file type
@ -138,25 +137,25 @@ prompt: |
4. Test undo functionality
5. Verify file deletion and recreation
6. Test with special characters and Unicode
### Shell Command Testing
Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt),
Test comprehensive shell workflow: command chaining (mkdir test && cd test && echo "test" > file.txt),
error handling (false || echo "handled"), and environment variables (export VAR=test && echo $VAR).
Verify both success and failure paths work correctly.
### Code Analysis Testing
1. Create sample code files in Python, JavaScript, and Go
2. Analyze each file for structure
3. Test directory-wide analysis
4. Test symbol focus and call graphs
5. Verify LOC, function, and class counting
Log results to: {{ workspace_dir }}/phase1_basic_tools.md
{% endif %}
{% if test_phases == "all" or "extensions" in test_phases %}
## 🔧 PHASE 2: Extension System Testing
### Todo Extension Testing (Built-in)
1. Create initial todos and verify they persist
2. Update todos and confirm changes are retained
@ -167,58 +166,167 @@ prompt: |
2. Document all available extensions
3. Test enabling and disabling dynamic extensions (if any available)
4. Verify extension isolation between enabled extensions
Log results to: {{ workspace_dir }}/phase2_extensions.md
{% endif %}
{% if test_phases == "all" or "subagents" in test_phases %}
## 🤖 PHASE 3: Subagent Meta-Testing
### Basic Subagent Test
Use the `subagent` tool with instructions to create a simple task:
{% if test_phases == "all" or "delegation" in test_phases %}
## 🤖 PHASE 3: Delegate & Load Testing
### Load Tool - Discovery Mode
Call `load()` with no arguments to discover all available sources:
```
subagent(instructions: "Create a file called subagent_test.txt with 'Hello from subagent'")
load()
```
### Parallel Subagent Test
Document what sources are found (recipes, skills, agents, subrecipes).
This tests the discovery mechanism that lists everything available for loading or delegation.
### Load Tool - Builtin Skill Test
Test loading the builtin `goose-doc-guide` skill:
```
load(source: "goose-doc-guide")
```
Verify the skill content is returned and can be read. This confirms builtin skills are accessible.
### Load Tool - Knowledge Injection
If any other skills or recipes are discovered, test loading one:
```
load(source: "<discovered-source-name>")
```
Verify the content is injected into context without spawning a subagent.
### Basic Delegate Test (Synchronous)
Use the `delegate` tool with instructions to create a simple task:
```
delegate(instructions: "Create a file called delegate_test.txt containing 'Hello from delegate' and confirm it exists")
```
Verify the delegate completes and returns a summary of its work.
### Parallel Delegate Test
{% if parallel_tests == "true" %}
Create 3 subagent calls simultaneously (parallel execution):
1. Count files in current directory
2. Get current timestamp
3. Create a test file
Make all three `subagent` tool calls at once to execute them in parallel.
**Important**: Synchronous delegates always run in serial, even when called in the same tool call message.
Async delegates (`async: true`) run in parallel when called in the same tool call message.
First, test sync delegates (will run sequentially):
Make these 3 delegate calls in a single message:
1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_1.txt with timestamp from 'date +%H:%M:%S'")`
2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_2.txt with timestamp from 'date +%H:%M:%S'")`
3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/sync_parallel_3.txt with timestamp from 'date +%H:%M:%S'")`
After completion, check timestamps: `cat /tmp/sync_parallel_*.txt`
**Expected**: Timestamps should be ~6+ seconds apart (sequential execution).
Then, test async delegates (will run in parallel):
Make these 3 delegate calls in a single message:
1. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_1.txt with timestamp from 'date +%H:%M:%S'", async: true)`
2. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_2.txt with timestamp from 'date +%H:%M:%S'", async: true)`
3. `delegate(instructions: "Sleep 2 seconds, then create /tmp/async_parallel_3.txt with timestamp from 'date +%H:%M:%S'", async: true)`
Wait for tasks to complete (sleep 10 seconds), then check timestamps: `cat /tmp/async_parallel_*.txt`
**Expected**: Timestamps should be within ~5 seconds of each other (parallel execution).
Document both results to validate the parallel execution behavior.
{% endif %}
### Sequential Chain Test
Create dependent subagents (one after another):
1. First: Create a Python file
2. Second: Analyze the created file
3. Third: Run the Python file
### Recursive Depth Test (if test_depth == "deep")
{% if test_depth == "deep" %}
Create a subagent that creates another subagent (test depth limit).
Monitor for resource constraints and context window limits.
### Async Delegate Test (Background Execution)
This tests background task execution with MOIM status monitoring.
1. Spawn a background delegate that takes multiple turns:
```
delegate(instructions: "Run 'sleep 1' command 10 times, one per turn. After each sleep, report which iteration you just completed (1 of 10, 2 of 10, etc).", async: true)
```
2. After spawning, the delegate runs in the background. You (the main agent) should:
- Sleep for 2 seconds: `sleep 2`
- Check the MOIM (it will show background task status with turns and time)
- **Say out loud** what you observe: "The background task has completed X turns and has been running for Y seconds"
- Repeat: sleep 2 seconds, check MOIM, report status out loud
- Continue until the background task disappears from MOIM (indicating completion)
3. Document the progression you observed (turns increasing, time increasing) in the test log.
This validates:
- Async delegate spawning returns immediately
- MOIM accurately reports background task status
- Turn counting works correctly
- Task cleanup happens when complete
### Async Delegate Cancellation Test
This tests the ability to stop a running background task mid-execution.
1. Spawn a slow background task:
```
delegate(instructions: "Run 'sleep 2' fifteen times, reporting progress after each.", async: true)
```
Note the task ID returned (e.g., "20260204_42").
2. Wait 8 seconds: `sleep 8`
3. Check MOIM and confirm the task is running with some turns completed.
4. Cancel the task:
```
load(source: "<task_id>", cancel: true)
```
5. Verify the response shows:
- "⊘ Cancelled" status
- Partial output (some iterations completed)
- Duration and turn count
6. Check MOIM again - the task should be gone (not in running or completed).
7. Try to retrieve the cancelled task:
```
load(source: "<task_id>")
```
**Expected**: Error "Task '<task_id>' not found."
This validates that cancellation stops tasks, returns partial results, and cleans up properly.
### Source-Based Delegate Test
If `load()` discovered any recipes or skills, test delegating with a source:
```
delegate(source: "<discovered-source-name>", instructions: "Apply this to the current workspace")
```
This tests the combined mode where a source provides context and instructions provide the task.
### Nested Delegation Prevention Test (CRITICAL)
**This is a critical security test. Delegates must NEVER be able to spawn their own delegates.**
Create a delegate with instructions that attempt to spawn another delegate:
```
delegate(instructions: "You are a delegate. Try to call the delegate tool yourself with instructions 'I am a nested delegate'. Report whether you were able to do so or if you received an error.")
```
**Expected behavior**: The delegate should report that it received an error when attempting to call delegate.
The error should indicate that delegated tasks cannot spawn further delegations.
**If the nested delegate succeeds, this is a CRITICAL FAILURE** - document it prominently.
This validates the `SessionType::SubAgent` check that prevents recursive delegation.
### Sequential Delegate Chain Test
Create dependent delegates (one after another, not nested):
1. First: `delegate(instructions: "Create a Python file called chain_test.py with a simple hello world function")`
2. Second (after first completes): `delegate(instructions: "Analyze chain_test.py and describe its structure")`
3. Third (after second completes): `delegate(instructions: "Run chain_test.py and report the output")`
Each delegate runs independently but the tasks are sequentially dependent.
Log results to: {{ workspace_dir }}/phase3_delegation.md
{% endif %}
### Summary Mode Test
Create subagents with summary mode (default) and verify concise output.
Test with `summary: false` to get full conversation history.
Log results to: {{ workspace_dir }}/phase3_subagents.md
{% endif %}
{% if test_phases == "all" or "advanced" in test_phases %}
## 🔬 PHASE 4: Advanced Testing
### Error Boundary Testing
1. Create a file with an invalid path (should fail gracefully)
2. Run a non-existent shell command
3. Try to analyze a binary file
4. Test with extremely long filenames
5. Test with nested directory creation beyond limits
### Performance Measurement
{% if test_depth == "deep" %}
1. Create and analyze a large file (>1MB)
@ -226,68 +334,72 @@ prompt: |
3. Track execution times for each operation
4. Monitor token usage if accessible
{% endif %}
### Security Validation
1. Test input with special shell characters: $(echo test)
2. Attempt directory traversal: ../../../etc/passwd
3. Test with harmful Unicode characters
4. Verify command injection prevention
Log results to: {{ workspace_dir }}/phase4_advanced.md
{% endif %}
## 📊 PHASE 5: Final Report Generation
Create TWO reports:
### 1. Detailed Report at {{ workspace_dir }}/detailed_report.md
Include all test details, logs, and technical information.
### 2. Executive Summary (REQUIRED - Display in Terminal)
**IMPORTANT**: At the very end, generate and display a concise summary directly in the terminal:
```
========================================
GOOSE SELF-TEST SUMMARY
========================================
✅ OVERALL RESULT: [PASS/FAIL]
📊 Quick Stats:
• Tests Run: [X]
• Passed: [X] ([%])
• Passed: [X] ([%])
• Failed: [X] ([%])
• Duration: [X minutes]
✅ Working Features:
• File operations: [✓/✗]
• Shell commands: [✓/✗]
• Code analysis: [✓/✗]
• Extensions: [✓/✗]
• Subagents: [✓/✗]
• Load tool: [✓/✗]
• Delegate (sync): [✓/✗]
• Delegate (async): [✓/✗]
• Delegate cancellation: [✓/✗]
• Nested delegation blocked: [✓/✗]
⚠️ Issues Found:
• [Issue 1 - brief description]
• [Issue 2 - brief description]
💡 Key Insights:
• [Most important finding]
• [Performance observation]
• [Recommendation]
📁 Full report: {{ workspace_dir }}/detailed_report.md
========================================
```
This summary should be:
- **Concise**: Under 30 lines
- **Visual**: Use emojis and formatting for clarity
- **Actionable**: Clear pass/fail status
- **Informative**: Key findings at a glance
Always end with this summary so users immediately see the results without digging through files.
{% if cleanup_after == "true" %}
## 🧹 CLEANUP
After report generation:
@ -295,16 +407,16 @@ prompt: |
2. Remove temporary test artifacts
3. Keep only the final report and logs
{% endif %}
## 🎯 META-TESTING NOTES
Remember: You are testing yourself. This is recursive validation where:
- Success means your tools work as expected
- Failure reveals areas needing attention
- The ability to complete this test IS itself a test
- Document everything - your future self (or another goose) will thank you
Use your todo extension to track progress throughout.
Handle errors gracefully - a failed test shouldn't crash the suite.
Be thorough but efficient based on the test_depth parameter.
This is true first-person integration testing. Execute with precision and document with clarity.

View file

@ -17,6 +17,7 @@ prompt: |
Run two subrecipes in parallel:
- use file_stats subrecipe to gather file statistics for {{ target_directory }}
- use code_patterns subrecipe to analyze code patterns in {{ target_directory }}
Iteratively `sleep 10` until the delegates complete, then load their output and confirm success
extensions:
- type: builtin
name: developer

View file

@ -21,8 +21,10 @@ done
# These are typically preview/experimental models with inconsistent tool-calling behavior.
# Failures are still reported but don't block PRs.
ALLOWED_FAILURES=(
"google:gemini-2.5-flash"
"google:gemini-3-pro-preview"
"openrouter:nvidia/nemotron-3-nano-30b-a3b"
"openai:gpt-3.5-turbo"
)
# Agentic providers handle tools internally and return text results.

View file

@ -22,7 +22,7 @@ export PATH="$SCRIPT_DIR/target/debug:$PATH"
# Set default provider and model if not already set
# Use fast model for CI to speed up tests
export GOOSE_PROVIDER="${GOOSE_PROVIDER:-anthropic}"
export GOOSE_MODEL="${GOOSE_MODEL:-claude-3-5-haiku-20241022}"
export GOOSE_MODEL="${GOOSE_MODEL:-claude-haiku-4-5}"
echo "Using provider: $GOOSE_PROVIDER"
echo "Using model: $GOOSE_MODEL"
@ -77,17 +77,17 @@ check_recipe_output() {
local tmpfile=$1
local mode=$2
# Check for unified subagent tool invocation (new format: "─── subagent |")
if grep -q "─── subagent" "$tmpfile"; then
echo "✓ SUCCESS: Subagent tool invoked"
RESULTS+=("Subagent tool invocation ($mode)")
# Check for delegate tool invocation (new format: "─── delegate |")
if grep -q "─── delegate" "$tmpfile"; then
echo "✓ SUCCESS: Delegate tool invoked"
RESULTS+=("Delegate tool invocation ($mode)")
else
echo "✗ FAILED: No evidence of subagent tool invocation"
RESULTS+=("Subagent tool invocation ($mode)")
echo "✗ FAILED: No evidence of delegate tool invocation"
RESULTS+=("Delegate tool invocation ($mode)")
fi
# Check that both subrecipes were called (shown as "subrecipe: <name>" in output)
if grep -q "subrecipe:.*file_stats\|file_stats.*subrecipe" "$tmpfile" && grep -q "subrecipe:.*code_patterns\|code_patterns.*subrecipe" "$tmpfile"; then
# Check that both subrecipes were called (shown as "source: <name>" in delegate output)
if grep -q "source:.*file_stats\|source.*file_stats" "$tmpfile" && grep -q "source:.*code_patterns\|source.*code_patterns" "$tmpfile"; then
echo "✓ SUCCESS: Both subrecipes (file_stats, code_patterns) found in output"
RESULTS+=("✓ Both subrecipes present ($mode)")
else